41 lines
1.4 KiB
Python
41 lines
1.4 KiB
Python
"""Count how many ``.txt`` files in a folder share the same filename prefix.

The script lists the ``.txt`` files directly inside ``folder_path``, groups
them by the first ``PREFIX_LENGTH`` characters of their names, writes one
``(prefix, count)`` row per group to ``csv_filename`` and prints a summary
that highlights every prefix occurring more than once.
"""
import csv
import os
from collections import Counter

# Folder whose .txt files are inspected.
folder_path = r'R:\TYSAR-德清院\TYSAR-条带模式(SM)\港口\20250910-不分类\C-人工检查\删除空标注'

# CSV report that receives one row per distinct prefix.
csv_filename = r'D:\TYSAR-德清院\unique_file_0919.csv'

# Number of leading filename characters that identify a group.
PREFIX_LENGTH = 50


def list_txt_files(folder):
    """Return the sorted names of the regular ``.txt`` files inside *folder*.

    Only direct children are considered. Directories whose name happens to
    end in ``.txt`` are skipped, and the extension is matched
    case-insensitively so ``A.TXT`` is found as well. The result is sorted
    because ``os.listdir`` returns entries in arbitrary order.

    Raises:
        OSError: if *folder* cannot be listed (e.g. it does not exist).
    """
    return sorted(
        name
        for name in os.listdir(folder)
        if name.lower().endswith('.txt')
        and os.path.isfile(os.path.join(folder, name))
    )


def count_prefixes(filenames, prefix_length=PREFIX_LENGTH):
    """Return a ``Counter`` mapping each filename prefix to its frequency.

    A name shorter than *prefix_length* is used in full (slicing never
    raises), so short names keep their extension as part of the key.
    """
    return Counter(name[:prefix_length] for name in filenames)


def write_prefix_counts(rows, destination, prefix_length=PREFIX_LENGTH):
    """Write ``(prefix, count)`` *rows* to the CSV file *destination*.

    The parent folder of *destination* is created when it is missing, and
    an existing file is overwritten.
    """
    parent = os.path.dirname(destination)
    if parent:
        os.makedirs(parent, exist_ok=True)

    # newline='' lets the csv module control line endings itself.
    with open(destination, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        # Header row; with the default length this is '文件名前50个字符'.
        writer.writerow([f'文件名前{prefix_length}个字符', '重复次数'])
        writer.writerows(rows)


def main(folder=folder_path, output=csv_filename, prefix_length=PREFIX_LENGTH):
    """Scan *folder*, write the prefix report to *output* and print a summary.

    Returns:
        The list of ``(prefix, count)`` tuples that was written to the CSV.
    """
    # Collect every .txt file in the folder.
    txt_files = list_txt_files(folder)
    print(f"找到 {len(txt_files)} 个txt文件:")
    for file in txt_files:
        print(f" {file}")

    # Count how often each prefix occurs and shape the data for the CSV.
    csv_data = list(count_prefixes(txt_files, prefix_length).items())

    # Persist the result.
    write_prefix_counts(csv_data, output, prefix_length)
    print(f"\n已将结果保存到 {output}")

    print("\n重复统计详情:")
    for prefix, count in csv_data:
        print(f" '{prefix}': {count} 次")

    # Report the prefixes shared by more than one file.
    duplicates = [(prefix, count) for prefix, count in csv_data if count > 1]
    if duplicates:
        print("\n发现的重复文件:")
        for prefix, count in duplicates:
            print(f" 前缀 '{prefix}' 出现了 {count} 次")
    else:
        print("\n未发现重复文件")

    return csv_data


if __name__ == "__main__":
    main()