import csv

def read_csv(file_path):
    """Read a CSV file and return a mapping of normalized name to id.

    The first row of the file is used as the header and must provide the
    ``name`` and ``id`` columns. Each name is normalized by removing ``(`` and
    ``)`` and replacing every space with ``_``; the id is kept exactly as the
    string read from the file. When several rows normalize to the same name,
    the last one wins.

    Args:
        file_path: Path of the UTF-8 encoded CSV file.

    Returns:
        A dict mapping each normalized name to its id string.

    Raises:
        KeyError: If the header has no ``name`` or ``id`` column.
    """
    # Single translation pass: ' ' -> '_', and '(' / ')' are deleted.
    name_table = str.maketrans(' ', '_', '()')
    # newline='' hands line endings to the csv parser untouched, so newlines
    # embedded in quoted fields are not rewritten by universal-newline mode.
    with open(file_path, 'r', encoding='utf-8', newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        return {row['name'].translate(name_table): row['id'] for row in reader}

def read_txt(file_path):
    """Return the lines of a UTF-8 text file as a list.

    Leading and trailing whitespace (including the line terminator) is
    stripped from every line; blank lines are kept as empty strings.
    """
    stripped_lines = []
    with open(file_path, mode='r', encoding='utf-8') as handle:
        for raw_line in handle:
            stripped_lines.append(raw_line.strip())
    return stripped_lines

def write_txt(file_path, data):
    """Write every item of ``data`` to a UTF-8 text file, one per line.

    The file is created or truncated. Each item is formatted with its string
    form and followed by a newline.
    """
    with open(file_path, mode='w', encoding='utf-8') as sink:
        sink.writelines(f"{entry}\n" for entry in data)

# Input and output file paths.
csv_file = 'k400_novel_labels.csv'
txt_file = 'vallist.txt'
output_file = 'k400_novel_vallist.txt'

# Load the normalized-name -> id table and the lines of the list file.
csv_data = read_csv(csv_file)
txt_data = read_txt(txt_file)

# Keep only the lines whose text before the first '/' is a name present in the
# CSV table, pairing each kept line with that name's id.
output_data = []
idlist = set()
for line in txt_data:
    cls_name = line.partition('/')[0]
    if cls_name not in csv_data:
        continue
    matched_id = csv_data[cls_name]
    # Everything from the first space onward is replaced by the id.
    output_data.append(' '.join((line.partition(' ')[0], matched_id)))
    idlist.add(matched_id)

# Report the distinct matched ids in numeric order, then how many there are.
sorted_ids = sorted(int(i) for i in idlist)
print(sorted_ids)
print(len(sorted_ids))

# Write the matched lines to the output file.
write_txt(output_file, output_data)

print("文件处理完成，重复项已写入新的TXT文件。")
