import csv
import json

def read_csv(file_path):
    """Read a CSV file and return a dict mapping each row's ``name`` to its ``id``.

    Args:
        file_path: Path to a UTF-8 encoded CSV file whose header row
            contains the ``id`` and ``name`` columns.

    Returns:
        dict: ``{name: id}`` with the values exactly as ``csv.DictReader``
        yields them. When a name appears in several rows, the last row wins.

    Raises:
        KeyError: If the rows do not have a ``name`` or ``id`` column.
    """
    # newline='' hands line-ending handling to the csv module, so newlines
    # embedded in quoted fields are kept intact instead of being translated.
    with open(file_path, 'r', encoding='utf-8', newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        return {row['name']: row['id'] for row in reader}

def read_txt(file_path):
    """Return every line of a UTF-8 text file with surrounding whitespace stripped.

    Blank lines are kept and appear as empty strings in the result.
    """
    stripped_lines = []
    with open(file_path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            stripped_lines.append(raw_line.strip())
    return stripped_lines
 
def read_json(file_path):
    """Load a JSON file of records and map each ``id`` to its cleaned ``template``.

    The square brackets ``[`` and ``]`` are removed from every template
    string. A short debug summary (record count, container type, first
    record) is printed to stdout.

    Args:
        file_path: Path to a UTF-8 encoded JSON file holding a sequence of
            objects that each have ``id`` and ``template`` keys.

    Returns:
        dict: ``{id: template_without_brackets}``; empty when the file holds
        an empty list.

    Raises:
        KeyError: If a record lacks the ``id`` or ``template`` key.
    """
    with open(file_path, 'r', encoding='utf-8') as infile:
        data = json.load(infile)
    print(len(data))
    print(type(data))
    # Only show a sample record when there is one; an empty list is valid
    # input and must not crash the debug output.
    if data:
        print(data[0])
    json_dict = {row['id']: row['template'].replace('[', '').replace(']', '') for row in data}
    return json_dict

def write_txt(file_path, data):
    """Write each item of ``data`` to ``file_path`` as its own line.

    The file is created or overwritten, encoded as UTF-8, and every item is
    formatted with ``str()`` semantics followed by a newline.
    """
    with open(file_path, 'w', encoding='utf-8') as out:
        out.writelines(f"{entry}\n" for entry in data)

# Input and output file paths.
csv_file = 'SSv2_novel_labels.csv'
json_file = 'validation.json'
txt_file = 'val_mp4.txt'
output_file = 'SSv2_novel_vallist.txt'

# Load the label table (name -> id), the JSON records (id -> template with
# brackets removed) and the list of video file lines.
csv_data = read_csv(csv_file)
json_data = read_json(json_file)
txt_data = read_txt(txt_file)

# Keep only the videos whose template matches a label name from the CSV and
# pair the first space-separated token of each line with that label's id.
output_data = []
idlist = set()
count = 0
for line in txt_data:
    # The video id is the part of the line before the first dot.
    vid_id = line.split('.')[0]
    # A video id that is absent from the JSON data raises KeyError here.
    label_name = json_data[vid_id]
    if label_name not in csv_data:
        continue
    label_id = csv_data[label_name]
    count += 1
    output_data.append(line.split(' ')[0] + ' ' + label_id)
    idlist.add(label_id)

# Summary: number of matched lines, the distinct label ids in numeric order,
# and how many distinct label ids were seen.
print(len(output_data))
sorted_ids = sorted(int(label) for label in idlist)
print(sorted_ids)
print(len(sorted_ids))

# Write the matched "<file> <label id>" lines to the output file.
write_txt(output_file, output_data)

print("文件处理完成，重复项已写入新的TXT文件。")
