import argparse
import os
import json
from coder import Coder

# Command-line interface: --folder names the directory tree that is passed to
# read_and_write_json_files() at the bottom of this script.
parser = argparse.ArgumentParser()
parser.add_argument("--folder", type=str, required=True)
args = parser.parse_args()
folder = args.folder

# Project codec helper, plus the encode methods it reports for target "en".
# A result file's encode method is later recognised by looking for one of
# these names inside the file's path.
coder = Coder()
encode_method_list = coder.get_target_encode_method(target="en")

# Special tokens that are removed from every "or_answer" before it is decoded
# again.
special_tokens = [
    "<|end_of_text|>",
    "<|eot_id|>",
    "<|start_header_id|>",
    "<|im_end|>",
]

def _detect_encode_method(file_path: str) -> str:
    """Return the encode method whose name occurs in ``file_path``.

    When several names from ``encode_method_list`` occur in the path, the one
    listed last wins.

    Raises:
        ValueError: if no known encode method name occurs in the path.
    """
    method = ""
    for encode_method in encode_method_list:
        if encode_method in file_path:
            method = encode_method
    if not method:
        raise ValueError("Empty method")
    return method


def _clean_result(result: dict, method: str) -> None:
    """Strip the special tokens from ``or_answer`` and refresh ``de_answer``."""
    cleaned = result['or_answer']
    for token in special_tokens:
        cleaned = cleaned.replace(token, "")
    result['or_answer'] = cleaned
    # Decode once, after every special token is gone, so the decoder never
    # sees a partially cleaned answer.
    result['de_answer'] = coder.decode(cleaned, "en", method)


def _process_json_file(file_path: str) -> None:
    """Clean every result stored in one JSON file and rewrite it in place.

    The file is expected to hold a list of lists of result objects:

        [
            [
                {
                    "category": ...,
                    "or_question": ...,
                    "en_question": ...,
                    "or_answer": ...,
                    "de_answer": ...,
                    "executing_times": ...
                }
            ]
        ]
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # The encode method is not stored in the data; it is taken from the path.
    method = _detect_encode_method(file_path)

    for separate_result_list in data:
        for result in separate_result_list:
            _clean_result(result, method)

    # Serialise before opening the file for writing, so a serialisation error
    # cannot leave a truncated file behind.
    payload = json.dumps(data, ensure_ascii=False, indent=4)
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(payload)


def read_and_write_json_files(folder_path: str) -> None:
    """Clean the answers in every ``.json`` file found under ``folder_path``.

    The folder is walked recursively. For each JSON file, the special tokens
    are removed from every ``or_answer``, ``de_answer`` is recomputed from the
    cleaned text with the encode method named in the file's path, and the file
    is overwritten. Files without a ``.json`` suffix are skipped.

    Processing is best-effort: a failure in one file is reported on stdout and
    the walk continues with the next file.

    Args:
        folder_path: Root directory to search for JSON result files.
    """
    for root, _dirs, files in os.walk(folder_path):
        for filename in files:
            # Only JSON files are processed; anything else is left untouched.
            if not filename.endswith('.json'):
                continue
            file_path = os.path.join(root, filename)
            try:
                _process_json_file(file_path)
            except Exception as exc:
                # Report the reason and keep going with the remaining files.
                print(f"处理文件 {filename} 时出错: {exc!r}")
            else:
                print(f"文件 {filename} 处理完成")


# Script entry point: process the folder supplied through the --folder option.
read_and_write_json_files(folder_path=folder)
