

# Filesystem locations used by this script.
# Root of the retrieved-context files; main() reads
# <ORIGINAL_CHUNK_BASE>/<retriever setting>/<QA file name>.
ORIGINAL_CHUNK_BASE = "/home/weishaohang/workspace/Omni-Temp/evaluation/chunks_for_tcelongbench"
# TIME-Lite QA directory (not referenced elsewhere in the visible part of this file).
TIMELITE_QA_BASE = "/home/weishaohang/workspace/Omni-Temp/TIME-Lite/QAs"
# Root of the output tree; main() writes
# <SAVE_CONTEXT_BASE>/<retriever setting>/<QA file name>.
SAVE_CONTEXT_BASE = "/home/weishaohang/workspace/Omni-Temp/TIME-Lite/contexts/tcelongbench"
# Disabled dataset selector, kept commented out:
# dataset_names = [
#     # "wikidata",
#     "tcelongbench",
#     # "long_dialog"
#     ]

# Retriever settings; each one names a sub-directory under both base paths.
retriever_settings = ["bm25", "vector", "hybrid"]
# QA file names processed for every retriever setting; each name is also used
# as a key into the mapping loaded from `mapping_file`.
tcelongbench_QAs_files_base_names = [
    "L2_1_QAs_multi_choice.json",
    "L1_2_QAs.json",
    "L1_5_QAs.json",
    "L1_4_QAs.json",
    "L2_3_QAs_multi_choice.json",
    "L2_2_QAs_multi_choice.json",
    "L3_4_QAs_multi_choice.json",
    "L3_2_QAs.json",
    "L1_3_QAs.json",
    "L3_1_QAs_multi_choice.json",
]

import json
import os
# Load the index mapping once, at import time. main() looks up each QA file
# name in it and unpacks every value of the resulting dict as a pair
# (original_idx, original_idx_in_context).
mapping_file = "/home/weishaohang/workspace/Omni-Temp/TIME-Lite/mapping_idx_from_TIME/tcelongbench_verify_to_original_mapping.json"
with open(mapping_file, "r", encoding="utf-8") as f:
    mapping_idx_from_TIME_to_original = json.load(f)
    
def main():
    """Build the TIME-Lite context files from the original retrieved contexts.

    For every combination of retriever setting and QA file name, the JSON file
    ``<ORIGINAL_CHUNK_BASE>/<setting>/<name>`` is loaded, the entries selected
    by ``mapping_idx_from_TIME_to_original[<name>]`` are collected in mapping
    order, and the resulting list is written as JSON to
    ``<SAVE_CONTEXT_BASE>/<setting>/<name>``. The output directory is created
    when it does not exist yet.

    Raises:
        FileNotFoundError: If a source context file does not exist.
        KeyError: If a QA file name has no entry in the mapping. Lookup errors
            from indexing the context data with the mapped indices propagate
            unchanged.
    """
    for retriever_setting in retriever_settings:
        for qa_file_base_name in tcelongbench_QAs_files_base_names:
            # Source file holding the retrieved contexts.
            context_file = os.path.join(ORIGINAL_CHUNK_BASE, retriever_setting, qa_file_base_name)
            try:
                with open(context_file, "r", encoding="utf-8") as f:
                    context_data = json.load(f)
            except FileNotFoundError as err:
                # Raised explicitly instead of asserting after the open():
                # an assert placed there can never fire and is stripped
                # under ``python -O``.
                raise FileNotFoundError(f"context文件不存在: {context_file}") from err

            # Index pairs for this QA file; the mapping keys are not needed.
            mapping_dict = mapping_idx_from_TIME_to_original[qa_file_base_name]

            # Pick the retrieved context list for every mapped entry, in
            # mapping order.
            all_contexts = [
                context_data[original_idx][original_idx_in_context]
                for original_idx, original_idx_in_context in mapping_dict.values()
            ]

            # Save the result, creating the per-retriever directory on demand
            # so a fresh output tree does not make the write fail.
            save_context_file = os.path.join(SAVE_CONTEXT_BASE, retriever_setting, qa_file_base_name)
            os.makedirs(os.path.dirname(save_context_file), exist_ok=True)
            with open(save_context_file, "w", encoding="utf-8") as f:
                json.dump(all_contexts, f, ensure_ascii=False, indent=4)
                
# Run the conversion only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()