import os
import shutil

from tqdm import tqdm

import tools
SAMPLE_NUM = 20

datasets = [
    {"datasets--allenai--reward-bench-2": [
        "train_NOANS", "train_ANS", "validation_NOANS"]},
    {"datasets--RUC-NLPIR--FlashRAG_datasets@hotpotqa_RAG": [
        "train_NOANS", "train_ANS", "validation_NOANS"]}
]
models = tools.BACKBONE_MODELS
for model in models:
    for dataset in datasets:
        for dataset_name, dataset_subsets in dataset.items():
            for dataset_subset in dataset_subsets:
                DIR = f"{tools.machine_pather()}/works/DPO/judge/output/{dataset_name}/{dataset_subset}/{model}/"

                assert "judge/output" in DIR
                for path in os.listdir(DIR):
                    print(dataset_name, dataset_subset, model, path)

                    path = os.path.join(DIR, path)
                    part_num = None
                    for tmp_part_num in [1, 2, 4, 8]:
                        for i in range(tmp_part_num):
                            if_ok = True
                            if not os.path.exists(f"{path}/success_part{i+1}of{tmp_part_num}.tag"):
                                if_ok = False
                                break
                        if if_ok:
                            part_num = tmp_part_num
                            break
                    if part_num is None:
                        print("No part_num found, skipping...")
                        print("==" * 20)

                        continue
                    if os.path.exists(f"{path}/score.jsonl"):
                        print(f"Score file already exists: {path}/score.jsonl")
                        print("==" * 20)

                        continue
                    for each in range(part_num):
                        assert os.path.exists(
                            f"{path}/score_part{each+1}of{part_num}.jsonl"), f"{path}/score_part{each+1}of{part_num}.jsonl"
                    print(f"Part number: {part_num}")

                    lines = []
                    for file_idx in tqdm(range(part_num)):
                        lines.extend(tools.read_jsonl(
                            f"{path}/score_part{file_idx+1}of{part_num}.jsonl"))
                    input_list = []
                    for line in tqdm(lines):
                        input_list.append(line['input'])
                    input_PATH = f"{tools.machine_pather()}/works/DPO/judge/input/{dataset_name}/{dataset_subset}/{model}/judge_input.jsonl"

                    original_input_list = []
                    for each in tqdm(tools.read_jsonl(input_PATH)):
                        original_input_list.append(1)

                    assert len(input_list) == len(
                        original_input_list), f"Length mismatch: {len(input_list)} != {len(original_input_list)}"

                    print("Input check passed.")

                    if part_num == 1:
                        os.system(
                            f'cp {path}/score_part1of1.jsonl {path}/score.jsonl')

                    elif part_num == 2:
                        os.system(
                            f'cat {path}/score_part1of2.jsonl {path}/score_part2of2.jsonl > {path}/score.jsonl')

                    elif part_num == 4:
                        os.system(
                            f'cat {path}/score_part1of4.jsonl {path}/score_part2of4.jsonl {path}/score_part3of4.jsonl {path}/score_part4of4.jsonl > {path}/score.jsonl')
                    elif part_num == 8:
                        os.system(
                            f'cat {path}/score_part1of8.jsonl {path}/score_part2of8.jsonl {path}/score_part3of8.jsonl {path}/score_part4of8.jsonl {path}/score_part5of8.jsonl {path}/score_part6of8.jsonl {path}/score_part7of8.jsonl {path}/score_part8of8.jsonl > {path}/score.jsonl')

                    print("saved as score.jsonl")
                    print('before:')
                    os.system(f"wc -l {input_PATH}")
                    print('after:')
                    os.system(f"wc -l {path}/score.jsonl")
                    print("==" * 20)
