import pandas as pd
import json
import os
import argparse
from tqdm import tqdm
import numpy as np

def jsonl_read(file_path, evaluator):
    """Load the per-record evaluation results from a JSON Lines file.

    Each non-blank line of ``file_path`` is parsed as a JSON object and the
    value stored under the key ``f"{evaluator}_eval"`` is collected.

    Args:
        file_path: Path to the JSONL file to read.
        evaluator: Evaluator name; used to build the ``<evaluator>_eval`` key.

    Returns:
        A list with one evaluation record per non-blank line, in file order.

    Raises:
        ValueError: If any line has no ``<evaluator>_eval`` entry (or it is
            null), i.e. the record has not been scored.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    key = f'{evaluator}_eval'
    eval_list = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(f, start=1):
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            record = json.loads(line).get(key)
            # Fail here with a precise location instead of handing a None
            # record to the caller, which would crash later on `.get(...)`.
            if record is None:
                raise ValueError(
                    f"Not all records are scored: line {line_no} of "
                    f"{file_path} has no '{key}' entry"
                )
            eval_list.append(record)
    return eval_list

def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is 0."""
    return numerator / denominator if denominator else 0.0


if __name__ == '__main__':
    # Compare the per-record scores of two evaluation files and append a
    # one-line JSON summary to <grandparent dir of file2>/comparison/.
    parser = argparse.ArgumentParser(description="Comparison")
    parser.add_argument('--file1', type=str, required=True,
                        help="The path to the eval file 1.")
    parser.add_argument('--file2', type=str, required=True,
                        help="The path to the eval file 2.")
    parser.add_argument('--evaluator', type=str, required=True,
                        help="The evaluator to score.")
    params = parser.parse_args()

    file1 = os.path.abspath(params.file1)
    file2 = os.path.abspath(params.file2)
    out_dir = os.path.join(os.path.dirname(os.path.dirname(file2)), "comparison")
    os.makedirs(out_dir, exist_ok=True)
    eval_list1 = jsonl_read(file1, params.evaluator)
    eval_list2 = jsonl_read(file2, params.evaluator)
    # Records are paired by position, so both files must have the same length.
    # Raise explicitly: an `assert` would be stripped when running with -O.
    if len(eval_list1) != len(eval_list2):
        raise ValueError("The number of the two responses are not equal")

    score_key = f"{params.evaluator}_score"
    score1_greater = 0
    score1_equal = 0
    score1_less = 0
    invalid_count = 0
    total_score1 = 0
    total_score2 = 0
    valid1 = 0
    valid2 = 0
    for record1, record2 in zip(eval_list1, eval_list2):
        s1 = record1.get(score_key, 0)
        s2 = record2.get(score_key, 0)
        # Only records whose flag equals 1 contribute to the statistics.
        ok1 = record1["flag"] == 1
        ok2 = record2["flag"] == 1
        if ok1:
            total_score1 += s1
            valid1 += 1
        if ok2:
            total_score2 += s2
            valid2 += 1
        # A pair is comparable only when both sides are valid.
        if not (ok1 and ok2):
            invalid_count += 1
        elif s1 > s2:
            score1_greater += 1
        elif s1 < s2:
            score1_less += 1
        else:
            score1_equal += 1

    valid_pairs = len(eval_list1) - invalid_count
    out_file = os.path.join(out_dir, f'{params.evaluator}_comparison.jsonl')
    # Win/Loss are reported from file 2's point of view: "Win" counts pairs
    # where file 2 scored higher than file 1. Averages and rates fall back to
    # 0.0 when there is nothing valid to divide by, instead of crashing.
    output_data = {
        "File1": file1,
        "File2": file2,
        "Total_responses": len(eval_list1),
        "Valid_comparison": valid_pairs,
        "Average_Score1": _safe_ratio(total_score1, valid1),
        "Average_Score2": _safe_ratio(total_score2, valid2),
        "Loss": {
            "count": score1_greater,
            "rate": _safe_ratio(score1_greater, valid_pairs),
        },
        "Tie": {
            "count": score1_equal,
            "rate": _safe_ratio(score1_equal, valid_pairs),
        },
        "Win": {
            "count": score1_less,
            "rate": _safe_ratio(score1_less, valid_pairs),
        },
        "Win_Tie_rate": _safe_ratio(score1_less + score1_equal, valid_pairs),
        "Invalid_Count": invalid_count,
    }

    # Append so repeated runs accumulate one summary line each.
    with open(out_file, 'a') as file:
        file.write(json.dumps(output_data) + "\n")

    print("Comparison finished!")
    