# Must be run after run_pipeline.py and llm_report.py have finished for all models.

import os
import re
# Collect the dataset prefixes (file names without the ".csv" extension) of the
# files in input_dir whose names match filename_pattern.
data_prefixes = []

input_dir = os.path.join("../../data", "processed_data")
filename_pattern = r"202412(06|11)_.*\.csv"

# re.fullmatch anchors the pattern at both ends; re.match only anchors the
# start, so a name such as "20241206_x.csv.bak" used to be accepted and then
# sliced into a bogus prefix. Sorting makes the processing order reproducible,
# because os.listdir returns entries in arbitrary order.
list_files = sorted(
    f
    for f in os.listdir(input_dir)
    if re.fullmatch(filename_pattern, f) is not None
    and os.path.isfile(os.path.join(input_dir, f))
)
data_prefixes.extend(os.path.splitext(f)[0] for f in list_files)

# Version tag of the LLM report; it appears in the output directory name below
# and in the name of every input report file that is read later.
version = "v2"
import pandas as pd
import os
import shutil

# Directory that receives the merged report files (one CSV per metric).
output_dir = f"../../result/eval/human_llm/llm_reports_{version}"

# Start from an empty directory: the CSVs are written in append mode further
# down, so files left over from a previous run would otherwise be extended.
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)
os.makedirs(output_dir)

# Base metric names; each one is combined with both weighting suffixes below.
pre_metrics = [
    "semantic_similarity",
    "likert_diff",
    "n_word_diff",
    "n_word_abs_diff",
]

# Full metric names in the form "<metric>_<weight>", grouped by metric with
# "person" before "message".
metrics = [
    f"{base_name}_{weighting}"
    for base_name in pre_metrics
    for weighting in ("person", "message")
]

# Lookup table keyed by model name. Each entry carries a "type" label and an
# optional "base" entry naming another model (presumably the checkpoint it was
# trained from -- confirm). Entries that are commented out are not part of the
# table.
model_info = {
    # "gpt-4o-mini-2024-07-18": {"type": "Instruct"},
    #
    # "Mistral-7B-v0.1": {"type": "Base"},
    # "mistral-7b-sft-beta": {"type": "SFT", "base": "Mistral-7B-v0.1"},
    # "zephyr-7b-beta": {"type": "DPO", "base": "mistral-7b-sft-beta"},
    # "Mistral-7B-Instruct-v0.1": {"type": "Instruct", "base": "Mistral-7B-v0.1"},
    #
    # "Meta-Llama-3-8B": {"type": "Base"},
    # "LLaMA3-SFT": {"type": "SFT", "base": "Meta-Llama-3-8B"},
    # "LLaMA3-iterative-DPO-final": {"type": "DPO", "base": "LLaMA3-SFT"},
    # "Meta-Llama-3-8B-Instruct": {"type": "Instruct", "base": "Meta-Llama-3-8B"},
    #
    "Llama-3.1-8B": {
        "type": "Base",
    },
    "Llama-3.1-Tulu-3-8B-SFT": {
        "type": "SFT",
        "base": "Llama-3.1-8B",
    },
    "Llama-3.1-Tulu-3-8B-DPO": {
        "type": "DPO",
        "base": "Llama-3.1-Tulu-3-8B-SFT",
    },
    "Llama-3.1-Tulu-3-8B": {
        "type": "RLVR",
        "base": "Llama-3.1-Tulu-3-8B-DPO",
    },
    # "Llama-3.1-8B-Instruct": {"type": "Instruct", "base": "Llama-3.1-8B"},
    #
    # "OLMo-2-1124-7B": {"type": "Base"},
    # "OLMo-2-1124-7B-SFT": {"type": "SFT", "base": "OLMo-2-1124-7B"},
    # "OLMo-2-1124-7B-DPO": {"type": "DPO", "base": "OLMo-2-1124-7B-SFT"},
    # "OLMo-2-1124-7B-Instruct": {"type": "RLVR", "base": "OLMo-2-1124-7B-DPO"},
    #
    "Llama-3.1-Tulu-3-8B-MT-DDPO-0129": {
        "type": "DDPO",
        "base": "Llama-3.1-Tulu-3-8B-SFT",
    },
}

# Merge the per-dataset reports into one CSV per metric, adding to every row
# the "type" and "base" values that model_info holds for the row's model.
for data_prefix in data_prefixes:
    input_file = f"../../result/eval/human_llm/{data_prefix}/llm_report_{version}.csv"
    all_report_df = pd.read_csv(input_file)

    for metric in metrics:
        output_file = f"{output_dir}/{metric}.csv"

        # Rows of this report that belong to the current metric.
        selected = all_report_df.loc[all_report_df["type"] == metric]

        # assign() returns a new frame, so the report itself is not modified.
        # A model name missing from model_info raises KeyError.
        metric_report_df = selected.assign(
            model_type=selected["model"].map(lambda name: model_info[name]["type"]),
            model_base=selected["model"].map(lambda name: model_info[name].get("base", "")),
        )[["data_prefix", "model_type", "model_base", "model", "score"]]

        # The header row is written only when the file does not exist yet;
        # every later dataset is appended below the existing rows.
        metric_report_df.to_csv(
            output_file,
            mode="a",
            header=not os.path.exists(output_file),
            index=False,
        )
