from constants import data_prefix, model_name, eval_model_save_name, version
import pandas as pd
import os
from . import util

# CSV report that the end of this script reads (if present), updates, and rewrites.
output_file = f"../../result/eval/human_llm/{data_prefix}/llm_report_{version}.csv"


# --- Semantic similarity between human and LLM messages ---
# Only "message_sent" events are scored. Two summaries are produced:
#   * person level  - mean of each empirica_id's own mean score
#   * message level - plain mean over every message
sim_score_file = f"../../result/eval/human_llm/{data_prefix}/{model_name}/human_llm_score_{version}.csv"
sim_score_df = pd.read_csv(sim_score_file)
is_sent_message = sim_score_df["event_type"].eq("message_sent")
sim_score_df = sim_score_df.loc[is_sent_message]

sim_score_per_person = sim_score_df.groupby("empirica_id")["score"].mean()
sim_score_df_person_avg = sim_score_per_person.mean()
sim_score_df_message_avg = sim_score_df["score"].mean()

# --- Absolute difference in predicted Likert rating (human memory vs. LLM memory) ---
# Only "message_sent" events are compared. The absolute difference is summarised
# at person level (mean of per-empirica_id means) and at message level (overall mean).
human_likert_file = f"../../result/eval/human_llm/{data_prefix}/{model_name}/opinion_human_memory_{eval_model_save_name}_{version}.csv"
llm_likert_file = f"../../result/eval/human_llm/{data_prefix}/{model_name}/opinion_llm_memory_{eval_model_save_name}_{version}.csv"

human_likert_df = pd.read_csv(human_likert_file)
llm_likert_df = pd.read_csv(llm_likert_file)
human_likert_df = human_likert_df.loc[human_likert_df["event_type"].eq("message_sent")]
llm_likert_df = llm_likert_df.loc[llm_likert_df["event_type"].eq("message_sent")]

# NOTE(review): the subtraction pairs rows by their original row index, so this
# presumably relies on both files listing the same events in the same order - confirm.
abs_likert_gap = human_likert_df["likert_pred"].sub(llm_likert_df["likert_pred"]).abs()
likert_diff_df = pd.DataFrame({
    "empirica_id": human_likert_df["empirica_id"],
    "score": abs_likert_gap,
})

likert_diff_per_person = likert_diff_df.groupby("empirica_id")["score"].mean()
likert_diff_person_avg = likert_diff_per_person.mean()
likert_diff_message_avg = likert_diff_df["score"].mean()

# --- Difference in message length (whitespace-separated word count) ---
# For every "message_sent" event the human text ("text") is compared with the
# LLM text ("llm_text"). Both the signed difference (human - LLM) and its
# absolute value are summarised at person level (mean of per-empirica_id means)
# and at message level (overall mean).
simulation_file = f"../../result/simulation/{data_prefix}/{model_name}/simulation-{version}.csv"
simulation_df = pd.read_csv(simulation_file)
simulation_df = util.preprocess_simulation_df(simulation_df, consecutive_messages=False)
# Take an explicit copy: new columns are added below, and assigning onto a
# boolean-filtered slice triggers pandas' SettingWithCopy warning.
simulation_df = simulation_df[simulation_df["event_type"] == "message_sent"].copy()

simulation_df["n_human_words"] = simulation_df["text"].str.split().str.len()
simulation_df["n_llm_words"] = simulation_df["llm_text"].str.split().str.len()

# Signed difference: positive when the human message is longer.
simulation_df["diff_n_words"] = simulation_df["n_human_words"] - simulation_df["n_llm_words"]
n_word_diff_person_avg = simulation_df.groupby("empirica_id")["diff_n_words"].mean().mean()
n_word_diff_message_avg = simulation_df["diff_n_words"].mean()

# Absolute difference: magnitude of the length mismatch regardless of direction.
simulation_df["abs_diff_n_words"] = simulation_df["diff_n_words"].abs()
n_word_abs_diff_person_avg = simulation_df.groupby("empirica_id")["abs_diff_n_words"].mean().mean()
n_word_abs_diff_message_avg = simulation_df["abs_diff_n_words"].mean()


# --- Upsert this run's metrics into the shared report CSV ---
# Rows are keyed by (data_prefix, model, eval_model, type); "score" holds the value.
report_key_columns = ["data_prefix", "model", "eval_model", "type"]

if os.path.exists(output_file):
    output_df = pd.read_csv(output_file)
    # An empty key such as eval_model == "" is written as an empty CSV field and
    # read back as NaN. Restore "" so the lookups below update the existing rows
    # instead of appending a fresh duplicate on every rerun.
    output_df[report_key_columns] = output_df[report_key_columns].fillna("")
else:
    output_df = pd.DataFrame(columns=report_key_columns + ["score"])

# Collapse repeated keys that earlier runs may have left in the file (the most
# recent row wins), then index by the key so each metric can be upserted by label.
output_df = output_df.drop_duplicates(subset=report_key_columns, keep="last")
output_df = output_df.set_index(report_key_columns)

# (eval_model, type, score): metrics that do not depend on an evaluation model
# use "" as the eval_model key.
report_rows = [
    ("", "semantic_similarity_person", sim_score_df_person_avg),
    ("", "semantic_similarity_message", sim_score_df_message_avg),
    (eval_model_save_name, "likert_diff_person", likert_diff_person_avg),
    (eval_model_save_name, "likert_diff_message", likert_diff_message_avg),
    ("", "n_word_diff_person", n_word_diff_person_avg),
    ("", "n_word_diff_message", n_word_diff_message_avg),
    ("", "n_word_abs_diff_person", n_word_abs_diff_person_avg),
    ("", "n_word_abs_diff_message", n_word_abs_diff_message_avg),
]
for report_eval_model, report_type, report_score in report_rows:
    output_df.loc[(data_prefix, model_name, report_eval_model, report_type), "score"] = report_score

output_df.to_csv(output_file)
