import os
import pandas as pd
import logging

# Configure the root logger: INFO and above, rendered as "LEVEL: message".
logging.basicConfig(
    format="%(levelname)s: %(message)s",
    level=logging.INFO,
)

# Output columns, in the order they are written to the summary CSV.
COLUMNS = [
    # Row identity
    "Topic",
    "Human/LLM",
    # Slider opinion metrics
    "Initial Opinion - Slider Bias",
    "Initial Opinion - Slider Diversity",
    "Post Opinion - Slider Bias",
    "Post Opinion - Slider Diversity",
    "Post Opinion (Slider) Bias - Initial Opinion (Slider) Bias",
    "Post Opinion (Slider) Diversity - Initial Opinion (Slider) Diversity",
    # Tweet stance metrics
    "Tweet 1 Bias",
    "Tweet 1 Diversity",
    "Tweet 2 Bias",
    "Tweet 2 Diversity",
    "Tweet 3 Bias",
    "Tweet 3 Diversity",
    "Tweet 3 Bias - Tweet 1 Bias",
    "Tweet 3 Diversity - Tweet 1 Diversity",
]

def safe_get(df, key):
    """Return the first value of column *key* as a float, or 0.0 as a fallback.

    Args:
        df: A DataFrame (or any mapping-like object with ``.get``) holding
            the summary values.
        key: Name of the column to read.

    Returns:
        ``float`` of the first entry in the column. ``0.0`` is returned, and a
        warning is logged, when the column is missing, empty, or its first
        entry cannot be converted to a float. A NaN entry is passed through
        unchanged.
    """
    column = df.get(key)
    if column is None:
        logging.warning("Column %r not found; defaulting to 0.0", key)
        return 0.0

    try:
        # Positional access: ``column[0]`` on a Series is a *label* lookup and
        # fails when the index does not contain the label 0.
        first = column.iloc[0] if hasattr(column, "iloc") else column[0]
        return float(first)
    except (IndexError, KeyError, TypeError, ValueError):
        # Only data problems are handled here; KeyboardInterrupt, SystemExit
        # and genuine programming errors must keep propagating.
        logging.warning("Column %r has no usable first value; defaulting to 0.0", key)
        return 0.0

def get_row_key(topic_name, source):
    """Build the lookup key for a row: the ``(topic_name, source)`` pair."""
    key = topic_name, source
    return key

def get_or_create_row(rows_dict, topic_name, source):
    """Return the row for ``(topic_name, source)``, creating it on first use.

    A newly created row has every entry of ``COLUMNS`` set to 0, except
    "Topic" and "Human/LLM", which are set to *topic_name* and *source*.
    The new row is stored in *rows_dict* before being returned.
    """
    row_key = get_row_key(topic_name, source)
    existing = rows_dict.get(row_key)
    if existing is not None:
        return existing

    fresh_row = {column: 0 for column in COLUMNS}
    fresh_row["Topic"] = topic_name
    fresh_row["Human/LLM"] = source
    rows_dict[row_key] = fresh_row
    return fresh_row

def _fill_opinion_metrics(row, df):
    """Copy the slider-opinion summary values from *df* into *row* and derive the post-minus-initial deltas."""
    row["Initial Opinion - Slider Bias"] = safe_get(df, "overall_avg_initial")
    row["Initial Opinion - Slider Diversity"] = safe_get(df, "overall_std_initial")
    row["Post Opinion - Slider Bias"] = safe_get(df, "overall_avg_post")
    row["Post Opinion - Slider Diversity"] = safe_get(df, "overall_std_post")
    row["Post Opinion (Slider) Bias - Initial Opinion (Slider) Bias"] = (
        row["Post Opinion - Slider Bias"] - row["Initial Opinion - Slider Bias"]
    )
    row["Post Opinion (Slider) Diversity - Initial Opinion (Slider) Diversity"] = (
        row["Post Opinion - Slider Diversity"] - row["Initial Opinion - Slider Diversity"]
    )


def _fill_tweet_metrics(row, df):
    """Copy the three tweet stance summary values from *df* into *row* and derive the tweet-3-minus-tweet-1 deltas."""
    row["Tweet 1 Bias"] = safe_get(df, "overall_chat1")
    row["Tweet 1 Diversity"] = safe_get(df, "overall_std1")
    row["Tweet 2 Bias"] = safe_get(df, "overall_chat2")
    row["Tweet 2 Diversity"] = safe_get(df, "overall_std2")
    row["Tweet 3 Bias"] = safe_get(df, "overall_chat3")
    row["Tweet 3 Diversity"] = safe_get(df, "overall_std3")
    row["Tweet 3 Bias - Tweet 1 Bias"] = row["Tweet 3 Bias"] - row["Tweet 1 Bias"]
    row["Tweet 3 Diversity - Tweet 1 Diversity"] = (
        row["Tweet 3 Diversity"] - row["Tweet 1 Diversity"]
    )


def _collect_simulation_rows(rows_dict, topic_name, base_path, summary_name, fill_row):
    """Fill one row per sub-folder of *base_path* that contains a *summary_name* file."""
    for root, dirs, files in os.walk(base_path):
        # Sorting in place makes os.walk descend in a stable order, so the
        # order in which rows are created does not depend on the filesystem.
        dirs.sort()
        if summary_name not in files:
            continue

        # The folder path relative to base_path is used as the row's source.
        model_name = os.path.relpath(root, base_path)
        if model_name == ".":
            # A summary sitting directly in base_path has no model folder to
            # name it after; skip it without parsing the file.
            logging.warning(f"Skipping {summary_name} directly in {base_path}")
            continue

        df = pd.read_csv(os.path.join(root, summary_name))
        fill_row(get_or_create_row(rows_dict, topic_name, model_name), df)


def process_group_level(group_level_dir, output_file):
    """Aggregate the per-topic summary CSVs into one CSV with a row per (topic, source).

    For every sub-directory ``<topic>`` of *group_level_dir* the following
    files are read when present (a warning is logged when they are not):

    * ``<topic>/human/opinion/summary.csv``
    * ``<topic>/human/tweet/<model>/summary_stance.csv`` (the first ``<model>``
      in sorted order that has the file)
    * ``<topic>/simulation/opinion/**/summary_opinion.csv``
    * ``<topic>/simulation/tweet/**/summary_stance.csv``

    Human values go to the row whose source is ``"human"``; simulation values
    go to a row whose source is the folder path relative to the respective
    ``simulation/opinion`` or ``simulation/tweet`` folder. The topic name is
    the folder name with underscores replaced by spaces and a trailing ".".

    Args:
        group_level_dir: Directory containing one sub-directory per topic.
        output_file: Path of the CSV file to write.

    Raises:
        OSError: If *group_level_dir* cannot be listed or *output_file*
            cannot be written.
        Exception: Whatever ``pd.read_csv`` raises for an unreadable summary.
    """
    rows_dict = {}

    # Sorted so that the output row order is reproducible across runs.
    for topic_folder in sorted(os.listdir(group_level_dir)):
        topic_path = os.path.join(group_level_dir, topic_folder)
        if not os.path.isdir(topic_path):
            continue

        topic_name = topic_folder.replace("_", " ") + "."

        # ---------- Human Opinion ----------
        human_opinion_path = os.path.join(topic_path, "human", "opinion", "summary.csv")
        if os.path.isfile(human_opinion_path):
            df = pd.read_csv(human_opinion_path)
            _fill_opinion_metrics(get_or_create_row(rows_dict, topic_name, "human"), df)
        else:
            logging.warning(f"Missing file: {human_opinion_path}")

        # ---------- Human Tweet ----------
        human_tweet_path = os.path.join(topic_path, "human", "tweet")
        if os.path.isdir(human_tweet_path):
            # Only the first model folder with a summary is used; sorting
            # makes that choice deterministic.
            for model in sorted(os.listdir(human_tweet_path)):
                model_path = os.path.join(human_tweet_path, model, "summary_stance.csv")
                if os.path.isfile(model_path):
                    df = pd.read_csv(model_path)
                    _fill_tweet_metrics(get_or_create_row(rows_dict, topic_name, "human"), df)
                    break
            else:
                # Reached only when no model folder had a summary file.
                logging.warning(f"No valid summary_stance.csv found in {human_tweet_path}")
        else:
            logging.warning(f"Missing folder: {human_tweet_path}")

        # ---------- Simulation Opinion ----------
        sim_opinion_path = os.path.join(topic_path, "simulation", "opinion")
        if os.path.isdir(sim_opinion_path):
            _collect_simulation_rows(
                rows_dict, topic_name, sim_opinion_path,
                "summary_opinion.csv", _fill_opinion_metrics,
            )
        else:
            logging.warning(f"Missing folder: {sim_opinion_path}")

        # ---------- Simulation Tweet ----------
        sim_tweet_path = os.path.join(topic_path, "simulation", "tweet")
        if os.path.isdir(sim_tweet_path):
            _collect_simulation_rows(
                rows_dict, topic_name, sim_tweet_path,
                "summary_stance.csv", _fill_tweet_metrics,
            )
        else:
            logging.warning(f"Missing folder: {sim_tweet_path}")

    # Save output
    df_out = pd.DataFrame(list(rows_dict.values()), columns=COLUMNS)
    df_out.to_csv(output_file, index=False)
    logging.info(f"Saved summary to {output_file}")

# -------- Entry point --------
if __name__ == "__main__":
    # The project root is taken to be two directories above this script's folder.
    PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
    GROUP_LEVEL_DIR = os.path.join(PROJECT_ROOT, "result", "eval", "group_level")
    OUTPUT_FILE = os.path.join(GROUP_LEVEL_DIR, "summary_group_level_metrics.csv")

    # The path is listed as a directory below, so require a directory rather
    # than mere existence.
    if not os.path.isdir(GROUP_LEVEL_DIR):
        logging.error(f"Directory not found: {GROUP_LEVEL_DIR}")
        # The bare exit() builtin is injected by the `site` module and is not
        # guaranteed to exist (e.g. `python -S`); raise SystemExit directly.
        raise SystemExit(1)

    process_group_level(GROUP_LEVEL_DIR, OUTPUT_FILE)
