import argparse
import pandas as pd
import json
import os
from collections import defaultdict


def convert_dataset_to_json(dataset_path, output_path, stats_path=None):
    """Convert a dataset CSV into a JSON file of tasks grouped by scene.

    Each CSV row becomes one task. Rows are grouped under a display name
    derived from the ``SceneName`` column (underscores replaced by spaces,
    then title-cased). When a stats file is supplied, the ``pass_1`` value
    of every model listed under a matching display name is attached as
    that scene's ``results``.

    Args:
        dataset_path: Path to the CSV file. The columns ``SceneName``,
            ``Prompt``, ``ImagePath`` and ``Correct`` are read.
        output_path: Path of the JSON file to write. Missing parent
            directories are created; a bare filename is written to the
            current working directory.
        stats_path: Optional path to a JSON file mapping display name ->
            model -> stats dict containing a ``pass_1`` entry. If the path
            is given but the file does not exist, a warning is printed and
            no results are attached.

    Raises:
        KeyError: If a required CSV column is missing, or a model entry in
            the stats file has no ``pass_1`` key.
    """
    df = pd.read_csv(dataset_path)

    grouped_data = defaultdict(list)
    for _, row in df.iterrows():
        display_name = row["SceneName"].replace("_", " ").title()
        grouped_data[display_name].append(
            {
                "prompt": row["Prompt"],
                "image_path": row["ImagePath"],
                # Always serialised as a string, whatever dtype pandas inferred.
                "correct_answer": str(row["Correct"]),
            }
        )

    stats_data = {}
    if stats_path:
        if os.path.exists(stats_path):
            with open(stats_path, "r", encoding="utf-8") as f:
                stats_data = json.load(f)
            print(f"Loaded stats from {stats_path}")
        else:
            # Previously this case was silent, which made the later
            # "Added results for 0 scenes" message hard to diagnose.
            print(f"Warning: stats file not found, no results added: {stats_path}")

    result = {}
    for scene_name, tasks in grouped_data.items():
        scene_results = {}
        if scene_name in stats_data:
            scene_results = {
                model: model_stats["pass_1"]
                for model, model_stats in stats_data[scene_name].items()
            }
        result[scene_name] = {"tasks": tasks, "results": scene_results}

    # os.path.dirname() returns "" for a bare filename and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when one is named.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=2)

    print(f"Converted {len(df)} tasks from {len(grouped_data)} scenes")
    if stats_path:
        scenes_with_results = sum(1 for entry in result.values() if entry["results"])
        print(f"Added results for {scenes_with_results} scenes")

    print(f"Output saved to {output_path}")


def main():
    """Parse command-line options and run the CSV-to-JSON conversion.

    ``--dataset-dir`` (required) names the directory holding ``dataset.csv``;
    the result is written next to it as ``data.json``. ``--stats`` optionally
    names a stats JSON file that is forwarded to the converter.

    Raises:
        FileNotFoundError: If ``dataset.csv`` does not exist in the given
            directory.
    """
    cli = argparse.ArgumentParser(
        description="Convert dataset.csv to data.json for website"
    )
    cli.add_argument(
        "--dataset-dir",
        required=True,
        type=str,
        help="Path to dataset directory containing dataset.csv",
    )
    cli.add_argument(
        "--stats",
        type=str,
        help="Path to stats.json file to include results data",
    )
    options = cli.parse_args()

    # Input and output both live inside the dataset directory.
    base_dir = options.dataset_dir
    csv_file = os.path.join(base_dir, "dataset.csv")
    json_file = os.path.join(base_dir, "data.json")

    if os.path.exists(csv_file):
        print(f"Converting {csv_file} to {json_file}")
        convert_dataset_to_json(csv_file, json_file, options.stats)
        return

    raise FileNotFoundError(f"Dataset file not found: {csv_file}")


# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
