import os
import json
import csv
import pandas as pd
from collections import OrderedDict
import argparse

def load_benchmark_tables(benchmarks, result_dir) -> dict[str, pd.DataFrame]:
    """Read one CSV per benchmark and index each table by its ``row_name`` column.

    Args:
        benchmarks: Iterable of benchmark names; each is resolved to
            ``<result_dir>/<name>.csv``.
        result_dir: Directory that holds the per-benchmark CSV files.

    Returns:
        Mapping from benchmark name to its DataFrame, with ``row_name``
        promoted to the index.

    Raises:
        ValueError: If a CSV has no ``row_name`` column.
    """
    loaded: dict[str, pd.DataFrame] = {}
    for name in benchmarks:
        csv_path = os.path.join(result_dir, f"{name}.csv")
        frame = pd.read_csv(csv_path)
        if "row_name" in frame.columns:
            # Rows become addressable by experiment name.
            loaded[name] = frame.set_index("row_name")
            continue
        raise ValueError(f"{csv_path} lacks required 'row_name' column.")
    return loaded

def summarize(
    models: list,
    keys_to_fetch: dict,
    result_dir=".",
    savefile="summary.csv",
) -> pd.DataFrame:
    """Build a one-row-per-model summary across benchmarks and save it as CSV.

    Args:
        models: Model names, matched against the ``row_name`` index of each
            benchmark table. Output rows follow this order.
        keys_to_fetch: Mapping from benchmark name to the list of metric
            column names to pull from that benchmark's table.
        result_dir: Directory containing the benchmark CSVs; the summary is
            written into the same directory.
        savefile: File name of the summary CSV inside ``result_dir``.

    Returns:
        The summary DataFrame: a ``model`` column followed by one
        ``<bench>_<metric>`` column per requested metric. A model missing
        from a benchmark table gets ``"-"`` for all of that benchmark's
        metrics.
    """
    tables = load_benchmark_tables(keys_to_fetch.keys(), result_dir)

    def build_record(model_name):
        # One output row: the model name plus every requested metric.
        record = {"model": model_name}
        for bench, metrics in keys_to_fetch.items():
            table = tables[bench]
            has_experiment = model_name in table.index
            for metric in metrics:
                column = f"{bench}_{metric}"
                if has_experiment:
                    record[column] = table.at[model_name, metric]
                else:
                    # Placeholder for an experiment absent from this table.
                    record[column] = "-"
        return record

    summary = pd.DataFrame([build_record(name) for name in models])
    destination = os.path.join(result_dir, savefile)
    summary.to_csv(destination, index=False)
    print(f"Wrote {savefile} with shape {summary.shape}")
    return summary

# Experiment identifiers to summarize, in output-row order. Each entry is
# matched against the `row_name` index of every benchmark table.
# NOTE(review): the part before "/" presumably names the vision encoder /
# backbone family and the part after it the run variant — confirm.
models = [
    # "clip/" runs
    "clip/llava-v1.5-7b-nonorm",
    "clip/llava-v1.5-7b-rms",
    "clip/llava-v1.5-7b-rms-nocls",
    "clip/llava-v1.5-7b-rms-shuffle",
    "clip/Rllava-v0.48-ca1x-7b-rms",
    "clip/Cllava-v0.12-ca1x-7b",
    "clip/Rllava-v0.48_db_-ca1x-7b-rms",

    # "siglip2/" runs
    "siglip2/llava-v1.5-7b",
    "siglip2/llava-v1.5-7b-rms",
    "siglip2/Rllava-v0.48-ca1x-7b-rms",
    "siglip2/Cllava-v0.12-ca1x-7b",
    "siglip2/Rllava-v0.48_db_-ca1x-7b-rms",

    # "radio/" runs: layer2 / ca1x variants
    "radio/llava-v1.5-7b-layer2",
    "radio/llava-v1.5-7b-layer2-rms",
    "radio/Rllava-v0.48-ca1x-7b_2",
    "radio/Rllava-v0.48-ca1x-7b-rms",
    "radio/Cllava-v0.12-ca1x-7b",
    "radio/Cllava-v0.10-7b-ca1x",
    "radio/Rllava-v0.48_db_-ca1x-7b",
    "radio/Rllava-v0.48_db_-ca1x-7b-rms",

    # "radio/" runs: 576x / 768x variants
    "radio/llava-v1.5-576x-7b",
    "radio/Rllava-v0.48-576x-7b-rms",
    "radio/Rllava-v0.48_db_-576x-7b-rms",
    "radio/llava-v1.5-768x-7b",
    "radio/Rllava-v0.48-768x-7b",
    "radio/Rllava-v0.48_db_-768x-7b",

    # "radio/" runs: nocls / shuffle and other version variants
    # NOTE(review): these look like ablations — confirm.
    "radio/llava-v1.5-7b-nocls",
    "radio/llava-v1.5-7b-shuffle",
    "radio/Rllava-v0.4A_db_-768x-7b",
    "radio/Rllava-v0.49_db_-768x-7b",
    "radio/Rllava-v0.48_db_-768x-7b-shuffle",

    # "radio/" runs: pool variants
    "radio/Rllava-v0.48-pool-7b",
    "radio/Cllava-v0.12-pool-7b",
    "radio/Rllava-v0.48_db_-pool-7b",
    "radio/Rllava-v0.48-768x-pool-7b",
    "radio/Rllava-v0.48_db_-768x-pool-7b",

    # "qwen3-radio/" runs
    "qwen3-radio/llava-v1.5-8b-qwen3",
    "qwen3-radio/Rllava-v0.48-ca1x-8b-qwen3-rms",
    "qwen3-radio/Cllava-v0.12-ca1x-8b-qwen3-rms",
    "qwen3-radio/Rllava-v0.48_db_-ca1x-8b-qwen3-rms",
]

# Detailed summary layout: benchmark name -> metric columns to collect.
# Each key is also the stem of the benchmark's CSV file (<key>.csv), and
# each listed string must be a column name in that file.
keys_to_fetch = {
    "pixcv": [
        "ntokens",
        "focus",
    ],
    "pope": [
        "f1",
    ],
    "ocrbench": [
        "Text Recognition",
        "Scene Text-centric VQA",
        "Doc-oriented VQA",
        "Key Information Extraction",
        "Handwritten Mathematical Expression Recognition",
    ],
    "cvbench": [
        "Count",
        "Relation",
        "Depth",
        "Distance",
    ],
    "mmstar": [
        "coarse perception",
        "fine-grained perception",
        "instance reasoning",
        "logical reasoning",
        "science & technology",
        "math",
    ],
}

# Brief summary layout: benchmark name -> metric columns to collect.
# Same structure as the detailed mapping, but with a single aggregate
# column for ocrbench, cvbench and mmstar.
keys_brief = {
    "pixcv": [
        "ntokens",
        "focus",
    ],
    "pope": [
        "f1",
    ],
    "ocrbench": [
        "Final Score",
    ],
    "cvbench": [
        "all",
    ],
    "mmstar": [
        "final score",
    ],
}

if __name__ == '__main__':
    # Script entry point: write the detailed and the brief summary CSVs
    # into the directory that holds the benchmark result tables.
    parser = argparse.ArgumentParser()
    # Default to the current directory (the same default summarize() uses).
    # Without a default, omitting the flag yields None and os.path.join()
    # fails with a TypeError when the benchmark paths are built.
    parser.add_argument(
        '--result_dir',
        type=str,
        default=".",
        help="directory containing the benchmark CSVs; summaries are written here",
    )
    args = parser.parse_args()

    # Detailed per-category summary -> summary.csv
    summarize(models, keys_to_fetch, result_dir=args.result_dir)
    # Aggregate-score summary -> summary_brief.csv
    summarize(models, keys_brief, result_dir=args.result_dir, savefile="summary_brief.csv")

