import re
from collections import defaultdict
from logging import root
from pathlib import Path

import numpy as np
import pandas as pd

def group_run_dirs(results_dir):
    """Group the entries of *results_dir* by experiment name.

    The experiment name is the entry's full name with the text after the
    last underscore removed (e.g. ``exp_lr0.1_3`` -> ``exp_lr0.1``).

    Args:
        results_dir: Directory whose direct children are grouped.

    Returns:
        A ``defaultdict(list)`` mapping experiment name to the sorted list
        of matching paths. Empty if *results_dir* has no entries.
    """
    groups = defaultdict(list)
    for root_dir in sorted(results_dir.glob("*")):
        # Use .name rather than .stem: .stem drops everything after the last
        # dot, which truncates directory names such as "exp_lr0.1_3" and
        # merges unrelated experiments into one group.
        experiment_name = root_dir.name.rsplit("_", 1)[0]
        groups[experiment_name].append(root_dir)
    return groups


def collect_success_rates(root_dirs):
    """Return one success rate per log directory found under *root_dirs*.

    For every sub-directory of every directory in *root_dirs*, reads
    ``eval.csv`` and takes the mean of its ``success`` column. Entries that
    are not directories are skipped at both levels.

    Args:
        root_dirs: Iterable of paths; each may contain log directories.

    Returns:
        List of mean ``success`` values, in sorted directory order.

    Raises:
        FileNotFoundError: If a log directory has no ``eval.csv``.
        KeyError: If an ``eval.csv`` has no ``success`` column.
    """
    success_rates = []
    for root_dir in sorted(root_dirs):
        if not root_dir.is_dir():
            continue
        for logdir in sorted(root_dir.glob("*")):
            if not logdir.is_dir():
                continue
            eval_frame = pd.read_csv(logdir / "eval.csv")
            success_rates.append(np.mean(eval_frame["success"].values))
    return success_rates


def format_summary(success_rates):
    """Format *success_rates* as ``"<mean> \\pm <std>"`` with 3 decimals.

    An empty input yields ``"nan \\pm nan"``; it is handled explicitly so
    numpy does not emit empty-slice RuntimeWarnings.
    """
    if len(success_rates) == 0:
        mean = std = float("nan")
    else:
        mean = np.mean(success_rates)
        std = np.std(success_rates)
    # Backslash is escaped: "\p" is an invalid escape sequence in a normal
    # string literal and triggers a SyntaxWarning on recent Python versions.
    return f"{mean:.3f} \\pm {std:.3f}"


results_dir = Path("results")
root_dirs_dict = group_run_dirs(results_dir)

for experiment_name, root_dirs in root_dirs_dict.items():
    print(experiment_name)
    print(format_summary(collect_success_rates(root_dirs)))
    print()
