import os
import pandas as pd
import yaml

def _metadata_from_folder(folder_name):
    """Derive ``(wave, repeat)`` from an underscore-separated run folder name.

    The second token is taken as the wave (``"unknown"`` when there is no
    second token) and the last token as the repeat index (``-1`` when it is
    not made of digits).
    """
    parts = folder_name.split("_")
    wave = parts[1] if len(parts) > 1 else "unknown"
    repeat = int(parts[-1]) if parts[-1].isdigit() else -1
    return wave, repeat


def _metadata_from_config(config_path):
    """Derive ``(wave, repeat)`` from ``experiment.name`` in a YAML config.

    Returns ``None`` when the file cannot be read or parsed, or when the
    name does not have the ``<prefix>_<wave>..._<repeat>`` shape, so the
    caller can fall back to another metadata source.
    """
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config = yaml.safe_load(f)
        parts = config["experiment"]["name"].split("_")
        return parts[1], int(parts[-1])
    except (OSError, yaml.YAMLError, KeyError, TypeError,
            AttributeError, IndexError, ValueError):
        # Empty file (safe_load -> None), missing keys, non-string name,
        # name without "_" or with a non-integer suffix all land here.
        return None


def recover_panel_sweep_results(base_output_dir="outputs/panel_study_selection", sweep_subdir="multi_round"):
    """
    Recovers partial/full panel sweep results from saved outputs.

    Every entry of ``<base_output_dir>/<sweep_subdir>`` that contains an
    ``aggregate_summary.csv`` contributes the first row of that CSV. If the
    row lacks ``wave`` and/or ``repeat``, the missing value(s) are filled in
    from ``config_used.yaml`` (``experiment.name``) when that file exists and
    is usable, otherwise from the folder name. Values already present in the
    CSV row are never overwritten. The path of the run folder is stored
    under ``output_dir``.

    Entries without a summary file, or with a summary holding no data rows,
    are skipped silently; a summary that cannot be read is skipped with a
    printed warning.

    Args:
        base_output_dir (str): Root directory where sweep results are stored.
        sweep_subdir (str): Subfolder containing individual run directories.

    Returns:
        pd.DataFrame: Concatenated summary rows sorted by ``wave`` then
        ``repeat``. When no run could be recovered, an empty frame with the
        columns ``wave``, ``repeat`` and ``output_dir`` is returned.

    Raises:
        FileNotFoundError: If the sweep directory does not exist
            (propagated from ``os.listdir``).
    """
    sweep_dir = os.path.join(base_output_dir, sweep_subdir)
    all_rows = []

    # Sorted so that processing (and warning) order is reproducible;
    # os.listdir makes no ordering guarantee.
    for sub in sorted(os.listdir(sweep_dir)):
        subdir_path = os.path.join(sweep_dir, sub)
        summary_path = os.path.join(subdir_path, "aggregate_summary.csv")

        if not os.path.isfile(summary_path):
            continue

        # Best-effort recovery: one unreadable summary must not abort the
        # whole sweep, so report it and move on.
        try:
            df = pd.read_csv(summary_path)
        except Exception as e:
            print(f"⚠️ Skipping {subdir_path} due to error: {e}")
            continue

        if df.empty:
            continue
        row = df.iloc[0].to_dict()

        # Extract metadata from config or folder name
        if "wave" not in row or "repeat" not in row:
            config_path = os.path.join(subdir_path, "config_used.yaml")
            metadata = None
            if os.path.isfile(config_path):
                metadata = _metadata_from_config(config_path)
            if metadata is None:
                # No config, or a config we could not interpret.
                metadata = _metadata_from_folder(sub)
            wave, repeat = metadata
            # setdefault keeps whichever of the two the CSV already supplied.
            row.setdefault("wave", wave)
            row.setdefault("repeat", repeat)

        row["output_dir"] = subdir_path
        all_rows.append(row)

    if not all_rows:
        # Sorting a column-less frame by "wave"/"repeat" would raise KeyError.
        return pd.DataFrame(columns=["wave", "repeat", "output_dir"])

    summary_df = pd.DataFrame(all_rows)
    summary_df = summary_df.sort_values(by=["wave", "repeat"])
    return summary_df

if __name__ == "__main__":
    # Script entry point: rebuild the combined summary table with the default
    # sweep location and write it out as a single CSV (no index column).
    output_path = "outputs/panel_study_selection/all_results_recovered.csv"
    recovered = recover_panel_sweep_results()
    recovered.to_csv(output_path, index=False)
    print(f"✓ Recovered {len(recovered)} runs. Saved to {output_path}.")