{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0dc3722d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "import json\n",
    "\n",
    "# Aggregate per-seed evaluation files into one 'mean ± std' table per method.\n",
    "# For every (method, seed) pair this cell reads:\n",
    "#   <base_path>/seed-<seed>/eval/<method>/tofu.csv           -> columns_of_interest\n",
    "#   <base_path>/seed-<seed>/eval/<method>/mia.json           -> mia_metric\n",
    "#   <base_path>/seed-<seed>/by-instance/<method>/stats.json  -> 'running_time' per entry (optional)\n",
    "\n",
    "# Setup\n",
    "methods = ['retraining', 'original', 'gd', 'ga', 'scrub', 'scr_newton', 'dpo', 'KL', 'npo']\n",
    "seeds = [1,2,3]\n",
    "# Expand '~' once, up front: open() and os.path.exists() do not expand it,\n",
    "# so without this the JSON files below are never found.\n",
    "base_path = os.path.expanduser('~/unlearning/cure_newton/sequential_unlearning/tofu')\n",
    "columns_of_interest = [\n",
    "    'ROUGE Forget',\n",
    "    'ROUGE Retain',\n",
    "    'ROUGE Real Authors',\n",
    "    'Truth Ratio Forget'\n",
    "]\n",
    "mia_metric = 'forget_holdout_Min-40%'\n",
    "\n",
    "# Store results (all keyed by method name)\n",
    "mean_results = {}   # Series of per-column means over the concatenated CSV rows\n",
    "std_results = {}    # Series of per-column sample standard deviations\n",
    "mia_results = {}    # (mean, std) of the MIA metric across seeds\n",
    "time_results = {}   # (mean, std) of the per-seed average running time\n",
    "\n",
    "for method in methods:\n",
    "    dfs = []\n",
    "    mia_values = []\n",
    "    seed_times = []\n",
    "\n",
    "    for seed in seeds:\n",
    "        seed_dir = os.path.join(base_path, f'seed-{seed}')\n",
    "        eval_dir = os.path.join(seed_dir, 'eval', method)\n",
    "\n",
    "        # Load CSV\n",
    "        csv_path = os.path.join(eval_dir, 'tofu.csv')\n",
    "        df = pd.read_csv(csv_path)\n",
    "        dfs.append(df[columns_of_interest])\n",
    "\n",
    "        # Load JSON (MIA)\n",
    "        json_path = os.path.join(eval_dir, 'mia.json')\n",
    "        with open(json_path, 'r') as f:\n",
    "            mia_data = json.load(f)\n",
    "        # Keep only the scalar being reported. mia.json is assumed to map metric\n",
    "        # names to numbers (TODO confirm); a bare number is accepted as-is.\n",
    "        if isinstance(mia_data, dict):\n",
    "            mia_data = mia_data[mia_metric]\n",
    "        mia_values.append(mia_data)\n",
    "\n",
    "        # Load JSON (Time): average the running time over all entries for this seed.\n",
    "        time_path = os.path.join(seed_dir, 'by-instance', method, 'stats.json')\n",
    "        avg_time = float('nan')  # Used when the file is missing or has no entries\n",
    "        if os.path.exists(time_path):\n",
    "            with open(time_path, 'r') as f:\n",
    "                time_data = json.load(f)\n",
    "            round_times = [entry['running_time'] for entry in time_data]\n",
    "            if round_times:\n",
    "                avg_time = sum(round_times) / len(round_times)\n",
    "        seed_times.append(avg_time)\n",
    "\n",
    "    # CSV: mean + std over the rows of all seeds stacked together\n",
    "    combined = pd.concat(dfs).reset_index(drop=True)\n",
    "    mean_results[method] = combined.mean()\n",
    "    std_results[method] = combined.std()\n",
    "\n",
    "    # MIA: mean + std across seeds\n",
    "    mia_series = pd.Series(mia_values, dtype='float64')\n",
    "    mia_results[method] = (mia_series.mean(), mia_series.std())\n",
    "\n",
    "    # Time: mean ± std across seeds. Both statistics come from the same Series so\n",
    "    # that seeds without timing data (NaN) are skipped consistently by mean and std.\n",
    "    time_series = pd.Series(seed_times, dtype='float64')\n",
    "    time_results[method] = (time_series.mean(), time_series.std())\n",
    "\n",
    "# Combine into final DataFrame: one row per method, one 'mean ± std' string per metric\n",
    "formatted_results = {}\n",
    "for method in methods:\n",
    "    formatted = {}\n",
    "    for col in columns_of_interest:\n",
    "        mean_val = mean_results[method][col]\n",
    "        std_val = std_results[method][col]\n",
    "        formatted[col] = f\"{mean_val:.4f} ± {std_val:.4f}\"\n",
    "\n",
    "    mia_mean, mia_std = mia_results[method]\n",
    "    formatted['Forget Holdout Min-40%'] = f\"{mia_mean:.4f} ± {mia_std:.4f}\"\n",
    "\n",
    "    time_mean, time_std = time_results[method]\n",
    "    formatted['Time (s)'] = f\"{time_mean:.2f} ± {time_std:.2f}\"\n",
    "\n",
    "    formatted_results[method] = formatted\n",
    "\n",
    "summary_df = pd.DataFrame(formatted_results).T\n",
    "\n",
    "# Display results (bare last expression so the notebook renders the table richly)\n",
    "summary_df\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "unlearning",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
