{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import re\n",
    "\n",
    "input_file = \"test.csv\"\n",
    "\n",
    "# 1) Read and parse line by line, also record Head Group info\n",
    "records = []\n",
    "current_head = 0\n",
    "with open(input_file, encoding='utf-8') as f:\n",
    "    for raw in f:\n",
    "        line = raw.strip()\n",
    "        if not line:\n",
    "            continue\n",
    "        # Detect head group separator\n",
    "        m = re.match(r\"^===== Head Group (\\d+) =====\", line)\n",
    "        if m:\n",
    "            current_head = int(m.group(1))\n",
    "            continue\n",
    "        # Skip α=… or header lines\n",
    "        if line.startswith('α') or line.startswith('Alpha'):\n",
    "            continue\n",
    "        parts = line.split(',')\n",
    "        if len(parts) != 8:\n",
    "            continue\n",
    "        alpha, beta, rnd, typ, cs, ci, f1, ln = parts\n",
    "        records.append({\n",
    "            'HeadGroup': current_head,\n",
    "            'Alpha':     float(alpha),\n",
    "            'Beta':      float(beta),\n",
    "            'Round':     int(rnd),\n",
    "            'Type':      typ,\n",
    "            'CHAIR-s':   float(cs),\n",
    "            'CHAIR-i':   float(ci),\n",
    "            'F1':        float(f1),\n",
    "            'Len':       float(ln),\n",
    "        })\n",
    "\n",
    "df = pd.DataFrame(records)\n",
    "\n",
    "# 2) Split Baseline / Pruned\n",
    "baseline_df = df[df.Type == 'Baseline']\n",
    "group_df    = df[df.Type == 'Pruned']\n",
    "\n",
    "# 3) Pre-compute global mean for Baseline\n",
    "metrics = ['CHAIR-s', 'CHAIR-i', 'F1', 'Len']\n",
    "base_mean = {m: baseline_df[m].mean() for m in metrics}\n",
    "\n",
    "# 4) Group by HeadGroup + Alpha + Beta, compute Δ(global mean) and keep original means\n",
    "rows = []\n",
    "for (hg, a, b), grp in group_df.groupby(['HeadGroup','Alpha','Beta']):\n",
    "    out = {'HeadGroup': hg, 'Alpha': a, 'Beta': b}\n",
    "    for m in metrics:\n",
    "        change = (grp[m].mean() - base_mean[m]) / base_mean[m] * 100\n",
    "        out[f'Δ{m} (Global Mean)'] = f\"{change:+.2f}%\"\n",
    "        out[f'{m} (Baseline)']   = f\"{base_mean[m]:.4f}\"\n",
    "        out[f'{m} (Pruned)']     = f\"{grp[m].mean():.4f}\"\n",
    "    rows.append(out)\n",
    "\n",
    "out_df = pd.DataFrame(rows)\n",
    "\n",
    "# 5) Convert percent strings to float, calculate total gain for CHAIRS & CHAIR-i\n",
    "def pct2float(s): return float(s.strip('%'))\n",
    "out_df['_sum_gain'] = (\n",
    "    out_df['ΔCHAIR-s (Global Mean)'].apply(pct2float) +\n",
    "    out_df['ΔCHAIR-i (Global Mean)'].apply(pct2float)\n",
    ")\n",
    "\n",
    "# 6) Grouping columns, including head group identifier\n",
    "group_cols = ['HeadGroup','Alpha','Beta']\n",
    "\n",
    "# 7) For each head group, select top 10\n",
    "for hg, sub in out_df.groupby('HeadGroup'):\n",
    "    top10 = sub.nsmallest(30, '_sum_gain').drop(columns=['_sum_gain'])\n",
    "    print(f\"\\n#### ===== Head Group {hg} Top 30 Groups with Maximum Gain =====\")\n",
    "    # Output Δ global change rate and original means as needed\n",
    "    print(top10[\n",
    "        group_cols\n",
    "        # Global mean change rate\n",
    "        + [f'Δ{m} (Global Mean)' for m in metrics]\n",
    "        # Original means\n",
    "        + [f'{m} (Baseline)' for m in metrics]\n",
    "        + [f'{m} (Pruned)'   for m in metrics]\n",
    "    ].to_markdown(index=False))\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "cir",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
