{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import re\n",
    "\n",
    "input_file = \"test.csv\"\n",
    "\n",
    "# List to store Baseline data\n",
    "baseline_rows = []\n",
    "\n",
    "# List to store Group data\n",
    "group_rows = []\n",
    "\n",
    "# Regex: separator line, example \"===== G1(0)_A(0.62)_B(0.80)__G2(1)_A(0.30)_B(0.90) =====\"\n",
    "sep_re = re.compile(\n",
    "    r\"^=+ G1\\((?P<G1_ID>\\d+)\\)_A\\((?P<G1_A>[\\d.]+)\\)_B\\((?P<G1_B>[\\d.]+)\\)__\"\n",
    "    r\"G2\\((?P<G2_ID>\\d+)\\)_A\\((?P<G2_A>[\\d.]+)\\)_B\\((?P<G2_B>[\\d.]+)\\)\"\n",
    ")\n",
    "# Currently active G1/G2 parameters\n",
    "cur = {}\n",
    "\n",
    "with open(input_file, encoding='utf-8') as f:\n",
    "    for line in f:\n",
    "        line = line.strip()\n",
    "        if not line or line.startswith(\"[CHECK]\"):\n",
    "            continue\n",
    "\n",
    "        # 1) Baseline row: 5 columns and starts with a number\n",
    "        if line.count(',') == 4 and re.match(r'^\\d+,', line):\n",
    "            # Round,CHAIR-s,CHAIR-i,F1,Len\n",
    "            r, s, i, f1, ln = line.split(',')\n",
    "            baseline_rows.append({\n",
    "                \"Round\": int(r),\n",
    "                \"CHAIR-s\": float(s),\n",
    "                \"CHAIR-i\": float(i),\n",
    "                \"F1\":       float(f1),\n",
    "                \"Len\":      float(ln),\n",
    "            })\n",
    "            continue\n",
    "\n",
    "        # 2) Separator row: update cur parameters\n",
    "        m = sep_re.match(line)\n",
    "        if m:\n",
    "            cur = {\n",
    "                \"Group1_ID\":    int(m.group(\"G1_ID\")),\n",
    "                \"Group1_Alpha\": float(m.group(\"G1_A\")),\n",
    "                \"Group1_Beta\":  float(m.group(\"G1_B\")),\n",
    "                \"Group2_ID\":    int(m.group(\"G2_ID\")),\n",
    "                \"Group2_Alpha\": float(m.group(\"G2_A\")),\n",
    "                \"Group2_Beta\":  float(m.group(\"G2_B\")),\n",
    "            }\n",
    "            continue\n",
    "\n",
    "        # 3) Pruned group data row: 10 columns (4 α/β + Round + Type + 4 metrics)\n",
    "        if line.count(',') >= 9 and cur:\n",
    "            parts = line.split(',')\n",
    "            # Map according to \"G1/G2 parameter order\"\n",
    "            alpha1, beta1, alpha2, beta2, round_, typ, s, i, f1, ln = parts[:10]\n",
    "            group_rows.append({\n",
    "                **cur,\n",
    "                \"Round\":    int(round_),\n",
    "                \"Type\":     typ,\n",
    "                \"CHAIR-s\":  float(s),\n",
    "                \"CHAIR-i\":  float(i),\n",
    "                \"F1\":       float(f1),\n",
    "                \"Len\":      float(ln),\n",
    "            })\n",
    "            continue\n",
    "\n",
    "# Convert lists to DataFrames\n",
    "baseline_df = pd.DataFrame(baseline_rows)\n",
    "group_df    = pd.DataFrame(group_rows)\n",
    "\n",
    "# Key metrics\n",
    "metrics = [\"CHAIR-s\", \"CHAIR-i\", \"F1\", \"Len\"]\n",
    "\n",
    "# Consistent sorting\n",
    "baseline_df = baseline_df.sort_values(\"Round\")\n",
    "group_df    = group_df.sort_values([\"Group1_ID\",\"Group2_ID\",\"Group1_Alpha\",\"Group1_Beta\",\n",
    "                                    \"Group2_Alpha\",\"Group2_Beta\",\"Round\"])\n",
    "\n",
    "# Grouping keys\n",
    "group_cols = [\"Group1_ID\",\"Group2_ID\",\n",
    "              \"Group1_Alpha\",\"Group1_Beta\",\n",
    "              \"Group2_Alpha\",\"Group2_Beta\"]\n",
    "\n",
    "rows = []\n",
    "for params, grp in group_df.groupby(group_cols):\n",
    "    out = dict(zip(group_cols, params))\n",
    "\n",
    "    # Global mean change rate\n",
    "    for m in metrics:\n",
    "        base_mean = baseline_df[m].mean()\n",
    "        prun_mean = grp[m].mean()\n",
    "        out[f\"Δ{m} (Global Mean)\"] = f\"{(prun_mean - base_mean) / base_mean * 100:+.2f}%\"\n",
    "\n",
    "    # Per-round mean change rate\n",
    "    merged = pd.merge(\n",
    "        baseline_df,\n",
    "        grp[[\"Round\"] + metrics],\n",
    "        on=\"Round\",\n",
    "        suffixes=(\"_base\",\"_prun\")\n",
    "    )\n",
    "    for m in metrics:\n",
    "        change = ((merged[f\"{m}_prun\"] - merged[f\"{m}_base\"]) /\n",
    "                  merged[f\"{m}_base\"] * 100).mean()\n",
    "        out[f\"Δ{m} (Per-round Mean)\"] = f\"{change:+.2f}%\"\n",
    "\n",
    "    # Mean ± std\n",
    "    for m in metrics:\n",
    "        bmean, bstd = baseline_df[m].mean(), baseline_df[m].std()\n",
    "        pmean, pstd = grp[m].mean(), grp[m].std()\n",
    "        out[f\"{m} (Baseline)\"] = f\"{bmean:.4f}±{bstd:.4f}\"\n",
    "        out[f\"{m} (Pruned)\"]   = f\"{pmean:.4f}±{pstd:.4f}\"\n",
    "\n",
    "    rows.append(out)\n",
    "\n",
    "# Output\n",
    "order = (\n",
    "    group_cols\n",
    "    + [f\"Δ{m} (Global Mean)\"  for m in metrics]\n",
    "    + [f\"Δ{m} (Per-round Mean)\"  for m in metrics]\n",
    "    + [f\"{m} (Baseline)\"  for m in metrics]\n",
    "    + [f\"{m} (Pruned)\"     for m in metrics]\n",
    ")\n",
    "out_df = pd.DataFrame(rows)[order]\n",
    "print(out_df.to_markdown(index=False))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "def percent_str_to_float(s):\n",
    "    return float(str(s).replace('%', ''))\n",
    "\n",
    "# Only display α/β and each metric\n",
    "fields = [\n",
    "    'Group1_Alpha', 'Group1_Beta',\n",
    "    'Group2_Alpha', 'Group2_Beta',\n",
    "    'ΔCHAIR-s (Global Mean)', 'ΔCHAIR-i (Global Mean)', 'ΔF1 (Global Mean)',\n",
    "    'CHAIR-s (Baseline)', 'CHAIR-s (Pruned)',\n",
    "    'CHAIR-i (Baseline)', 'CHAIR-i (Pruned)',\n",
    "    'F1 (Baseline)', 'F1 (Pruned)'\n",
    "]\n",
    "\n",
    "# For each head group combination (Group1_ID, Group2_ID), output Top 15 separately\n",
    "for (g1, g2), df_sub in out_df.groupby(['Group1_ID', 'Group2_ID']):\n",
    "    df = df_sub.copy()\n",
    "    # Calculate the sum of ΔCHAIR-s + ΔCHAIR-i for sorting\n",
    "    df['_sum_gain'] = (\n",
    "        df['ΔCHAIR-s (Global Mean)'].apply(percent_str_to_float) +\n",
    "        df['ΔCHAIR-i (Global Mean)'].apply(percent_str_to_float)\n",
    "    )\n",
    "    # Select the top 30 rows with the smallest total gain (minimum loss / maximum improvement)\n",
    "    top15 = df.nsmallest(30, '_sum_gain').drop(columns=['_sum_gain'])\n",
    "    \n",
    "    # Print the title, distinguish only by α/β, do not output the ID columns\n",
    "    print(f\"\\n##### Head group combination G1={g1}, G2={g2} filtered results:\")\n",
    "    print(top15[fields].to_markdown(index=False))\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "cir",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
