{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[INFO] Total skipped samples (Len < 10): 0\n",
      "[INFO] Total skipped samples (Len < 10): 0\n",
      "| Group       | CHAIR-s | CHAIR-i | Recall | Precision |   F1   |  Len   |\n",
      "|:------------|:-------:|:-------:|:------:|:---------:|:------:|:------:|\n",
      "| Baseline    | 0.9857  | 0.3025  | 0.7758 | 0.6107    | 0.6834 | 93.32 |\n",
      "| captions     | 0.7458  | 0.2594  | 0.7665 | 0.6741    | 0.7173 | 84.75 |\n",
      "\n",
      "| Experiment   | ΔCHAIR-s   | ΔCHAIR-i   | ΔF1      | ΔLen    |\n",
      "|:-------------|:-----------|:-----------|:---------|:--------|\n",
      "| captions      |   -24.34% |   -14.26% |   +4.96% |   -9.18% |\n",
      "| Average Change |   -24.34% |   -14.26% |   +4.96% |   -9.18% |\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import sys\n",
    "import json\n",
    "import glob\n",
    "from collections import defaultdict\n",
    "import re\n",
    "from pathlib import Path\n",
    "\n",
    "# Put the parent of the working directory first on sys.path so that the\n",
    "# chair_metrics import below is looked up there before anywhere else.\n",
    "parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))\n",
    "sys.path.insert(0, parent_dir)\n",
    "\n",
    "from chair_metrics import batch_compute_chair_metrics\n",
    "\n",
    "# Input files, resolved relative to the notebook's working directory.\n",
    "baseline_file = './mid/baseline_captions.jsonl'\n",
    "experiment_files = sorted(glob.glob('./mid/patched_captions.jsonl'))\n",
    "txt_file = '../all_img_names.txt'\n",
    "# NOTE(review): this looks like a placeholder path; point it at the local\n",
    "# instances_val2014.json before running.\n",
    "ann_file = 'path/to/val2014/annotations/instances_val2014.json'\n",
    "\n",
    "# Read the image-name list: one whitespace-stripped entry per line of txt_file.\n",
    "with open(txt_file, 'r', encoding='utf-8') as names_fh:\n",
    "    all_img_names = [raw_line.strip() for raw_line in names_fh]\n",
    "\n",
    "# Parse the annotation JSON (expects 'images', 'categories' and 'annotations').\n",
    "with open(ann_file, 'r', encoding='utf-8') as ann_fh:\n",
    "    coco = json.load(ann_fh)\n",
    "\n",
    "# Lookup tables: image id -> file name, category id -> category name.\n",
    "imgid2fname = dict((img['id'], img['file_name']) for img in coco['images'])\n",
    "catid2name = dict((cat['id'], cat['name']) for cat in coco['categories'])\n",
    "\n",
    "# Collect the distinct category names annotated on each image file, then\n",
    "# freeze every set into a list keyed by file name.\n",
    "label_sets = defaultdict(set)\n",
    "for ann in coco['annotations']:\n",
    "    label_sets[imgid2fname[ann['image_id']]].add(catid2name[ann['category_id']])\n",
    "fname2labels = {fname: list(names) for fname, names in label_sets.items()}\n",
    "\n",
    "def compute_metrics(jsonl_path):\n",
    "    \"\"\"Compute CHAIR metrics for the captions stored in a JSONL file.\n",
    "\n",
    "    Each non-blank line is parsed as a JSON object providing 'image_id' and\n",
    "    'caption'. The image id is resolved to a file name through imgid2fname,\n",
    "    and that file name selects the ground-truth labels from fname2labels\n",
    "    (an empty list when the file name has no entry). Captions shorter than\n",
    "    10 characters after stripping are left out.\n",
    "\n",
    "    Args:\n",
    "        jsonl_path: Path of the UTF-8 JSONL caption file to score.\n",
    "\n",
    "    Returns:\n",
    "        Whatever batch_compute_chair_metrics(preds, gt) returns for the kept\n",
    "        captions and their label lists.\n",
    "\n",
    "    Raises:\n",
    "        KeyError: If a record lacks 'image_id' or 'caption', or its image id\n",
    "            is missing from imgid2fname.\n",
    "        json.JSONDecodeError: If a non-blank line is not valid JSON.\n",
    "    \"\"\"\n",
    "    preds, gt = [], []\n",
    "    with open(jsonl_path, 'r', encoding='utf-8') as f:\n",
    "        for line in f:\n",
    "            if not line.strip():\n",
    "                continue  # Blank lines carry no record; json.loads would raise on them\n",
    "            entry = json.loads(line)\n",
    "            fn = imgid2fname[entry['image_id']]\n",
    "            caption = entry['caption']\n",
    "            if len(caption.strip()) < 10:\n",
    "                continue  # Skip if caption is too short\n",
    "            preds.append(caption)\n",
    "            gt.append(fname2labels.get(fn, []))\n",
    "    return batch_compute_chair_metrics(preds, gt)\n",
    "\n",
    "# Score the baseline captions once, then every experiment file.\n",
    "base_m = compute_metrics(baseline_file)\n",
    "exp_metrics = []\n",
    "for exp_path in experiment_files:\n",
    "    stem = Path(exp_path).stem\n",
    "    # Label a run by the alpha/layers values in its file name when present;\n",
    "    # otherwise fall back to the text after the last underscore of the stem.\n",
    "    match = re.search(r'alpha([0-9.]+)_layers_([0-9-]+)', stem)\n",
    "    if match is None:\n",
    "        label = stem.split('_')[-1]\n",
    "    else:\n",
    "        alpha, layers = match.groups()\n",
    "        label = f'α{alpha} L{layers}'\n",
    "    exp_metrics.append((label, compute_metrics(exp_path)))\n",
    "\n",
    "# Main metric table (Markdown): one row for the baseline, one per experiment.\n",
    "# Both row kinds share this template; only the width of the name cell differs.\n",
    "row_template = (\n",
    "    '| {name} | {m[CHAIR-s]:.4f}  | {m[CHAIR-i]:.4f}  | {m[Recall]:.4f} '\n",
    "    '| {m[Precision]:.4f}    | {m[F1]:.4f} | {m[Len]:.2f} |'\n",
    ")\n",
    "print('| Group       | CHAIR-s | CHAIR-i | Recall | Precision |   F1   |  Len   |')\n",
    "print('|:------------|:-------:|:-------:|:------:|:---------:|:------:|:------:|')\n",
    "print(row_template.format(name='Baseline'.ljust(11), m=base_m))\n",
    "for label, metrics in exp_metrics:\n",
    "    print(row_template.format(name=f'{label:<12}', m=metrics))\n",
    "\n",
    "def delta(a, b):\n",
    "    \"\"\"Return the signed percentage change of a relative to b.\n",
    "\n",
    "    The difference a - b is divided by the magnitude of b and scaled by 100.\n",
    "    A zero b yields 0 instead of raising ZeroDivisionError.\n",
    "    \"\"\"\n",
    "    if b == 0:\n",
    "        return 0\n",
    "    return (a - b) / abs(b) * 100\n",
    "\n",
    "# Percentage change of each experiment relative to the baseline, per metric.\n",
    "delta_metrics = ('CHAIR-s', 'CHAIR-i', 'F1', 'Len')\n",
    "rows = []\n",
    "for label, m in exp_metrics:\n",
    "    row = {'exp': label}\n",
    "    for name in delta_metrics:\n",
    "        row[f'Δ{name}'] = delta(m[name], base_m[name])\n",
    "    rows.append(row)\n",
    "\n",
    "# Append the mean over all experiments. Skipped when no experiment file was\n",
    "# found, because dividing by len(rows) == 0 would raise ZeroDivisionError.\n",
    "if rows:\n",
    "    avg = {'exp': 'Average Change'}\n",
    "    for name in delta_metrics:\n",
    "        avg[f'Δ{name}'] = sum(r[f'Δ{name}'] for r in rows) / len(rows)\n",
    "    rows.append(avg)\n",
    "\n",
    "# Column layout of the delta table: (row key, format spec, suffix).\n",
    "delta_columns = (\n",
    "    ('exp', '<13', ''),\n",
    "    ('ΔCHAIR-s', '+8.2f', '%'),\n",
    "    ('ΔCHAIR-i', '+8.2f', '%'),\n",
    "    ('ΔF1', '+7.2f', '%'),\n",
    "    ('ΔLen', '+7.2f', '%'),\n",
    ")\n",
    "print('\\n| Experiment   | ΔCHAIR-s   | ΔCHAIR-i   | ΔF1      | ΔLen    |')\n",
    "print('|:-------------|:-----------|:-----------|:---------|:--------|')\n",
    "for row in rows:\n",
    "    cells = [format(row[key], spec) + suffix for key, spec, suffix in delta_columns]\n",
    "    print('| ' + ' | '.join(cells) + ' |')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "cir",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
