{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "45d082f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from pathlib import Path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "351c7258",
   "metadata": {},
   "outputs": [],
   "source": [
    "rename_map = {\n",
    "    \"IMBA_5_Beta_FeatureWise\": \"Ours (Beta, FW)\",\n",
    "    \"IMBA_5_Exp_FeatureWise\": \"Ours (Exp, FW)\",\n",
    "    \"GreedySemanticEnrichedMTEAveDissimilarity\": \"CoCoA_NMTE\",\n",
    "    \"GreedySemanticEnrichedMaxprobAveDissimilarity\": \"CoCoA_MSP\",\n",
    "    \"GreedySemanticEnrichedPPLAveDissimilarity\": \"CoCoA_PPL\",\n",
    "    \"MonteCarloSequenceEntropy\": \"MCSE\",\n",
    "    \"MonteCarloNormalizedSequenceEntropy\": \"MCNSE\",\n",
    "    \"SemanticEntropy\": \"Semantic_Entropy\",\n",
    "    \"SAR_t0.001\": \"SAR\",\n",
    "    \"DegMat_NLI_score_entail\": \"DegMat\",\n",
    "    \"EigValLaplacian_NLI_score_entail\": \"EigValLaplacian\",\n",
    "    \"MaximumSequenceProbability\": \"MSP\",\n",
    "    \"Perplexity\": \"PPL\",\n",
    "    \"MeanTokenEntropy\": \"NMTE\",\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "9b4db58e",
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_col_rename = {\n",
    "    \"xsum\": \"XSum\",\n",
    "    \"wmt14_fren\": \"WMT14FrEn\",\n",
    "    \"wmt19_deen\": \"WMT19DeEn\",\n",
    "    \"coqa_no_context\": \"CoQA\",\n",
    "    \"trivia\": \"Trivia\",\n",
    "    \"mmlu\": \"MMLU\",\n",
    "    \"gsm8k_cot\": \"GSM8k\",\n",
    "}\n",
    "\n",
    "\n",
    "def rename_dataset_columns(df):\n",
    "    # Only rename columns that match keys, ignore 'Method' and 'mean'\n",
    "    new_cols = {}\n",
    "    for c in df.columns:\n",
    "        if c in dataset_col_rename:\n",
    "            new_cols[c] = dataset_col_rename[c]\n",
    "    return df.rename(columns=new_cols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "7d04f449",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load data\n",
    "paths = {\n",
    "    \"LLaMA-8B\": \"../resources/llm_resources/llama8b_results.csv\",\n",
    "    \"Mistral-7B\": \"../resources/llm_resources/mistral7b_results.csv\",\n",
    "    \"Falcon-7B\": \"../resources/llm_resources/falcon7b_results.csv\",\n",
    "}\n",
    "dfs = {name: pd.read_csv(path) for name, path in paths.items()}\n",
    "\n",
    "# Apply dataset column renaming to all dataframes\n",
    "dfs = {name: rename_dataset_columns(df) for name, df in dfs.items()}\n",
    "\n",
    "# Selection config\n",
    "FOUR_METHODS_ORIG = [\n",
    "    \"IMBA_5_Beta_FeatureWise\",\n",
    "    \"IMBA_5_Exp_FeatureWise\",\n",
    "    \"Master_1_Exp_FeatureWise\",\n",
    "    \"Master_1_Beta_FeatureWise\",\n",
    "]\n",
    "EXCLUDE_PREFIXES = (\"IMBA\", \"Master\", \"CoCoA\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "7fb27b941602401d91542211134fc71a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def filter_methods(df):\n",
    "    m = df[\"Method\"].astype(str)\n",
    "    keep_mask = m.isin(FOUR_METHODS_ORIG) | ~m.str.startswith(EXCLUDE_PREFIXES)\n",
    "    return df.loc[keep_mask].copy()\n",
    "\n",
    "\n",
    "def dataset_columns(df):\n",
    "    # dataset columns = everything except 'Method' and 'mean'\n",
    "    return [c for c in df.columns if c not in (\"Method\", \"mean\")]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa7a129e",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "1604733c",
   "metadata": {},
   "outputs": [],
   "source": [
    "out_dir = Path(\"../\")\n",
    "out_dir.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "# --- Per-model LaTeX tables ---\n",
    "for model_name, df in dfs.items():\n",
    "    dff = filter_methods(df)\n",
    "\n",
    "    # Order rows: four methods first (in given order), then the rest alphabetically by RENAMED name\n",
    "    is_four = dff[\"Method\"].isin(FOUR_METHODS_ORIG)\n",
    "    dff_four = dff[is_four].copy()\n",
    "    dff_rest = dff[~is_four].copy()\n",
    "\n",
    "    # Apply rename for output\n",
    "    dff_four[\"Method\"] = dff_four[\"Method\"].replace(rename_map)\n",
    "    dff_rest[\"Method\"] = dff_rest[\"Method\"].replace(rename_map)\n",
    "\n",
    "    # Preserve the order of the four, but based on their renamed labels\n",
    "    order_map = {\n",
    "        rename_map.get(\"IMBA_5_Beta_FeatureWise\", \"IMBA_5_Beta_FeatureWise\"): 0,\n",
    "        rename_map.get(\"IMBA_5_Exp_FeatureWise\", \"IMBA_5_Exp_FeatureWise\"): 1,\n",
    "        rename_map.get(\"Master_1_Exp_FeatureWise\", \"Master_1_Exp_FeatureWise\"): 2,\n",
    "        rename_map.get(\"Master_1_Beta_FeatureWise\", \"Master_1_Beta_FeatureWise\"): 3,\n",
    "    }\n",
    "    dff_four[\"__order\"] = dff_four[\"Method\"].map(order_map)\n",
    "\n",
    "    dff_rest = dff_rest.sort_values(\"Method\")\n",
    "    dff_rest[\"__order\"] = 1000 + np.arange(len(dff_rest))\n",
    "\n",
    "    dff_out = (\n",
    "        pd.concat([dff_four, dff_rest], ignore_index=True)\n",
    "        .sort_values(\"__order\")\n",
    "        .drop(columns=\"__order\")\n",
    "    )\n",
    "\n",
    "    # Round numbers for LaTeX display\n",
    "    dff_out_fmt = dff_out.copy()\n",
    "    for col in dff_out_fmt.columns:\n",
    "        if col != \"Method\":\n",
    "            dff_out_fmt[col] = dff_out_fmt[col].map(lambda x: f\"{x:.3f}\")\n",
    "\n",
    "    caption = f\"{model_name}: filtered methods (renamed). Higher is better.\"\n",
    "    label = (\n",
    "        f\"tab:{model_name.lower().replace('-', '').replace(' ', '')}_filtered_renamed\"\n",
    "    )\n",
    "    latex = dff_out_fmt.to_latex(\n",
    "        index=False, caption=caption, label=label, escape=False\n",
    "    )\n",
    "    (out_dir / f\"{model_name}_filtered_renamed.tex\").write_text(latex)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "acae54e37e7d407bbb7b55eff062a284",
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Joint table: choose top-3 datasets by average performance of the four methods across models ---\n",
    "\n",
    "# 1) Intersection of dataset columns\n",
    "common_cols = None\n",
    "for df in dfs.values():\n",
    "    cols = set(dataset_columns(df))\n",
    "    common_cols = cols if common_cols is None else (common_cols & cols)\n",
    "common_cols = sorted(list(common_cols))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "675eb6f0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['CoQA', 'GSM8k', 'MMLU', 'Trivia', 'WMT14FrEn', 'WMT19DeEn', 'XSum']"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "common_cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "d7bbfa2f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2) Average across models & methods\n",
    "avg_scores = {}\n",
    "for col in common_cols:\n",
    "    vals = []\n",
    "    for df in dfs.values():\n",
    "        # sub = df[df[\"Method\"].isin(FOUR_METHODS)]\n",
    "        sub = df\n",
    "        if col in sub.columns and not sub.empty:\n",
    "            vals.extend(sub[col].tolist())\n",
    "    if vals:\n",
    "        avg_scores[col] = np.mean(vals)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "158095e6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3) Pick top-3 datasets\n",
    "top3 = sorted(avg_scores, key=avg_scores.get, reverse=True)[:3]\n",
    "\n",
    "rows = []\n",
    "for orig_method in dff[\"Method\"].unique():\n",
    "    row = {\"Method\": rename_map.get(orig_method, orig_method)}\n",
    "    for model_name, df in dfs.items():\n",
    "        sub = df[df[\"Method\"] == orig_method]\n",
    "        for col in top3:\n",
    "            key = f\"{model_name} {col}\"\n",
    "            row[key] = float(sub.iloc[0][col]) if not sub.empty else np.nan\n",
    "    rows.append(row)\n",
    "joint_df = pd.DataFrame(rows)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "41af8ec3",
   "metadata": {},
   "outputs": [],
   "source": [
    "joint_df[\"mean\"] = joint_df.iloc[:, 1:].mean(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "7829b2d2",
   "metadata": {},
   "outputs": [],
   "source": [
    "joint_df_sorted = joint_df.sort_values(by=\"mean\", ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1e74a97a",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "7cfc5bae",
   "metadata": {},
   "outputs": [],
   "source": [
    "joint_df_sorted.loc[\"mean\"] = joint_df_sorted.iloc[:, 1:].mean(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "933c96ea",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "b9231900",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Falcon-7B_filtered_renamed.tex',\n",
       " 'LLaMA-8B_filtered_renamed.tex',\n",
       " 'Mistral-7B_filtered_renamed.tex',\n",
       " 'joint_top3_renamed.tex']"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Format\n",
    "joint_fmt = joint_df_sorted.copy()\n",
    "for col in joint_fmt.columns:\n",
    "    if col != \"Method\":\n",
    "        joint_fmt[col] = joint_fmt[col].map(lambda x: \"\" if pd.isna(x) else f\"{x:.3f}\")\n",
    "\n",
    "joint_caption = (\n",
    "    \"Top-3 datasets by average performance of the four selected methods (renamed) across models. \"\n",
    "    \"Rows: methods; columns: (model, dataset). Higher is better.\"\n",
    ")\n",
    "joint_label = \"tab:joint_top3_renamed\"\n",
    "(out_dir / \"joint_top3_renamed.tex\").write_text(\n",
    "    joint_fmt.to_latex(\n",
    "        index=False, caption=joint_caption, label=joint_label, escape=False\n",
    "    )\n",
    ")\n",
    "\n",
    "sorted([p.name for p in out_dir.glob(\"*.tex\")])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d0189af6",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a63283cbaf04dbcab1f6479b197f3a8",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "multidimensional-uncertainty",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
