{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# Training fraction printed in the caption of the LaTeX tables built by `to_latex`.\n",
    "train_perc: float = 1\n",
    "# Metric columns that `read_df` removes from every results table after loading.\n",
    "COLUMNS_TO_DROP = [\"precision\", \"recall\"]\n",
    "\n",
    "# Dataset identifiers; `read_df` interpolates them into the results file name.\n",
    "imagenet_name: str = \"imagenet-1k\"\n",
    "cifarfine_name: str = \"cifar100-fine\"\n",
    "cifarcoarse_name: str = \"cifar100-coarse\"\n",
    "\n",
    "\n",
    "def _embed_type_slice(mydf, embed_type, label):\n",
    "    \"\"\"Return the rows of one embedding type with two-level column labels.\n",
    "\n",
    "    The ``embed_type`` column is dropped, ``fscore`` becomes ``(label, \"fscore\")``\n",
    "    and every other column ``c`` becomes ``(c, \"\")``.\n",
    "    \"\"\"\n",
    "    part = mydf[mydf[\"embed_type\"] == embed_type].drop(columns=[\"embed_type\"])\n",
    "    # Label columns by name rather than by position, so a different column\n",
    "    # order in the input cannot silently attach the wrong header to the data.\n",
    "    part.columns = pd.MultiIndex.from_tuples(\n",
    "        [(label, \"fscore\") if name == \"fscore\" else (name, \"\") for name in part.columns]\n",
    "    )\n",
    "    return part\n",
    "\n",
    "\n",
    "def rearrange_embedtype_as_column(mydf):\n",
    "    \"\"\"Turn the ``embed_type`` rows into side-by-side f-score columns.\n",
    "\n",
    "    Args:\n",
    "        mydf: Frame with the flat columns ``seed``, ``embed_type``,\n",
    "            ``train_model``, ``test_model``, ``fscore`` and ``stitched``, where\n",
    "            ``embed_type`` takes the values ``\"relative\"`` and ``\"absolute\"``.\n",
    "\n",
    "    Returns:\n",
    "        The inner merge of the relative and absolute rows on seed, train model,\n",
    "        test model and stitched flag. Columns are a two-level MultiIndex: the\n",
    "        key columns as ``(name, \"\")`` plus ``(\"Relative\", \"fscore\")`` and\n",
    "        ``(\"Absolute\", \"fscore\")``.\n",
    "    \"\"\"\n",
    "    return pd.merge(\n",
    "        _embed_type_slice(mydf, \"relative\", \"Relative\"),\n",
    "        _embed_type_slice(mydf, \"absolute\", \"Absolute\"),\n",
    "        on=[\n",
    "            (\"train_model\", \"\"),\n",
    "            (\"test_model\", \"\"),\n",
    "            (\"seed\", \"\"),\n",
    "            (\"stitched\", \"\"),\n",
    "        ],\n",
    "    )\n",
    "\n",
    "\n",
    "def read_df(dataset_name, train_perc):\n",
    "    \"\"\"Load one stitching results table and reshape it for reporting.\n",
    "\n",
    "    Reads ``vision_transformer-stitching-<dataset_name>-<train_perc>.tsv`` from\n",
    "    the working directory (tab separated, first column as index), removes the\n",
    "    ``COLUMNS_TO_DROP`` columns, multiplies ``fscore`` by 100 and returns the\n",
    "    result of ``rearrange_embedtype_as_column``.\n",
    "    \"\"\"\n",
    "    tsv_path = f\"vision_transformer-stitching-{dataset_name}-{train_perc}.tsv\"\n",
    "    results = pd.read_csv(tsv_path, sep=\"\\t\", index_col=0)\n",
    "    results = results.drop(columns=COLUMNS_TO_DROP)\n",
    "\n",
    "    # Rescale the f-score by a factor of 100 before reshaping.\n",
    "    results[\"fscore\"] = results[\"fscore\"] * 100\n",
    "\n",
    "    return rearrange_embedtype_as_column(results)\n",
    "\n",
    "\n",
    "# Load one results table per dataset; the second argument is the training\n",
    "# fraction as it appears in the results file name.\n",
    "cifarcoarse = read_df(cifarcoarse_name, \"1\")\n",
    "\n",
    "cifarfine = read_df(cifarfine_name, \"1\")\n",
    "\n",
    "imagenet = read_df(imagenet_name, \"0.2\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# CIFAR Coarse + ImageNet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _add_dataset_level(frame, dataset_label):\n",
    "    \"\"\"Return a copy of ``frame`` with three-level column labels.\n",
    "\n",
    "    F-score columns ``(embed, \"fscore\")`` become ``(dataset_label, embed, \"fscore\")``;\n",
    "    every other column ``(name, \"\")`` is padded to ``(name, \"\", \"\")``. Columns are\n",
    "    matched by label, not by position.\n",
    "    \"\"\"\n",
    "    relabelled = frame.copy()\n",
    "    relabelled.columns = pd.MultiIndex.from_tuples(\n",
    "        [(dataset_label, top, sub) if sub == \"fscore\" else (top, sub, \"\") for top, sub in frame.columns]\n",
    "    )\n",
    "    return relabelled\n",
    "\n",
    "\n",
    "# Reload both tables so this cell always starts from two-level columns and can\n",
    "# be re-run without stacking another level on already relabelled frames.\n",
    "cifarcoarse = _add_dataset_level(read_df(cifarcoarse_name, \"1\"), \"Cifar100 Coarse\")\n",
    "imagenet = _add_dataset_level(read_df(imagenet_name, \"0.2\"), \"imagenet\")\n",
    "\n",
    "# The join keys name all three column levels. Two-element keys only resolve\n",
    "# through partial MultiIndex lookup, which is fragile across pandas versions.\n",
    "cifarimagenet = pd.merge(\n",
    "    cifarcoarse,\n",
    "    imagenet,\n",
    "    how=\"outer\",\n",
    "    on=[\n",
    "        (\"seed\", \"\", \"\"),\n",
    "        (\"train_model\", \"\", \"\"),\n",
    "        (\"test_model\", \"\", \"\"),\n",
    "        (\"stitched\", \"\", \"\"),\n",
    "    ],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def to_latex(df, label):\n",
    "    \"\"\"Render ``df`` as a LaTeX table string.\n",
    "\n",
    "    The caption contains the notebook-level ``train_perc`` followed by ``label``,\n",
    "    and the LaTeX label is ``tab:multilingual-<label>``. Cell contents are not\n",
    "    escaped, so LaTeX markup inside the frame is emitted verbatim.\n",
    "    \"\"\"\n",
    "    latex_options = {\n",
    "        \"escape\": False,\n",
    "        \"caption\": f\"Train perc: {train_perc} {label}\",\n",
    "        \"label\": f\"tab:multilingual-{label}\",\n",
    "        \"multirow\": True,\n",
    "        \"sparsify\": True,\n",
    "        \"multicolumn_format\": \"c\",\n",
    "    }\n",
    "    return df.to_latex(**latex_options)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show every row when a frame is displayed instead of truncating the output.\n",
    "pd.set_option(\"display.max_rows\", None)\n",
    "\n",
    "\n",
    "def formatter(mean, std):\n",
    "    \"\"\"Format a mean/std pair as a single LaTeX table cell.\n",
    "\n",
    "    Args:\n",
    "        mean: Mean value, or a string placeholder when it is missing.\n",
    "        std: Standard deviation, or a string placeholder when it is missing.\n",
    "\n",
    "    Returns:\n",
    "        A centred dash cell when either argument is a string; otherwise both\n",
    "        numbers with two decimals, joined by a LaTeX plus-minus sign in math mode.\n",
    "    \"\"\"\n",
    "    if isinstance(mean, str) or isinstance(std, str):\n",
    "        return r\"\\multicolumn{1}{c}{-}\"\n",
    "    # Raw f-string: in a normal string literal \"\\p\" is an invalid escape sequence,\n",
    "    # which Python reports with a warning. The produced text is unchanged.\n",
    "    return rf\"${mean:.2f} \\pm {std:.2f}$\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the column labels of the merged frame.\n",
    "cifarimagenet.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "train_col = (\"train_model\", \"\", \"\")\n",
    "test_col = (\"test_model\", \"\", \"\")\n",
    "\n",
    "# Drop every run that involves cspdarknet53 and take an explicit copy, so the\n",
    "# column assignments below write to an independent frame rather than to a\n",
    "# slice of `cifarimagenet` (which triggers SettingWithCopyWarning).\n",
    "keep = (cifarimagenet[train_col] != \"cspdarknet53\") & (cifarimagenet[test_col] != \"cspdarknet53\")\n",
    "full_df = cifarimagenet[keep].copy()\n",
    "\n",
    "# Replace underscores in the model names with hyphens (literal replacement, no regex).\n",
    "for model_col in (train_col, test_col):\n",
    "    full_df[model_col] = full_df[model_col].str.replace(\"_\", \"-\", regex=False)\n",
    "\n",
    "full_df = full_df.drop(columns=[(\"stitched\", \"\", \"\"), (\"seed\", \"\", \"\")])\n",
    "\n",
    "# Aggregate with the string names \"mean\"/\"std\": passing the NumPy callables to\n",
    "# a groupby aggregation is deprecated in recent pandas releases.\n",
    "df = full_df.groupby([train_col, test_col]).agg([\"mean\", \"std\"]).round(2)\n",
    "# Missing aggregates become \"-\", which `formatter` renders as a dash cell.\n",
    "df = df.fillna(\"-\")\n",
    "\n",
    "# Keep the numeric summary for display; `df` is turned into LaTeX strings below.\n",
    "o = df.copy()\n",
    "\n",
    "for dataset_name in (\"Cifar100 Coarse\", \"imagenet\"):\n",
    "    for embed in (\"Absolute\", \"Relative\"):\n",
    "        for metric, new_name in ((\"fscore\", \"FScore\"),):\n",
    "            mean_col = (dataset_name, embed, metric, \"mean\")\n",
    "            std_col = (dataset_name, embed, metric, \"std\")\n",
    "            # Collapse the mean/std pair into one formatted column, then\n",
    "            # remove the two numeric source columns.\n",
    "            df[(dataset_name, embed, new_name, \"\")] = [\n",
    "                formatter(mean, std) for mean, std in zip(df[mean_col], df[std_col])\n",
    "            ]\n",
    "            df = df.drop(columns=[mean_col, std_col])\n",
    "\n",
    "print(to_latex(df, \"en\"))\n",
    "o"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# CIFAR Fine"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_col = (\"train_model\", \"\")\n",
    "test_col = (\"test_model\", \"\")\n",
    "\n",
    "# Drop every run that involves cspdarknet53 and take an explicit copy, so the\n",
    "# column assignments below write to an independent frame rather than to a\n",
    "# slice of `cifarfine` (which triggers SettingWithCopyWarning).\n",
    "keep = (cifarfine[train_col] != \"cspdarknet53\") & (cifarfine[test_col] != \"cspdarknet53\")\n",
    "full_df = cifarfine[keep].copy()\n",
    "\n",
    "# Replace underscores in the model names with hyphens (literal replacement, no regex).\n",
    "for model_col in (train_col, test_col):\n",
    "    full_df[model_col] = full_df[model_col].str.replace(\"_\", \"-\", regex=False)\n",
    "\n",
    "full_df = full_df.drop(columns=[(\"stitched\", \"\"), (\"seed\", \"\")])\n",
    "\n",
    "# Aggregate with the string names \"mean\"/\"std\": passing the NumPy callables to\n",
    "# a groupby aggregation is deprecated in recent pandas releases.\n",
    "df = full_df.groupby([train_col, test_col]).agg([\"mean\", \"std\"]).round(2)\n",
    "# Missing aggregates become \"-\", which `formatter` renders as a dash cell.\n",
    "df = df.fillna(\"-\")\n",
    "\n",
    "cols = [\n",
    "    (\"Absolute\", \"fscore\", \"mean\"),\n",
    "    (\"Absolute\", \"fscore\", \"std\"),\n",
    "    (\"Relative\", \"fscore\", \"mean\"),\n",
    "    (\"Relative\", \"fscore\", \"std\"),\n",
    "]\n",
    "# Put Absolute before Relative; copy so new columns can be added to the selection.\n",
    "df = df[cols].copy()\n",
    "# Keep the numeric summary for display; `df` is turned into LaTeX strings below.\n",
    "o = df.copy()\n",
    "\n",
    "for embed in (\"Absolute\", \"Relative\"):\n",
    "    for metric, new_name in ((\"fscore\", \"FScore\"),):\n",
    "        mean_col = (embed, metric, \"mean\")\n",
    "        std_col = (embed, metric, \"std\")\n",
    "        # Collapse the mean/std pair into one formatted column, then remove\n",
    "        # the two numeric source columns.\n",
    "        df[(embed, new_name, \"\")] = [formatter(mean, std) for mean, std in zip(df[mean_col], df[std_col])]\n",
    "        df = df.drop(columns=[mean_col, std_col])\n",
    "\n",
    "print(to_latex(df, \"en\"))\n",
    "o"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
