{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plot histogram"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import torch\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "absolute_path = \"/\".join(os.path.abspath(os.getcwd()).split('/')[:-2])\n",
    "absolute_path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append(absolute_path)\n",
    "\n",
    "from utils.experiments import visualization, df_analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "eval_path = 'logs/eval_models'\n",
    "path_to_save = os.path.join(absolute_path, 'experiments/dump_results')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sim_mx_logs = 'logs/similarity_matrix/efficient_pnka/topk_landmarks_from_trainset'\n",
    "nb_landmarks = 1000\n",
    "\n",
    "\n",
    "arch1 = 'r18'\n",
    "arch2 = arch1\n",
    "layer = 'l17'\n",
    "dataset = 'cifar10'\n",
    "\n",
    "seeds_even = np.array([0,0,1])\n",
    "seeds_odd = np.array([1,2,2])\n",
    "seeds_even, seeds_odd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "input_data = f'{dataset}_testset'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sims = []\n",
    "cka_sims = []\n",
    "folder_names = []\n",
    "for idx, (seed1,seed2) in enumerate(zip(seeds_even, seeds_odd)):\n",
    "    print(seed1, seed2)\n",
    "    sim_mx_folder = f'M1_{dataset}-{arch1}-seed{seed1}_{layer}_M2_{dataset}-{arch2}-seed{seed2}_{layer}'\n",
    "    folder_names.append(f'{seed1}_{seed2}')\n",
    "    sim_mx = torch.load(\n",
    "        os.path.join(\n",
    "            absolute_path, sim_mx_logs, sim_mx_folder, input_data, 'pnka.pt'))\n",
    "    cka_mx = torch.load(\n",
    "        os.path.join(\n",
    "            absolute_path, 'logs/similarity_matrix/cka/', sim_mx_folder, input_data, 'final_sim_xxtyyt.pt'))\n",
    "    norm = torch.load(\n",
    "        os.path.join(\n",
    "            absolute_path, 'logs/similarity_matrix/cka/', sim_mx_folder,input_data, 'norm.pt'))\n",
    "    cka_sims.append(torch.trace(cka_mx)/norm)\n",
    "    \n",
    "    sim = sim_mx\n",
    "    if len(sim.shape) == 2:\n",
    "        sim = torch.diag(sim_mx)\n",
    "    sims.append(sim)\n",
    "\n",
    "sims = torch.stack(sims)\n",
    "cka_sims = torch.stack(cka_sims)\n",
    "sims.shape, cka_sims.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "filename = os.path.join(path_to_save,\n",
    "                        f'hist_{dataset}-{arch1}-avgoverdiffseeds-top{nb_landmarks}balancedlandmarksfromtrain.pdf')\n",
    "filename"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = {}\n",
    "for folder_name, sim in zip(folder_names, sims):\n",
    "    data[folder_name] = sim\n",
    "df = pd.DataFrame(data)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "avg = df.mean(axis=1)\n",
    "std = df.std(axis=1)\n",
    "df['avg'] = avg\n",
    "df['std'] = std"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[df['avg'] < 0.85]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cka_score = torch.mean(cka_sims)\n",
    "cka_score_std = torch.std(cka_sims)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ranges = np.arange(0, 1.1, 0.001)\n",
    "ranges"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# (len(df[df['avg'] < 0.7]) / len(df['avg'])) * 100\n",
    "len(df[df['avg'] < 0.85])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "split_df = []\n",
    "for x, y in zip(ranges, ranges[1:]):\n",
    "    split_df.append(df[(df['avg'] >= x) & (df['avg'] < y)])\n",
    "len(split_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "count_points_split_df = []\n",
    "avg_sim_split_df = []\n",
    "std_sim_split_df = []\n",
    "for split in split_df:\n",
    "    count_points_split_df.append(len(split))\n",
    "    if len(split) == 0:\n",
    "        avg_sim_split_df.append(0)\n",
    "        std_sim_split_df.append(0)\n",
    "    else:\n",
    "        avg_sim_split_df.append(split['avg'].mean())\n",
    "        std_sim_split_df.append(split['std'].mean())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Overall Histogram"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[f'{seed1}_{seed2}'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "agg_pnka = []\n",
    "std_agg_pnka = []\n",
    "for seed1, seed2 in zip(seeds_even, seeds_odd):\n",
    "    agg_pnka.append(df[f'{seed1}_{seed2}'].mean())\n",
    "    std_agg_pnka.append(df[f'{seed1}_{seed2}'].std())\n",
    "\n",
    "# agg_pnka = torch.sum(torch.tensor(df['avg'])) / len(df['avg'])\n",
    "# agg_pnka\n",
    "agg_pnka = torch.tensor(agg_pnka).mean()\n",
    "std_agg_pnka = torch.tensor(std_agg_pnka).mean()\n",
    "\n",
    "agg_pnka, std_agg_pnka"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[df['avg'] < 0.5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "fig = visualization.plot_histogram(\n",
    "    all_x=[df['avg']],\n",
    "    all_names=['count'],\n",
    "    yaxis_title_text='Number of points',\n",
    "    xaxis_title_text='PNKA',\n",
    "#     histnorm='probability density',\n",
    "#     error_y=dict(type='data', array=df['std']),\n",
    "    log=False,\n",
    "    xaxis=dict(\n",
    "            linecolor = \"black\"),\n",
    "    yaxis=dict(\n",
    "            linecolor = \"black\",),\n",
    "    xrange=[0,1],\n",
    "#     yrange=[0,400],\n",
    "    font=dict(\n",
    "        family=\"Times New Roman\",\n",
    "        size=23,\n",
    "        color=\"Black\"\n",
    "    ),\n",
    "    showlegend=False,\n",
    "    save_path=filename,\n",
    "    x_vline=cka_score,\n",
    "    linecolor='orange',\n",
    "    vline_annotation=f'CKA={round(cka_score.item(),2)}(±{round(cka_score_std.item(),2)})',\n",
    ")\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Visualizing top/bottom instances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torchvision\n",
    "\n",
    "transform_test = torchvision.transforms.Compose([\n",
    "    torchvision.transforms.ToTensor(),\n",
    "    torchvision.transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),\n",
    "])\n",
    "\n",
    "if dataset == 'cifar10':\n",
    "    testset = torchvision.datasets.CIFAR10(\n",
    "        root=f'{absolute_path}/data', train=False, download=False, transform=transform_test)\n",
    "else:\n",
    "    testset = torchvision.datasets.CIFAR100(\n",
    "        root=f'{absolute_path}/data', train=False, download=False, transform=transform_test)\n",
    "\n",
    "testset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['labels'] = testset.targets\n",
    "df_sorted_ascending = df.sort_values('avg')\n",
    "nb_instances = 100\n",
    "df_sorted_ascending.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bottom_images = np.take(\n",
    "    testset.data, df_sorted_ascending.index[:nb_instances], axis=0)\n",
    "bottom_labels = df_sorted_ascending['labels'][:nb_instances].values\n",
    "bottom_scores = df_sorted_ascending['avg'][:nb_instances].values\n",
    "bottom_images.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "visualization.plot_instances(\n",
    "    bottom_images, bottom_labels,\n",
    "    x=10, y=10, \n",
    "#     save_path=os.path.join(save_path, f'top{nb_instances}_{folder}.png'),\n",
    "    similarities=bottom_scores,\n",
    "    dataset='cifar')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for idx, bottom_image in enumerate(bottom_images):\n",
    "    transform = torchvision.transforms.ToTensor()\n",
    "    img = transform(bottom_image)\n",
    "    torchvision.utils.save_image(img, f'cifar10_images/bottom_image{idx}.png')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_sorted_descending = df.sort_values('avg', ascending=False)\n",
    "nb_instances = 100\n",
    "df_sorted_descending.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "top_images = np.take(\n",
    "    testset.data, df_sorted_descending.index[:nb_instances], axis=0)\n",
    "top_labels = df_sorted_descending['labels'][:nb_instances].values\n",
    "top_scores = df_sorted_descending['avg'][:nb_instances].values\n",
    "top_images.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "visualization.plot_instances(\n",
    "    top_images, top_labels,\n",
    "    x=10, y=10, \n",
    "#     save_path=os.path.join(save_path, f'top{nb_instances}_{folder}.png'),\n",
    "    similarities=top_scores,\n",
    "    dataset='cifar')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
