{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6c95a8f9-b55c-46ff-be96-069f965ab1c9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "import matplotlib.pyplot as plt\n",
    "from scipy import stats\n",
    "import pickle\n",
    "\n",
    "from matplotlib import rcParams\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "\n",
    "rcParams[\"font.family\"] = \"serif\"\n",
    "rcParams[\"grid.linestyle\"] = ':'\n",
    "rcParams[\"xtick.direction\"] = 'in'\n",
    "rcParams[\"ytick.direction\"] = 'in'\n",
    "rcParams[\"legend.fontsize\"] = 9\n",
    "rcParams[\"axes.labelsize\"] = 20\n",
    "rcParams[\"axes.titlesize\"] = 20\n",
    "rcParams[\"xtick.labelsize\"] = 15\n",
    "rcParams[\"ytick.labelsize\"] = 15"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "da04affd-2108-4bac-8d2c-d118d8fc1754",
   "metadata": {},
   "source": [
    "## Load arguments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a2112166-1207-44c3-9424-abea694e0c8c",
   "metadata": {},
   "outputs": [],
   "source": [
    "results_dir = '../zero_shot_evaluations'\n",
    "search_counts_dir = '../search_counts'"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "af32f6c1-2ddb-4967-9b22-3578f1ef2558",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "3f0765e1-9f0d-4d75-a52c-e10d5942878b",
   "metadata": {},
   "outputs": [],
   "source": [
    "zero_shot_results = [os.path.join(results_dir, x) for x in os.listdir(results_dir) if 'class_results.json' in x]\n",
    "rampp_07_search_counts = [os.path.join(search_counts_dir, x) for x in os.listdir(search_counts_dir) if '0.7_' in x and 'integrated' in x]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "30546f93-fadd-43ec-9dde-ce5648dc82f7",
   "metadata": {},
   "source": [
    "## Function Fitting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "cd12de20-2da0-43b9-a3c9-f7e0b82ea4a0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def save_plotting_info(curr_model, pretrained_dataset, prompt_type):\n",
    "\n",
    "    datasets_included = [\n",
    "        'imagenet', \n",
    "        'cifar10', \n",
    "        'cifar100', \n",
    "        'caltech101', \n",
    "        'caltech256', \n",
    "        'sun397', \n",
    "        'birdsnap', \n",
    "        'country211', \n",
    "        'cub', \n",
    "        'dtd', \n",
    "        'eurosat', \n",
    "        'fgvcaircraft', \n",
    "        'flowers102', \n",
    "        'food101', \n",
    "        'imagenet-sketch', \n",
    "        'oxfordpets', \n",
    "        'stanfordcars', \n",
    "        'ucf101'\n",
    "    ]\n",
    "    \n",
    "    _7_counts = []\n",
    "    accuracies = []\n",
    "    classnames = []\n",
    "    \n",
    "    for downstream_dataset in datasets_included:\n",
    "        _7path = os.path.join(search_counts_dir, '{}_{}_integrated_tlemmatized_i0.7_search_counts.json'.format(downstream_dataset, pretrained_dataset))\n",
    "        \n",
    "        dataset = _7path.split('/')[-1].split('_')[0]\n",
    "\n",
    "        res_path = os.path.join(results_dir, '{}_{}_results.json'.format(dataset, prompt_type))\n",
    "        # load zs-results\n",
    "        with open(res_path, 'r') as f:\n",
    "            zs_results = json.load(f)\n",
    "        # for now just take the first model that satisfies the pt_dataset we have\n",
    "        for m_key in zs_results.keys():\n",
    "            if pretrained_dataset in m_key:\n",
    "                if curr_model in m_key:\n",
    "                    req_model = m_key\n",
    "                    break\n",
    "        model_results = zs_results[req_model]['classwise']\n",
    "        if dataset == 'cifar10':\n",
    "            model_results['airplane'] = model_results['plane']\n",
    "            model_results.pop('plane', None)\n",
    "    \n",
    "        # load counts\n",
    "        with open(_7path, 'r') as f:    \n",
    "            _7json = json.load(f)\n",
    "    \n",
    "        for key in sorted(_7json):\n",
    "            assert key in model_results, 'Key mismatch {} for {}'.format(key, dataset)\n",
    "            classnames.append(key)\n",
    "            _7_counts.append(_7json[key])\n",
    "            accuracies.append(model_results[key])\n",
    "\n",
    "    _7_counts_positive = [x if x > 0 else 1 for x in _7_counts]\n",
    "    \n",
    "    x_vals = np.log(_7_counts_positive)\n",
    "    \n",
    "    bins = np.linspace(min(x_vals), max(x_vals), num=12)\n",
    "    assigned_bins = np.digitize(x_vals, bins, right=True)\n",
    "    \n",
    "    cumsums = [0]*len(bins)\n",
    "    cumcounts = [0]*len(bins)\n",
    "    cumarrs = {ab:[] for ab in assigned_bins}\n",
    "    for acc, xv, ab in zip(accuracies, x_vals, assigned_bins):\n",
    "        cumsums[ab] += acc\n",
    "        cumcounts[ab] += 1\n",
    "        cumarrs[ab].append(acc)\n",
    "    cumaccs = [s/c if c > 0 else 0 for s, c in zip(cumsums, cumcounts)]\n",
    "    cummeans = np.zeros(len(cumarrs))\n",
    "    cumstds = np.zeros(len(cumarrs))\n",
    "    for key in cumarrs:\n",
    "        cummeans[key] = np.mean(cumarrs[key])\n",
    "        cumstds[key] = np.std(cumarrs[key])\n",
    "    \n",
    "    os.makedirs('./plots', exist_ok=True)\n",
    "    pickle.dump({'exp_bins': np.exp(bins), 'cum_means': cummeans, 'cum_stds': cumstds}, open('./plots/log-linear-all-datasets-plot-counttype_{}_prompttype_{}_ptdataset_{}_model_{}.pkl'.format('integrated_rampp0.7', prompt_type, pretrained_dataset, curr_model), 'wb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f496c12c-921e-4fe5-abbf-c72eb74d68f0",
   "metadata": {},
   "outputs": [],
   "source": [
    "combinations = [\n",
    "    ('RN50', 'cc3m'),\n",
    "    ('ViT-B-16', 'cc3m'),\n",
    "\n",
    "    ('RN50', 'cc12m'),\n",
    "    ('ViT-B-16', 'cc12m'),\n",
    "\n",
    "    ('RN50', 'yfcc15m'),\n",
    "    ('RN101', 'yfcc15m'),\n",
    "    ('ViT-B-16', 'yfcc15m'),\n",
    "\n",
    "    ('ViT-B-16', 'synthci30m'),\n",
    "\n",
    "    ('ViT-B-32', 'laion200m_train_test_sim_normalized'),\n",
    "\n",
    "    ('ViT-B-32', 'laion400m'),\n",
    "    ('ViT-B-16', 'laion400m'),\n",
    "    ('ViT-L-14', 'laion400m'),\n",
    "]\n",
    "\n",
    "prompts = [\n",
    "    'class',\n",
    "    'ensemble',\n",
    "    'simple',\n",
    "]\n",
    "\n",
    "for comb in combinations:\n",
    "    for prt in prompts:\n",
    "        save_plotting_info(comb[0], comb[1], prt)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
