{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import utils.plothelp as ph\n",
    "import matplotlib.pyplot as plt \n",
    "FIG_PATH = 'figs_analyze_toy_noise_mult_noise/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm.notebook import tqdm\n",
    "import pickle \n",
    "import os \n",
    "DATA_FOLDER = 'data/'\n",
    "DROP_MATRICES = True\n",
    "POSTPENDS=['','_log','_M*','_normed','_lognormed']\n",
    "# Simulation indices 0..N_SIM_SLOTS-1 are probed in every sweep folder.\n",
    "N_SIM_SLOTS = 70\n",
    "\n",
    "folders_to_check = ['sweep_toy_noise_mult_noise']\n",
    "# Per-experiment entries removed when DROP_MATRICES is set: each base key in every\n",
    "# postpend variant. POSTPENDS already contains '', so the bare keys would appear twice;\n",
    "# dict.fromkeys drops the duplicates while keeping the original order.\n",
    "base_matrix_keys = ['W_K','W_symm_K','eigv']\n",
    "matrix_keys = list(dict.fromkeys(\n",
    "    base_matrix_keys + [key + postpend for key in base_matrix_keys for postpend in POSTPENDS]\n",
    "))\n",
    "sims = []\n",
    "missing_sim_paths = []\n",
    "for folder in tqdm(folders_to_check,desc='All folders'):\n",
    "    folder_path = DATA_FOLDER + folder\n",
    "    print(f'Loading data from {folder_path}')\n",
    "    for sim_ind in tqdm(range(N_SIM_SLOTS),desc='Data from %s'%folder, leave=False):\n",
    "        sim_path = f'{folder_path}/{sim_ind:04d}/sims.pickle'\n",
    "        if not os.path.exists(sim_path):\n",
    "            # Absent runs are still skipped, but remembered so the gap can be reported.\n",
    "            missing_sim_paths.append(sim_path)\n",
    "            continue\n",
    "        # NOTE(security): pickle.load can execute arbitrary code; only load files you produced yourself.\n",
    "        with open(sim_path, 'rb') as f:\n",
    "            data = pickle.load(f)\n",
    "        if DROP_MATRICES:\n",
    "            # Drop the matrices from the data\n",
    "            for exp in data['experiments']:\n",
    "                for key in matrix_keys:\n",
    "                    # pop with a default removes the entry if present and is a no-op otherwise.\n",
    "                    exp.pop(key, None)\n",
    "        sims.append(data)\n",
    "# Missing files are skipped silently above, so report the totals here.\n",
    "print(f'Loaded {len(sims)} simulation files; {len(missing_sim_paths)} expected paths were missing.')\n",
    "if not sims:\n",
    "    print('WARNING: no simulation files were found; check DATA_FOLDER and the working directory.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Flatten the per-file records so experiments and replicate averages can be handled uniformly.\n",
    "experiments = [exp for sim in sims for exp in sim['experiments']]\n",
    "replicates = [rep for sim in sims for rep in sim['replicate_averages']]\n",
    "# Parameters that live only on each record's 'args' object; they are copied onto the\n",
    "# record itself so later code can read them as plain dictionary entries.\n",
    "params_from_args = ['P_noise_scale']\n",
    "for exp in experiments+replicates:\n",
    "    for param in params_from_args:\n",
    "        # getattr() is the public way to read an attribute by name (instead of calling the dunder directly).\n",
    "        exp[param] = getattr(exp['args'], param)\n",
    "\n",
    "\n",
    "# Merge replicate records that share the same parameters into a single dictionary.\n",
    "def experiment_tuple_from_rep(rep):\n",
    "    \"\"\"Return the (d, noise_type, noise_param, P_noise_scale) key used to group replicate records.\"\"\"\n",
    "    return (rep['d'],rep['noise_type'],rep['noise_param'],rep['P_noise_scale'])\n",
    "\n",
    "unique_replicate_averages = {}\n",
    "for sim in sims:\n",
    "    for rep in sim['replicate_averages']:\n",
    "        # Named rep_key rather than `tuple`, so the builtin is not shadowed for the rest of the kernel session.\n",
    "        rep_key = experiment_tuple_from_rep(rep)\n",
    "        if rep_key not in unique_replicate_averages:\n",
    "            # First record with these parameters: copy everything except its experiment list,\n",
    "            # then start a fresh list so the loaded record's own list is never mutated.\n",
    "            merged = {k: v for k, v in rep.items() if k != 'experiments'}\n",
    "            merged['experiments'] = []\n",
    "            unique_replicate_averages[rep_key] = merged\n",
    "        unique_replicate_averages[rep_key]['experiments'].extend(rep['experiments'])\n",
    "replicate_averages = list(unique_replicate_averages.values())\n",
    "\n",
    "# Positive-eigenvalue statistics, computed over each spectrum in reversed order.\n",
    "for exp in experiments:\n",
    "    for postpend in POSTPENDS:\n",
    "        eigs_reversed = exp['eigs'+postpend][::-1]\n",
    "        n_eigs = exp['eigs'+postpend].shape[0]\n",
    "        # Running number of positive entries among the first k reversed eigenvalues.\n",
    "        positive_count = np.cumsum(eigs_reversed > 0)\n",
    "        # Fraction of the first k reversed eigenvalues that are positive, for k = 1..n_eigs.\n",
    "        exp['eigs_fraction_positive'+postpend] = positive_count / (1+np.arange(n_eigs))\n",
    "        # True at index k-1 exactly when all of the first k reversed eigenvalues are positive.\n",
    "        exp['eigs_fraction_positive_all'+postpend] = (positive_count-1) == np.arange(n_eigs)\n",
    "\n",
    "# Pooled eigenvalue histograms for every merged replicate group.\n",
    "# Bin edges and centers come from the plothelp helpers; widths are the edge differences.\n",
    "eig_bins = ph.buildGeomBins(1e-3,1e3,100)\n",
    "eig_binc = ph.binCenters(eig_bins)\n",
    "eig_binw = np.diff(eig_bins)\n",
    "for replicate_average in replicate_averages:\n",
    "    group_experiments = replicate_average['experiments']\n",
    "    for postpend in POSTPENDS:\n",
    "        eig_key = 'eigs'+postpend\n",
    "        # One histogram per experiment, then summed bin-by-bin across the group.\n",
    "        per_experiment_counts = [np.histogram(exp[eig_key],bins=eig_bins)[0] for exp in group_experiments]\n",
    "        counts = np.sum(per_experiment_counts,axis = 0)\n",
    "        # Divide by bin width and by the total number of binned values.\n",
    "        total_count = np.sum(counts)\n",
    "        replicate_average['eig_hist'+postpend] = counts / (eig_binw * total_count)\n",
    "        replicate_average['eig_hist_binc'] = eig_binc\n",
    "\n",
    "\n",
    "\n",
    "print('Now, performing averaging!')\n",
    "# Per-experiment quantities to summarise; each one is looked up in every postpend variant.\n",
    "keys_to_average = ['polarization_K', 'polarization_std_K', 'expressivity_K', 'analogy_exact_K', 'analogy_mse_K', 'analogy_nearest_K','eigs_fraction_positive','eigs_fraction_positive_all']\n",
    "for replicate_average in tqdm(replicate_averages):\n",
    "    group_experiments = replicate_average['experiments']\n",
    "    for postpend in POSTPENDS:\n",
    "        for key in keys_to_average:\n",
    "            # Gather the per-experiment values once and reuse them for both statistics.\n",
    "            values = [exp[key+postpend] for exp in group_experiments]\n",
    "            replicate_average[key+postpend] = np.mean(values,axis=0)\n",
    "            replicate_average[key+postpend+'_std'] = np.std(values,axis=0)\n",
    "    # 'Ks' is copied from the first experiment of the group.\n",
    "    # NOTE(review): presumably 'Ks' is identical across a group's experiments - confirm.\n",
    "    replicate_average['Ks'] = group_experiments[0]['Ks']\n",
    "print(\"Saving... \",end='')\n",
    "# The summary stores both the merged replicate groups and the flat experiment list.\n",
    "summary = {'replicate_averages':replicate_averages,'experiments':experiments}\n",
    "with open(DATA_FOLDER+'sweep_toy_noise_mult_noise_summary.pickle','wb') as f:\n",
    "    pickle.dump(summary,f)\n",
    "print('Done!')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
