{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys \n",
    "sys.path.append('../')\n",
    "from src.utils.run_lib import * \n",
    "\n",
    "from src.core.passive_learning import *\n",
    "from src.core.auto_labeling import *\n",
    "from src.utils.logging_utils import * \n",
    "from src.datasets import dataset_factory  \n",
    "from src.datasets.dataset_utils import * \n",
    "from src.utils.counting_utils import *  \n",
    "from src.utils.common_utils import * \n",
    "from src.utils.vis_utils import *\n",
    "import copy \n",
    "import random\n",
    "\n",
    "from omegaconf import OmegaConf\n",
    "#from calibration_utils import * \n",
    "from  src.datasets.data_manager import * \n",
    "\n",
    "from src.calibration.calibration_utils import * \n",
    "\n",
    "# configuration\n",
    "conf_dir = '../configs/calib-exp/'\n",
    "\n",
    "act_learn_conf_file = '{}/twenty_newsgroups_base_conf.yaml'.format(conf_dir)\n",
    "\n",
    "#conf = load_yaml_config(pas_learn_conf_file)\n",
    "conf = OmegaConf.load(act_learn_conf_file)\n",
    "\n",
    "conf.data_conf.compute_emb = False \n",
    "\n",
    "#run_conf(conf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "set_seed(conf['random_seed'])\n",
    "dm = DataManager(conf,logger) # 11314 train, 7532 test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(dm.ds_std_train), len(dm.ds_std_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "(dm.ds_std_train[0][0]).shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "logger   = get_logger('../../../temp/logs/twenty_newsgroup_act_learning.log',stdout_redirect=True,level=logging.DEBUG)\n",
    "\n",
    "act_lbl = ActiveLabeling(conf,dm,logger)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "act_lbl.init()\n",
    "\n",
    "lst_epoch_out = act_lbl.run_al_loop()\n",
    "\n",
    "# logger.info('AL Loop Done')\n",
    "# #test_err = al.get_test_error(al.cur_clf,test_set,conf['inference_conf'])\n",
    "# out =  dm.get_auto_labeling_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "conf['train_pts_query_conf']['seed_train_size']= 10 \n",
    "conf['train_pts_query_conf']['max_num_train_pts']= 50\n",
    "\n",
    "conf['training_conf']['train_err_tol'] = -1\n",
    "conf['training_conf']['max_epochs'] = 50\n",
    "conf['training_conf']['weight_decay'] = 0.0\n",
    "conf['training_conf']['learning_rate'] = 1.0\n",
    "conf['training_conf']['use_lr_schedule'] = False \n",
    "conf['training_conf']['optimizer_name'] = 'sgd'\n",
    "conf['training_conf']['batch_size'] = 32\n",
    "conf['training_conf']['momentum'] = 0.9\n",
    "conf['training_conf']['log_batch_loss_freq']=-1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "logger   = get_logger('../../../temp/logs/twenty_newsgroup_passive_learning.log',stdout_redirect=True,level=logging.DEBUG)\n",
    "\n",
    "set_seed(conf['random_seed'])\n",
    "\n",
    "dm = DataManager(conf,logger)\n",
    "len(dm.ds_std_train), len(dm.ds_std_val)\n",
    "\n",
    "\n",
    "pl = PassiveLearning(conf,dm,logger)\n",
    "\n",
    "out = pl.run()\n",
    "\n",
    "w = pl.cur_clf.get_weights()\n",
    "print(torch.norm(w))\n",
    "test_err = get_test_error(pl.cur_clf,dm.ds_std_test,conf['inference_conf'])\n",
    "print(test_err)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cur_val_ds,cur_val_idcs=dm.get_current_validation_data()\n",
    "\n",
    "val_set_subset = cur_val_ds\n",
    "\n",
    "val_inf_out = pl.cur_clf.predict(val_set_subset,conf['inference_conf'])\n",
    "cal_out = compute_calibration(val_set_subset.Y.numpy(), val_inf_out['labels'].numpy(), val_inf_out['confidence'].numpy(), num_bins=10)\n",
    "print(cal_out['expected_calibration_error'])\n",
    "ax = plt.subplot(111)\n",
    "reliability_diagram_subplot(ax,cal_out)\n",
    "\n",
    "plt.figure() \n",
    "o = val_inf_out\n",
    "\n",
    "m = len(o['confidence'])\n",
    "S = np.zeros((m,4))\n",
    "S[:,0] = o['confidence']\n",
    "S[:,1] = o['labels'] \n",
    "S[:,2] = val_set_subset.Y\n",
    "S[:,3] = S[:,1]==S[:,2]\n",
    "\n",
    "S = S[(-S[:,0]).argsort()]\n",
    " \n",
    "bins = np.arange(0.1,1.1,0.05)\n",
    "labels = ['corrects','incorrects']\n",
    "plt.hist([S[S[:,3]==1,0],S[S[:,3]==0,0]],bins=bins,rwidth=0.5, histtype ='bar',label=labels)\n",
    "plt.legend()\n",
    "plt.ylabel('Count')\n",
    "plt.xlabel('Confidence')\n",
    "plt.grid() \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ml-kernel-tbal",
   "language": "python",
   "name": "ml-kernel-tbal"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
