{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import os\n",
    "import sys \n",
    "sys.path.append('../../')\n",
    "\n",
    "from math import asin \n",
    "\n",
    "from core.passive_learning import *\n",
    "from core.auto_labeling import *\n",
    "from utils.logging_utils import * \n",
    "from datasets import dataset_factory  \n",
    "from datasets.dataset_utils import * \n",
    "from utils.counts import *  \n",
    "from utils.common_utils import * \n",
    "from utils.vis_utils import *\n",
    "import copy \n",
    "import random \n",
    "from calibration.calibration_utils import * \n",
    "\n",
    "from core.run_lib import *\n",
    "\n",
    "# ---- Configuration ----\n",
    "# All paths below are relative to the directory the kernel is started in.\n",
    "conf_dir = '../../configs/arxiv-configs/Cifar10-Vit/'\n",
    "\n",
    "# conf_dir already ends with a separator, so join the file name with os.path.join\n",
    "# instead of formatting in another '/', which produced a doubled '//' in the path.\n",
    "pas_learn_conf_file = os.path.join(conf_dir, 'passive_learning_cifar10_vit_pytorch.yaml')\n",
    "\n",
    "conf = load_yaml_config(pas_learn_conf_file)\n",
    "\n",
    "# Create the log directory up front so a fresh checkout can run this cell.\n",
    "# NOTE(review): get_logger may already do this; the call is then a no-op.\n",
    "log_file = '../../temp/logs/pl.log'\n",
    "os.makedirs(os.path.dirname(log_file), exist_ok=True)\n",
    "logger = get_logger(log_file,'PL',level=logging.DEBUG)\n",
    "\n",
    "# Data settings. val_fraction is read again by the next cell, which uses\n",
    "# 1 - val_fraction as the split fraction and in the train_set_frac formula.\n",
    "conf['data_conf']['data_path'] = '../../data/'\n",
    "conf['data_conf']['val_fraction'] = 0.5\n",
    "\n",
    "# train_conf aliases conf['training_conf'], so the assignments below update conf in place.\n",
    "train_conf = conf['training_conf']\n",
    "\n",
    "# Optimizer hyper-parameters.\n",
    "train_conf['optimizer']='sgd'\n",
    "train_conf['learning_rate']= 0.04\n",
    "train_conf['momentum']=0.9\n",
    "train_conf['weight_decay']=5e-4 \n",
    "\n",
    "# NOTE(review): -1 presumably disables stopping on a training-error tolerance - confirm.\n",
    "train_conf['train_err_tol'] = -1\n",
    "\n",
    "# Training loop settings.\n",
    "train_conf['batch_size']= 64 #len(train_set)\n",
    "train_conf['shuffle']= True\n",
    "train_conf['max_epochs'] = 10\n",
    "\n",
    "train_conf['log_val_err']=True \n",
    "\n",
    "# Auto-labeling settings.\n",
    "# NOTE(review): the meaning of C_1 is not visible in this notebook - see core.auto_labeling.\n",
    "conf['auto_lbl_conf']['auto_label_err_threshold'] = 0.1\n",
    "conf['auto_lbl_conf']['C_1'] = 0.01"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# ---- Run bookkeeping ----\n",
    "results = []\n",
    "i = 1        # stored as conf['random_seed'] and passed to set_seed below\n",
    "n = 10000    # numerator of the train_set_frac formula below\n",
    "\n",
    "conf[\"random_seed\"] = i\n",
    "\n",
    "conf['training_conf']['seed_train_size'] = 2500\n",
    "\n",
    "# train_set_frac = n / (50000 * fraction of the data not held out for validation)\n",
    "non_val_fraction = 1 - conf['data_conf']['val_fraction']\n",
    "conf['training_conf']['train_set_frac'] = n / (50000 * non_val_fraction)\n",
    "set_seed(conf['random_seed'])\n",
    "\n",
    "# ---- Data ----\n",
    "ds = dataset_factory.load_dataset(conf)\n",
    "ds.build_dataset()\n",
    "split_fraction = 1 - conf['data_conf']['val_fraction']\n",
    "train_set, val_set_std = randomly_split_dataset(ds, split_fraction)\n",
    "# val_set_std is the large, fixed validation set; the algorithm is only given\n",
    "# a random subsample of it (val_set_subset).\n",
    "\n",
    "auto_lbl_conf = conf['auto_lbl_conf']\n",
    "\n",
    "val_subsample_frac = auto_lbl_conf['val_frac_for_auto_lbl']\n",
    "val_set_subset = val_set_std.get_random_fraction(val_subsample_frac)\n",
    "\n",
    "test_set = ds.get_test_datasets()\n",
    "dataset_name = conf['data_conf']['dataset']\n",
    "logger.info('Loaded dataset {}'.format(dataset_name))\n",
    "pool_size, val_size = len(train_set), len(val_set_subset)\n",
    "logger.info('Unlabeled pool size: {} Validation Set Size:{}'.format(pool_size, val_size))\n",
    "\n",
    "# ---- Passive learning run ----\n",
    "human_labeling_helper = HumanLabelingHelper(train_set.X, train_set.Y)\n",
    "\n",
    "pl = PassiveLearning(\n",
    "    conf,\n",
    "    train_set,\n",
    "    human_labeling_helper,\n",
    "    ds_val=val_set_subset,\n",
    "    ds_test=test_set,\n",
    "    logger=logger,\n",
    ")\n",
    "out = pl.run()\n",
    "# Last expression of the cell, so its value is displayed as the cell output.\n",
    "pl.get_counts(train_set, test_set)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "aistats",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.7.13"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
