{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load modules, mainly huggingface basic model handlers.\n",
    "# Make sure you install huggingface and other packages properly.\n",
    "from collections import Counter\n",
    "import json\n",
    "\n",
    "from nltk.tokenize import TweetTokenizer\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.feature_extraction import DictVectorizer\n",
    "from sklearn.feature_extraction.text import TfidfTransformer\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.naive_bayes import MultinomialNB\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.metrics import classification_report\n",
    "\n",
    "import logging\n",
    "logger = logging.getLogger(__name__)\n",
    "# Setup logging\n",
    "logging.basicConfig(\n",
    "    format=\"%(asctime)s - %(levelname)s - %(name)s -   %(message)s\",\n",
    "    datefmt=\"%m/%d/%Y %H:%M:%S\",\n",
    "    level=logging.INFO,\n",
    ")\n",
    "\n",
    "import os\n",
    "import random\n",
    "import sys\n",
    "from dataclasses import dataclass, field\n",
    "from typing import Optional\n",
    "import torch\n",
    "import argparse\n",
    "import numpy as np\n",
    "from datasets import load_dataset, load_metric\n",
    "from datasets import Dataset\n",
    "import pandas as pd\n",
    "\n",
    "import transformers\n",
    "from transformers import (\n",
    "    AutoConfig,\n",
    "    AutoModelForSequenceClassification,\n",
    "    AutoTokenizer,\n",
    "    EvalPrediction,\n",
    "    HfArgumentParser,\n",
    "    PretrainedConfig,\n",
    "    Trainer,\n",
    "    TrainingArguments,\n",
    "    default_data_collator,\n",
    "    set_seed,\n",
    ")\n",
    "from transformers.trainer_utils import is_main_process"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class HuggingFaceRoBERTaBase:\n",
    "    \"\"\"\n",
    "    An extension for evaluation based off the huggingface module.\n",
    "    \"\"\"\n",
    "    def __init__(self, tokenizer, model, task_config):\n",
    "        self.task_config = task_config\n",
    "        self.tokenizer = tokenizer\n",
    "        self.model = model\n",
    "        \n",
    "    def evaluation(self, data_path, training_args, max_length=128, csv_source=\"all\"):\n",
    "\n",
    "        print(\"*** Evaluate with %s ***\"%(data_path))\n",
    "        \n",
    "        eval_df = pd.read_csv(data_path , delimiter=\"\\t\")\n",
    "        # if source is not all, we need to filter rows based on the source column\n",
    "        if csv_source != \"all\":\n",
    "            eval_df = eval_df[eval_df[\"source\"]==csv_source]\n",
    "        datasets = {}\n",
    "        datasets[\"validation\"] = Dataset.from_pandas(eval_df)\n",
    "        \n",
    "        label_list = datasets[\"validation\"].unique(\"label\")\n",
    "        label_list.sort()  # Let's sort it for determinism\n",
    "\n",
    "        padding = \"max_length\"\n",
    "        sentence1_key, sentence2_key = self.task_config\n",
    "        label_to_id = None\n",
    "        def preprocess_function(examples):\n",
    "            # Tokenize the texts\n",
    "            args = (\n",
    "                (examples[sentence1_key],) if sentence2_key is None else (examples[sentence1_key], examples[sentence2_key])\n",
    "            )\n",
    "            result = self.tokenizer(*args, padding=padding, max_length=max_length, truncation=True)\n",
    "            # Map labels to IDs (not necessary for GLUE tasks)\n",
    "            if label_to_id is not None and \"label\" in examples:\n",
    "                result[\"label\"] = [label_to_id[l] for l in examples[\"label\"]]\n",
    "            return result\n",
    "        datasets[\"validation\"] = datasets[\"validation\"].map(preprocess_function, batched=True)\n",
    "        \n",
    "        eval_dataset = datasets[\"validation\"]\n",
    "        \n",
    "        metric = load_metric(\"glue\", \"sst2\") # any glue task will do the job, just for eval loss\n",
    "        \n",
    "        def dynasent_compute_metrics(p: EvalPrediction):\n",
    "            preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions\n",
    "            preds = np.argmax(preds, axis=1)\n",
    "            result_to_print = classification_report(p.label_ids, preds, digits=5, output_dict=True)\n",
    "            print(classification_report(p.label_ids, preds, digits=5))\n",
    "            result_to_return = metric.compute(predictions=preds, references=p.label_ids)\n",
    "            result_to_return[\"Macro-F1\"] = result_to_print[\"macro avg\"][\"f1-score\"]\n",
    "            return result_to_return\n",
    "        \n",
    "        # Initialize our Trainer. We are only intersted in evaluations\n",
    "        trainer = Trainer(\n",
    "            model=model,\n",
    "            args=training_args,\n",
    "            eval_dataset=eval_dataset,\n",
    "            compute_metrics=dynasent_compute_metrics,\n",
    "            tokenizer=self.tokenizer,\n",
    "            # Data collator will default to DataCollatorWithPadding, so we change it if we already did the padding.\n",
    "            data_collator=default_data_collator\n",
    "        )\n",
    "        eval_result = trainer.evaluate(eval_dataset=eval_dataset)\n",
    "        \n",
    "        print(\"*** Loss and GLUE AUC ***\")\n",
    "        print(eval_result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if __name__ == \"__main__\":\n",
    "    parser = argparse.ArgumentParser()\n",
    "\n",
    "    ## Required parameters\n",
    "    parser.add_argument(\"--task_name\",\n",
    "                        default=\"dynasent\",\n",
    "                        type=str)\n",
    "    parser.add_argument(\"--data_path\",\n",
    "                        default=\"../datasets/round0/round0-dev.tsv\",\n",
    "                        type=str,\n",
    "                        help=\"The input data dir. Should contain the .tsv files (or other data files) for the task.\")\n",
    "    parser.add_argument(\"--model_path\",\n",
    "                        default=\"../saved_models/pytorch_model.bin\",\n",
    "                        type=str,\n",
    "                        help=\"The pretrained model binary file.\")\n",
    "    parser.add_argument(\"--model_type\",\n",
    "                        default=\"roberta-base\",\n",
    "                        type=str,\n",
    "                        help=\"The pretrained model binary file.\")\n",
    "    parser.add_argument(\"--no_cuda\",\n",
    "                        default=False,\n",
    "                        action='store_true',\n",
    "                        help=\"Whether not to use CUDA when available\")\n",
    "    parser.add_argument(\"--cache_dir\",\n",
    "                        default=\"../tmp/\",\n",
    "                        type=str,\n",
    "                        help=\"Cache directory for the evaluation pipeline (not HF cache).\")\n",
    "    parser.add_argument(\"--max_seq_length\",\n",
    "                        default=128,\n",
    "                        type=int,\n",
    "                        help=\"The maximum total input sequence length after WordPiece tokenization. \\n\"\n",
    "                                \"Sequences longer than this will be truncated, and sequences shorter \\n\"\n",
    "                                \"than this will be padded.\")\n",
    "    parser.add_argument(\"--per_device_eval_batch_size\",\n",
    "                        default=8,\n",
    "                        type=int,\n",
    "                        help=\"The batch size per device for evaluation.\")\n",
    "    parser.add_argument(\"--is_tensorboard\",\n",
    "                        default=False,\n",
    "                        action='store_true',\n",
    "                        help=\"If tensorboard is connected.\")\n",
    "    parser.add_argument(\"--csv_source\",\n",
    "                        default=\"all\",\n",
    "                        type=str,\n",
    "                        help=\"If the csv file has a source column, only source from this indicated source\"\n",
    "                             \" will be used to evaluate.\")\n",
    "    parser.add_argument(\"--embeddings_path\",\n",
    "                        default=\"\",\n",
    "                        type=str,\n",
    "                        help=\"The embedding file to swap.\")\n",
    "    try:\n",
    "        get_ipython().run_line_magic('matplotlib', 'inline')\n",
    "        args = parser.parse_args([])\n",
    "    except:\n",
    "        args = parser.parse_args()\n",
    "    # os.environ[\"WANDB_DISABLED\"] = \"false\" if args.is_tensorboard else \"true\"\n",
    "    os.environ[\"TRANSFORMERS_CACHE\"] = \"../huggingface_cache/\"\n",
    "    # if cache does not exist, create one\n",
    "    if not os.path.exists(os.environ[\"TRANSFORMERS_CACHE\"]): \n",
    "        os.makedirs(os.environ[\"TRANSFORMERS_CACHE\"])\n",
    "\n",
    "    training_args = TrainingArguments(\"tmp_trainer\")\n",
    "    training_args.no_cuda = args.no_cuda\n",
    "    training_args.per_device_eval_batch_size = args.per_device_eval_batch_size\n",
    "    training_args.per_gpu_eval_batch_size = args.per_device_eval_batch_size\n",
    "    training_args_dict = training_args.to_dict()\n",
    "    _n_gpu = training_args_dict[\"_n_gpu\"]\n",
    "    del training_args_dict[\"_n_gpu\"]\n",
    "    training_args_dict[\"n_gpu\"] = _n_gpu\n",
    "    HfParser = HfArgumentParser((TrainingArguments))\n",
    "    training_args = HfParser.parse_dict(training_args_dict)[0]\n",
    "\n",
    "    TASK_CONFIG = {\n",
    "        \"classification\": (\"text\", None)\n",
    "    }\n",
    "\n",
    "    # Load pretrained model and tokenizer\n",
    "    NUM_LABELS = 3\n",
    "    MAX_SEQ_LEN = 128\n",
    "    config = AutoConfig.from_pretrained(\n",
    "        args.model_type,\n",
    "        num_labels=3,\n",
    "        finetuning_task=args.task_name,\n",
    "        cache_dir=args.cache_dir\n",
    "    )\n",
    "    tokenizer = AutoTokenizer.from_pretrained(\n",
    "        args.model_type,\n",
    "        use_fast=False,\n",
    "        cache_dir=args.cache_dir\n",
    "    )\n",
    "    model = AutoModelForSequenceClassification.from_pretrained(\n",
    "        args.model_path,\n",
    "        from_tf=False,\n",
    "        config=config,\n",
    "        cache_dir=args.cache_dir\n",
    "    )\n",
    "    if len(args.embeddings_path) != 0:\n",
    "        logger.info(\"***** Loading an new embedding file to the model *****\")\n",
    "        logger.info(\"***** You are evaluating sort of zero-shot here!!! *****\")\n",
    "        transformed_weight = torch.load(args.embeddings_path)\n",
    "        model.bert.embeddings.word_embeddings.weight.data = transformed_weight.data\n",
    "    \n",
    "    eval_pipeline = HuggingFaceRoBERTaBase(tokenizer, model, TASK_CONFIG[args.task_name])\n",
    "        \n",
    "    eval_pipeline.evaluation(args.data_path, training_args, max_length=args.max_seq_length, \n",
    "                             csv_source=args.csv_source)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
