{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from minicons import cwe\n",
    "\n",
    "import torch\n",
    "from torch.utils.data import DataLoader\n",
    "\n",
    "from wic_model import WiCModel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_wic(file = \"train\"):\n",
    "    row = [x.strip().split(\"\\t\") for x in open(f\"../data/WiC_dataset/{file}/{file}.data.txt\", \"r\").readlines()]\n",
    "    if not file == \"test\":\n",
    "        gold = [x.strip() for x in open(f\"../data/WiC_dataset/{file}/{file}.gold.txt\", \"r\").readlines()]\n",
    "    dataset = []\n",
    "    for i, data in enumerate(row):\n",
    "        word, pos, idx, sentence1, sentence2 = data\n",
    "        idx1, idx2 = idx.split('-')\n",
    "        idx1, idx2 = int(idx1), int(idx2)\n",
    "        \n",
    "        context1 = [sentence1, idx1]\n",
    "        context2 = [sentence2, idx2]\n",
    "        \n",
    "        if not file == \"test\":\n",
    "            label = gold[i]\n",
    "            dataset.append((context1, context2, pos, label))\n",
    "        else:\n",
    "            dataset.append((context1, context2, pos))\n",
    "            \n",
    "    return dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "test = load_wic('test')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = WiCModel.load_from_checkpoint(f'{auth1_path}/makesense_logs/wic/bert/12/version_512_4_1e-05.ckpt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.freeze()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_dl = DataLoader(test, batch_size = 32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def build_batch(self, batch, approximator = 0):\n",
    "\n",
    "    context1, context2, pos = batch\n",
    "    context1, context2 = [list(zip(*x)) for x in [context1, context2]]\n",
    "    context1 = [(c, [i.item(), i.item()+1]) for c, i in context1]\n",
    "    context2 = [(c, [i.item(), i.item()+1]) for c, i in context2]\n",
    "\n",
    "    c1 = self.cwe.extract_representation(context1, self.layer)\n",
    "    c2 = self.cwe.extract_representation(context2, self.layer)\n",
    "\n",
    "    c1 = c1.to(self.device)\n",
    "    c2 = c2.to(self.device)\n",
    "\n",
    "    if approximator != 1:\n",
    "        return torch.cat((c1, c2), dim = 1)\n",
    "    else:\n",
    "        c1 = self.approximator_model(c1)\n",
    "        c2 = self.approximator_model(c2)\n",
    "        return torch.cat((c1, c2), dim = 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "predictions = []\n",
    "for i, batch in enumerate(test_dl):\n",
    "    x = build_batch(model, batch)\n",
    "        # x, y = batch\n",
    "    y_hat = model.decoder(model.encoder(x))\n",
    "    predicted = y_hat.argmax(1)\n",
    "    predictions.extend(predicted.tolist())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "predictions_labels = ['T' if p == 1 else 'F' for p in predictions]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"layer_12_original.txt\", \"w\") as f:\n",
    "    f.write(\"\\n\".join(predictions_labels))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# with open(\"layer_9_original.txt\", \"w\") as f:\n",
    "#     f.write(\"\\n\".join(predictions_labels))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}