{
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "!pip install transformers accelerate -U"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "1ymE0R3EuIZd",
        "outputId": "014eff3e-f4f2-43bd-9688-19db92ad8d2c"
      },
      "id": "1ymE0R3EuIZd",
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.32.1)\n",
            "Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (0.22.0)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n",
            "Requirement already satisfied: huggingface-hub<1.0,>=0.15.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.16.4)\n",
            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.23.5)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n",
            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n",
            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.6.3)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n",
            "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.13.3)\n",
            "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.3.3)\n",
            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.1)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n",
            "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (2.0.1+cu118)\n",
            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.15.1->transformers) (2023.6.0)\n",
            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.15.1->transformers) (4.7.1)\n",
            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (1.12)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1.2)\n",
            "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2.0.0)\n",
            "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.10.0->accelerate) (3.27.2)\n",
            "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.10.0->accelerate) (16.0.6)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.2.0)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.4)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.7.22)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.3)\n",
            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import reservoirtransformers\n",
        "import math\n",
        "import os\n",
        "import warnings\n",
        "from dataclasses import dataclass\n",
        "from typing import List, Optional, Tuple, Union\n",
        "\n",
        "import torch\n",
        "import torch.utils.checkpoint\n",
        "from torch import nn\n",
        "from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss\n",
        "import torch.nn.functional as F"
      ],
      "metadata": {
        "id": "mhmjveElujN2"
      },
      "id": "mhmjveElujN2",
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "id": "7eb58cec",
      "metadata": {
        "id": "7eb58cec"
      },
      "outputs": [],
      "source": [
        "from configuration import ReservoirTConfig\n",
        "\n",
        "configuration = ReservoirTConfig()\n",
        "\n",
        "configuration.output_size=7\n",
        "configuration.re_output_size=20\n",
        "configuration.max_sequence_length=1399\n",
        "configuration.sequence_length=384\n",
        "configuration.pred_len=720\n",
        "configuration.hidden_size=8\n",
        "configuration.num_attention_heads=4\n",
        "configuration.hidden_dropout_prob=0.0\n",
        "configuration.num_hidden_layers=4\n",
        "configuration.num_reservoirs = 10\n",
        "configuration.reservoir_size = [30, 15, 20, 25, 30, 35, 40, 45, 50, 50]\n",
        "configuration.spectral_radius = [0.6, 0.8, 0.55, 0.6, 0.5, 0.4, 0.3, 0.2, 0.81, 0.05]\n",
        "configuration.sparsity = [0.6, 0.55, 0.5, 0.45, 0.4, 0.35, 0.3, 0.25, 0.2, 0.15]\n",
        "configuration.leaky = [0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39]\n",
        "configuration.attention_probs_dropout_prob=0.0\n",
        "#bert_model = TabularBertForRegression(config=configuration).to(\"cpu\", dtype=float)\n",
        "model = reservoirtransformers.ReservoirTTimeSeries(config=configuration).to(\"cpu\", dtype=float)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "id": "0f6bf41e",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 70
        },
        "id": "0f6bf41e",
        "outputId": "237e4f79-5b06-4925-ee35-d73224a6d2d4"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "\"\\nzero_col = np.zeros((X.shape[0], 1))\\n\\n# Concatenate the original array with the zero column along the second axis (columns)\\nX = np.hstack((X, zero_col))\\n#X =  dataset.drop(['ate'], axis = 1).values\\n\\n#X_train, X_test, y_train, y_test =train_test_split(X.values, y, test_size=0.2, shuffle=False)\\n\""
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "string"
            }
          },
          "metadata": {},
          "execution_count": 3
        }
      ],
      "source": [
        "import numpy as np\n",
        "# prepare data for lstm\n",
        "from sklearn.preprocessing import StandardScaler\n",
        "from pandas import read_csv\n",
        "from pandas import DataFrame\n",
        "import random\n",
        "from sklearn.model_selection import train_test_split\n",
        "from pandas import concat\n",
        "from sklearn.preprocessing import LabelEncoder\n",
        "from sklearn.preprocessing import MinMaxScaler\n",
        "dataset= read_csv('/content/ETTm2.csv')\n",
        "dataset=dataset.dropna()\n",
        "dataset = dataset.drop(['date'], axis = 1)\n",
        "dataset = dataset.dropna()\n",
        "#data = dataset.values[0:14000]\n",
        "\n",
        "\n",
        "y = dataset.OT.values\n",
        "\n",
        "\n",
        "X = dataset.values\n",
        "\n",
        "scaler = StandardScaler()\n",
        "X = scaler.fit_transform(X)\n",
        "\n",
        "\n",
        "\n",
        "#X=X[1:]\n",
        "\n",
        "#Reservoir_id = np.array([[0] * len(X[0])] + X[:-1].tolist())\n",
        "# Create a zero column of shape (100, 1)\n",
        "'''\n",
        "zero_col = np.zeros((X.shape[0], 1))\n",
        "\n",
        "# Concatenate the original array with the zero column along the second axis (columns)\n",
        "X = np.hstack((X, zero_col))\n",
        "#X =  dataset.drop(['ate'], axis = 1).values\n",
        "\n",
        "#X_train, X_test, y_train, y_test =train_test_split(X.values, y, test_size=0.2, shuffle=False)\n",
        "'''"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "id": "baffd6ba",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "baffd6ba",
        "outputId": "5f05db1d-37d3-4adc-d252-d0d96ab90597"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(69680, 7)"
            ]
          },
          "metadata": {},
          "execution_count": 5
        }
      ],
      "source": [
        "X.shape"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "id": "6d0706c5",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 138,
          "referenced_widgets": [
            "e6ea54fa204f420abc9f819282db1296",
            "4c4534e14e5042dd82da2adddd3d1e05",
            "8716801d7dd64129a8fdb2565abece36",
            "eb798316f734455b84d2fe4a3f97bde2",
            "a07175c34b8147359c110edd1a0bf77c",
            "ef2730c3d0be4e8a8c3116a8f7572e1e",
            "2ad8c039f15a4f69acea32560f9142fb",
            "8cd998c846c1484b94b6303bf4da4dfa",
            "2f6e423409b9428fbc507343e7818f60",
            "bd7ebe4158de456994de17fc28039eca",
            "bcf2168e5c5e4834b1100834fa058ffa"
          ]
        },
        "id": "6d0706c5",
        "outputId": "285805db-f558-4030-d6cf-d00d435953a2"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "creating seq\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "  0%|          | 0/68577 [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "e6ea54fa204f420abc9f819282db1296"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "torch.Size([68577, 384, 7])\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-4-1feea5ab717f>:11: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at ../torch/csrc/utils/tensor_new.cpp:245.)\n",
            "  return torch.tensor(sequences), torch.tensor(targets)\n"
          ]
        }
      ],
      "source": [
        "from tqdm.auto import tqdm\n",
        "# 1. Preprocess the data into the required format\n",
        "def create_sequences(data, seq_length, pred_length):\n",
        "    sequences = []\n",
        "    targets = []\n",
        "    print('creating seq')\n",
        "    for i in tqdm(range(len(data) - seq_length - pred_length + 1)):\n",
        "\n",
        "        sequences.append(data[i:i+seq_length])\n",
        "        targets.append(data[i+seq_length:i+seq_length+pred_length])\n",
        "    return torch.tensor(sequences), torch.tensor(targets)\n",
        "\n",
        "X, y = create_sequences(data=X, seq_length=configuration.sequence_length, pred_length=configuration.pred_len)\n",
        "# Zeros tensor of shape [16941, 384, 1]\n",
        "print(X.shape)\n",
        "zeros = torch.zeros((X.size(0), X.size(1), 1), dtype=X.dtype)\n",
        "\n",
        "# Concatenate along the last dimension\n",
        "X = torch.cat((X, zeros), dim=-1)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "id": "3e13e0d0",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "3e13e0d0",
        "outputId": "8faccd22-70ec-4485-ac84-733a08027c42"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(torch.Size([69201, 384, 10]), torch.Size([69201, 96, 7]))"
            ]
          },
          "metadata": {},
          "execution_count": 7
        }
      ],
      "source": [
        "X.shape, y.shape"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "id": "7955f82a",
      "metadata": {
        "id": "7955f82a"
      },
      "outputs": [],
      "source": [
        "\n",
        "batch=64\n",
        "indices = np.arange(len(X))\n",
        "barrier = int(len(indices)/batch)*batch\n",
        "indices = indices[0:barrier]\n",
        "soft_border = int((configuration.sequence_length/batch))+16\n",
        "\n",
        "indices = [indices[i:i+batch] for i in range(0, len(indices), batch)]\n",
        "\n",
        "border1 = int(len(indices)*0.7)\n",
        "border2 = border1+int(len(indices)*0.1)\n",
        "border3 = border2+int(len(indices)*0.2)\n",
        "\n",
        "train_ind = indices[0:border1]\n",
        "val_ind = indices[border1-soft_border: border2]\n",
        "test_ind = indices[border2-soft_border: border3]\n",
        "\n",
        "random.shuffle(train_ind)\n",
        "random.shuffle(val_ind)\n",
        "#random.shuffle(test_ind)\n",
        "\n",
        "\n",
        "X_train = [X[item] for sublist in train_ind for item in sublist]\n",
        "y_train = [y[item] for sublist in train_ind for item in sublist]\n",
        "\n",
        "X_val = [X[item] for sublist in val_ind for item in sublist]\n",
        "y_val = [y[item] for sublist in val_ind for item in sublist]\n",
        "\n",
        "X_test = [X[item] for sublist in test_ind for item in sublist]\n",
        "y_test = [y[item] for sublist in test_ind for item in sublist]\n",
        "\n",
        "#train_indices, test_indices =train_test_split(indices,  test_size=0.2, shuffle=False)\n",
        "#indices = [item for sublist in indices for item in sublist]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "id": "26d5bda1",
      "metadata": {
        "id": "26d5bda1"
      },
      "outputs": [],
      "source": [
        "import torch\n",
        "from torch.utils.data import Dataset, DataLoader\n",
        "\n",
        "class CustomDataset(Dataset):\n",
        "    def __init__(self, tokenized_inputs,  labels=None, pos=None):\n",
        "        self.tokenized_inputs = tokenized_inputs\n",
        "        self.labels = labels\n",
        "        self.pos = pos\n",
        "        self.id_list = None\n",
        "        self.re = None\n",
        "\n",
        "    def __len__(self):\n",
        "        return len(self.tokenized_inputs)\n",
        "\n",
        "    def __getitem__(self, idx):\n",
        "        if self.labels is not None:\n",
        "            return {\n",
        "                \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
        "                \"labels_ids\": torch.tensor(self.labels[idx]),\n",
        "                #\"id\": torch.tensor(self.id_list[idx]),  # Include the id directly\n",
        "                #\"reservoir_ids\": torch.tensor(self.re[idx]),\n",
        "            }\n",
        "        else:\n",
        "            return {\n",
        "                \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
        "            }\n",
        "\n",
        "# Assuming you have X_train, y_train, X_test, y_test, trainpos, and testpos defined\n",
        "\n",
        "\n",
        "train_dataset = CustomDataset(X_train, y_train)\n",
        "\n",
        "test_dataset = CustomDataset(X_test, y_test)\n",
        "\n",
        "val_dataset = CustomDataset(X_val, y_val)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "id": "13806390",
      "metadata": {
        "id": "13806390"
      },
      "outputs": [],
      "source": [
        "from transformers import Trainer, TrainingArguments\n",
        "from torch.utils.data import DataLoader\n",
        "from torch.cuda.amp import GradScaler\n",
        "class CustomTrainer(Trainer):\n",
        "    def __init__(self, *args, gradient_accumulation_steps=1, **kwargs):\n",
        "        super().__init__(*args, **kwargs)\n",
        "        self.gradient_accumulation_steps = gradient_accumulation_steps\n",
        "        self.scaler = GradScaler()\n",
        "\n",
        "    def training_step(self, model, inputs):\n",
        "        model.train()\n",
        "        inputs = self._prepare_inputs(inputs)\n",
        "        loss = self.compute_loss(model, inputs)\n",
        "\n",
        "        if self.args.n_gpu > 1:\n",
        "            loss = loss.mean()  # mean() to average on multi-gpu parallel training\n",
        "\n",
        "        loss = loss / self.gradient_accumulation_steps\n",
        "        self.scaler.scale(loss).backward()\n",
        "\n",
        "        return loss.detach()\n",
        "\n",
        "\n",
        "    def get_train_dataloader(self) -> DataLoader:\n",
        "        \"\"\"\n",
        "        Returns the training [`~torch.utils.data.DataLoader`].\n",
        "        Will use no sampler if `train_dataset` does not implement `__len__`, a random sampler (adapted to distributed\n",
        "        training if necessary) otherwise.\n",
        "        Subclass and override this method if you want to inject some custom behavior.\n",
        "        \"\"\"\n",
        "        if self.train_dataset is None:\n",
        "            raise ValueError(\"Trainer: training requires a train_dataset.\")\n",
        "\n",
        "        train_dataset = self.train_dataset\n",
        "\n",
        "\n",
        "        loader =  DataLoader(\n",
        "            train_dataset,\n",
        "            batch_size=self._train_batch_size,\n",
        "            drop_last=self.args.dataloader_drop_last,\n",
        "            shuffle = False,\n",
        "        )\n",
        "        return loader\n",
        "\n",
        "#bert_model = TabularBertForSequenceClassification(config=configuration).to(\"cpu\", dtype=float)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "id": "cb4d10de",
      "metadata": {
        "scrolled": true,
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000
        },
        "id": "cb4d10de",
        "outputId": "a614ae40-b891-4209-f6a7-fc70c8c9cdb8"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n",
            "Could not estimate the number of tokens of the input, floating-point operations will not be computed\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 1.0523, 'learning_rate': 0.0009995549621717847, 'epoch': 0.07}\n",
            "{'eval_loss': 1.2335270464091228, 'eval_r2_score': -0.800989874865758, 'eval_mse': 1.2335270464091221, 'eval_runtime': 49.0314, 'eval_samples_per_second': 168.382, 'eval_steps_per_second': 2.631, 'epoch': 0.07}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.966, 'learning_rate': 0.0009991099243435691, 'epoch': 0.13}\n",
            "{'eval_loss': 0.6462965452913999, 'eval_r2_score': 0.05638588337488515, 'eval_mse': 0.6462965452914005, 'eval_runtime': 48.8686, 'eval_samples_per_second': 168.943, 'eval_steps_per_second': 2.64, 'epoch': 0.13}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.576, 'learning_rate': 0.0009986648865153538, 'epoch': 0.2}\n",
            "{'eval_loss': 0.47355189392649844, 'eval_r2_score': 0.30859873022819717, 'eval_mse': 0.47355189392649927, 'eval_runtime': 48.1401, 'eval_samples_per_second': 171.499, 'eval_steps_per_second': 2.68, 'epoch': 0.2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.4837, 'learning_rate': 0.0009982198486871384, 'epoch': 0.27}\n",
            "{'eval_loss': 0.3707228360179964, 'eval_r2_score': 0.45873252151741883, 'eval_mse': 0.37072283601799705, 'eval_runtime': 48.3241, 'eval_samples_per_second': 170.846, 'eval_steps_per_second': 2.669, 'epoch': 0.27}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.3206, 'learning_rate': 0.000997774810858923, 'epoch': 0.33}\n",
            "{'eval_loss': 0.3182936808122538, 'eval_r2_score': 0.535280804709251, 'eval_mse': 0.3182936808122534, 'eval_runtime': 47.8121, 'eval_samples_per_second': 172.676, 'eval_steps_per_second': 2.698, 'epoch': 0.33}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.5884, 'learning_rate': 0.0009973297730307075, 'epoch': 0.4}\n",
            "{'eval_loss': 0.3915381422533218, 'eval_r2_score': 0.4283414928965388, 'eval_mse': 0.39153814225332145, 'eval_runtime': 48.1897, 'eval_samples_per_second': 171.323, 'eval_steps_per_second': 2.677, 'epoch': 0.4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.3511, 'learning_rate': 0.0009968847352024922, 'epoch': 0.47}\n",
            "{'eval_loss': 0.3259395071066321, 'eval_r2_score': 0.5241176479862244, 'eval_mse': 0.3259395071066314, 'eval_runtime': 48.7047, 'eval_samples_per_second': 169.511, 'eval_steps_per_second': 2.649, 'epoch': 0.47}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.653, 'learning_rate': 0.0009964396973742768, 'epoch': 0.53}\n",
            "{'eval_loss': 0.39092635146771937, 'eval_r2_score': 0.42923472747425984, 'eval_mse': 0.3909263514677187, 'eval_runtime': 48.2191, 'eval_samples_per_second': 171.218, 'eval_steps_per_second': 2.675, 'epoch': 0.53}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.586, 'learning_rate': 0.0009959946595460615, 'epoch': 0.6}\n",
            "{'eval_loss': 0.3854464435070594, 'eval_r2_score': 0.4372355725153746, 'eval_mse': 0.38544644350705953, 'eval_runtime': 47.8366, 'eval_samples_per_second': 172.587, 'eval_steps_per_second': 2.697, 'epoch': 0.6}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.5493, 'learning_rate': 0.0009955496217178461, 'epoch': 0.67}\n",
            "{'eval_loss': 0.3988515970012291, 'eval_r2_score': 0.4176636095135893, 'eval_mse': 0.3988515970012292, 'eval_runtime': 48.2693, 'eval_samples_per_second': 171.04, 'eval_steps_per_second': 2.673, 'epoch': 0.67}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.4966, 'learning_rate': 0.0009951045838896306, 'epoch': 0.73}\n",
            "{'eval_loss': 0.39048334837795856, 'eval_r2_score': 0.4298815264897454, 'eval_mse': 0.3904833483779585, 'eval_runtime': 48.3836, 'eval_samples_per_second': 170.636, 'eval_steps_per_second': 2.666, 'epoch': 0.73}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.3569, 'learning_rate': 0.0009946595460614152, 'epoch': 0.8}\n",
            "{'eval_loss': 0.3382572675593198, 'eval_r2_score': 0.5061333144275124, 'eval_mse': 0.3382572675593201, 'eval_runtime': 49.1856, 'eval_samples_per_second': 167.854, 'eval_steps_per_second': 2.623, 'epoch': 0.8}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.2885, 'learning_rate': 0.0009942145082331999, 'epoch': 0.87}\n",
            "{'eval_loss': 0.29172857045830414, 'eval_r2_score': 0.5740667355985904, 'eval_mse': 0.29172857045830347, 'eval_runtime': 47.7038, 'eval_samples_per_second': 173.068, 'eval_steps_per_second': 2.704, 'epoch': 0.87}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.7431, 'learning_rate': 0.0009937694704049845, 'epoch': 0.93}\n",
            "{'eval_loss': 0.35955250157806407, 'eval_r2_score': 0.4750415755294468, 'eval_mse': 0.359552501578064, 'eval_runtime': 47.762, 'eval_samples_per_second': 172.857, 'eval_steps_per_second': 2.701, 'epoch': 0.93}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.342, 'learning_rate': 0.0009933244325767692, 'epoch': 1.0}\n",
            "{'eval_loss': 0.3039052880761863, 'eval_r2_score': 0.5562883291966014, 'eval_mse': 0.30390528807618633, 'eval_runtime': 48.214, 'eval_samples_per_second': 171.236, 'eval_steps_per_second': 2.676, 'epoch': 1.0}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.355, 'learning_rate': 0.0009928793947485536, 'epoch': 1.07}\n",
            "{'eval_loss': 0.32690981537145153, 'eval_r2_score': 0.5227009661505684, 'eval_mse': 0.32690981537145075, 'eval_runtime': 48.2246, 'eval_samples_per_second': 171.199, 'eval_steps_per_second': 2.675, 'epoch': 1.07}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.5317, 'learning_rate': 0.0009924343569203383, 'epoch': 1.13}\n",
            "{'eval_loss': 0.29316613445043976, 'eval_r2_score': 0.5719678451025568, 'eval_mse': 0.2931661344504395, 'eval_runtime': 47.7238, 'eval_samples_per_second': 172.995, 'eval_steps_per_second': 2.703, 'epoch': 1.13}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.3575, 'learning_rate': 0.000991989319092123, 'epoch': 1.2}\n",
            "{'eval_loss': 0.2945830008131623, 'eval_r2_score': 0.5698991738231309, 'eval_mse': 0.2945830008131623, 'eval_runtime': 47.8511, 'eval_samples_per_second': 172.535, 'eval_steps_per_second': 2.696, 'epoch': 1.2}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.3253, 'learning_rate': 0.0009915442812639076, 'epoch': 1.27}\n",
            "{'eval_loss': 0.3078912184386207, 'eval_r2_score': 0.550468740363457, 'eval_mse': 0.3078912184386206, 'eval_runtime': 48.0808, 'eval_samples_per_second': 171.711, 'eval_steps_per_second': 2.683, 'epoch': 1.27}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.2353, 'learning_rate': 0.000991099243435692, 'epoch': 1.34}\n",
            "{'eval_loss': 0.2779993545307931, 'eval_r2_score': 0.5941118403632328, 'eval_mse': 0.2779993545307938, 'eval_runtime': 48.0638, 'eval_samples_per_second': 171.772, 'eval_steps_per_second': 2.684, 'epoch': 1.34}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.4669, 'learning_rate': 0.0009906542056074767, 'epoch': 1.4}\n",
            "{'eval_loss': 0.29213608560923726, 'eval_r2_score': 0.5734717501357075, 'eval_mse': 0.2921360856092374, 'eval_runtime': 47.6997, 'eval_samples_per_second': 173.083, 'eval_steps_per_second': 2.704, 'epoch': 1.4}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.2241, 'learning_rate': 0.0009902091677792613, 'epoch': 1.47}\n",
            "{'eval_loss': 0.3985658968347184, 'eval_r2_score': 0.41808074111085236, 'eval_mse': 0.39856589683471705, 'eval_runtime': 48.1956, 'eval_samples_per_second': 171.302, 'eval_steps_per_second': 2.677, 'epoch': 1.47}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.6035, 'learning_rate': 0.0009897641299510458, 'epoch': 1.54}\n",
            "{'eval_loss': 0.3036625391360481, 'eval_r2_score': 0.5566427505970858, 'eval_mse': 0.30366253913604796, 'eval_runtime': 48.1188, 'eval_samples_per_second': 171.575, 'eval_steps_per_second': 2.681, 'epoch': 1.54}\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-6-5df45e9992a5>:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
            "<ipython-input-6-5df45e9992a5>:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
            "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'loss': 0.3334, 'learning_rate': 0.0009893190921228304, 'epoch': 1.6}\n"
          ]
        },
        {
          "output_type": "error",
          "ename": "KeyboardInterrupt",
          "evalue": "ignored",
          "traceback": [
            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
            "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
            "\u001b[0;32m<ipython-input-8-7ec879d4ca03>\u001b[0m in \u001b[0;36m<cell line: 71>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     69\u001b[0m )\n\u001b[1;32m     70\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 71\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m   1553\u001b[0m                 \u001b[0mhf_hub_utils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0menable_progress_bars\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1554\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1555\u001b[0;31m             return inner_training_loop(\n\u001b[0m\u001b[1;32m   1556\u001b[0m                 \u001b[0margs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1557\u001b[0m                 \u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mresume_from_checkpoint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m   1927\u001b[0m                     \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallback_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_step_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1928\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1929\u001b[0;31m                     \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_log_save_evaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtr_loss\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrial\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mepoch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_keys_for_eval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1930\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1931\u001b[0m                     \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallback_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_substep_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontrol\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36m_maybe_log_save_evaluate\u001b[0;34m(self, tr_loss, model, trial, epoch, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m   2254\u001b[0m                     \u001b[0mmetrics\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset_metrics\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2255\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2256\u001b[0;31m                 \u001b[0mmetrics\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mignore_keys\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mignore_keys_for_eval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2257\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_report_to_hp_search\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrial\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mglobal_step\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2258\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mevaluate\u001b[0;34m(self, eval_dataset, ignore_keys, metric_key_prefix)\u001b[0m\n\u001b[1;32m   2970\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2971\u001b[0m         \u001b[0meval_loop\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprediction_loop\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_legacy_prediction_loop\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluation_loop\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2972\u001b[0;31m         output = eval_loop(\n\u001b[0m\u001b[1;32m   2973\u001b[0m             \u001b[0meval_dataloader\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2974\u001b[0m             \u001b[0mdescription\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"Evaluation\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mevaluation_loop\u001b[0;34m(self, dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix)\u001b[0m\n\u001b[1;32m   3159\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3160\u001b[0m             \u001b[0;31m# Prediction step\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3161\u001b[0;31m             \u001b[0mloss\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogits\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprediction_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprediction_loss_only\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mignore_keys\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mignore_keys\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3162\u001b[0m             \u001b[0mmain_input_name\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"main_input_name\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"input_ids\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3163\u001b[0m             \u001b[0minputs_decode\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prepare_input\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmain_input_name\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minclude_inputs_for_metrics\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mprediction_step\u001b[0;34m(self, model, inputs, prediction_loss_only, ignore_keys)\u001b[0m\n\u001b[1;32m   3374\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0mhas_labels\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mloss_without_labels\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3375\u001b[0m                     \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute_loss_context_manager\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3376\u001b[0;31m                         \u001b[0mloss\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreturn_outputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3377\u001b[0m                     \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdetach\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3378\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mcompute_loss\u001b[0;34m(self, model, inputs, return_outputs)\u001b[0m\n\u001b[1;32m   2705\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2706\u001b[0m             \u001b[0mlabels\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2707\u001b[0;31m         \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   2708\u001b[0m         \u001b[0;31m# Save past state if it exists\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   2709\u001b[0m         \u001b[0;31m# TODO: this needs to be fixed and made cleaner later.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1499\u001b[0m                 \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1500\u001b[0m                 or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1502\u001b[0m         \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1503\u001b[0m         \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/content/reservoirtransformers.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input_ids, reservoir_ids, state_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, labels_ids, output_attentions, output_hidden_states, return_dict, id)\u001b[0m\n\u001b[1;32m   1100\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreservoir\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreservoirs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1101\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1102\u001b[0;31m             \u001b[0mreservoir_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreservoir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreservoir_ids\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdouble\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstate_ids\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs_embeds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1103\u001b[0m             \u001b[0moutput_re\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreservoir_output\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Output'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1104\u001b[0m             \u001b[0mres_state\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mreservoir_output\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'State'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1499\u001b[0m                 \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1500\u001b[0m                 or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1502\u001b[0m         \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1503\u001b[0m         \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/content/reservoirtransformers.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input_data, res_state)\u001b[0m\n\u001b[1;32m    996\u001b[0m             \u001b[0minput_proj\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mW_in\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    997\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 998\u001b[0;31m             \u001b[0mres_proj\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mW_res\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres_state\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    999\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1000\u001b[0m             \u001b[0;31m#print('res_state', res_state.shape)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1499\u001b[0m                 \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_pre_hooks\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0m_global_backward_hooks\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1500\u001b[0m                 or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1502\u001b[0m         \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1503\u001b[0m         \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m    112\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    113\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mTensor\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 114\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mF\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbias\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    115\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    116\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mextra_repr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
          ]
        }
      ],
      "source": [
        "from transformers import BertForSequenceClassification, Trainer, TrainingArguments\n",
        "from sklearn.metrics import mean_squared_error\n",
        "from transformers import Trainer, TrainingArguments\n",
        "from transformers import EarlyStoppingCallback, IntervalStrategy\n",
        "from sklearn.metrics import r2_score, accuracy_score\n",
        "import numpy as np\n",
        "\n",
        "def compute_metrics1(p):\n",
        "\n",
        "    preds = p.predictions.flatten()\n",
        "    labels = p.label_ids.flatten()\n",
        "\n",
        "    r2 = r2_score(labels, preds)\n",
        "    mse = mean_squared_error(labels, preds)\n",
        "\n",
        "    return {\"r2_score\": r2, \"mse\": mse}\n",
        "\n",
        "def compute_metrics_classification(p):\n",
        "    preds = np.argmax(p.predictions , axis=1)\n",
        "    labels = p.label_ids\n",
        "\n",
        "    accuracy = accuracy_score(labels, preds)\n",
        "    return {\"accuracy_score\": accuracy}\n",
        "\n",
        "def compute_metrics(p):\n",
        "\n",
        "\n",
        "    prediction_scores, labels_ids = p\n",
        "    #print('here')\n",
        "    #print(prediction_scores)\n",
        "\n",
        "    mask = labels_ids != 100\n",
        "    #print(mask)\n",
        "    masked_predictions = prediction_scores[mask]\n",
        "    masked_labels = labels_ids[mask]\n",
        "\n",
        "    mse = mean_squared_error(masked_predictions, masked_labels)\n",
        "    return {\"mse\": mse}\n",
        "\n",
        "training_args = TrainingArguments(\n",
        "    output_dir='./results_task1',\n",
        "    num_train_epochs=150,\n",
        "    label_names=[\"labels_ids\"],\n",
        "    disable_tqdm = True,\n",
        "    #label_names=[\"labels_mask\"],\n",
        "    do_eval=True,\n",
        "    learning_rate=0.001,\n",
        "    per_device_train_batch_size=batch,\n",
        "    per_device_eval_batch_size=batch,\n",
        "    logging_dir='./logs',\n",
        "    logging_strategy=\"steps\",\n",
        "    logging_steps=50,\n",
        "    evaluation_strategy=\"steps\",\n",
        "    eval_steps = 50,\n",
        "    save_strategy=\"steps\",\n",
        "    save_steps=50,\n",
        "\n",
        "    save_total_limit=2,\n",
        "    load_best_model_at_end=True,\n",
        ")\n",
        "\n",
        "trainer = CustomTrainer(\n",
        "    model=model,\n",
        "    args=training_args,\n",
        "    train_dataset=train_dataset,\n",
        "    eval_dataset=val_dataset,\n",
        "    compute_metrics=compute_metrics1, #compute_metrics1,#compute_metrics_classification,\n",
        "    callbacks = [EarlyStoppingCallback(early_stopping_patience=50)]\n",
        ")\n",
        "\n",
        "trainer.train()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "id": "cac7d37e",
      "metadata": {
        "id": "cac7d37e"
      },
      "outputs": [],
      "source": [
        "model = reservoirtransformers.ReservoirTTimeSeries.from_pretrained(\"/content/results_task1/checkpoint-1000\", config=configuration).to(\"cuda\", dtype=float)\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "id": "827f3400",
      "metadata": {
        "id": "827f3400"
      },
      "outputs": [],
      "source": [
        "cnt = 0\n",
        "ln = len(X_test)\n",
        "y_pred = []\n",
        "y_test1 = []\n",
        "while cnt < ln:\n",
        "    #print(cnt, ln)\n",
        "    input_ids = torch.stack(X_test[cnt:cnt+batch], dim=0)\n",
        "    #y_test1 = y_test1 + [k.detach().numpy().flatten() for k in y_test[cnt:cnt+64]]\n",
        "\n",
        "    output = model(inputs_embeds = input_ids.to(model.device))['logits']\n",
        "    y_pred = y_pred + list(output.cpu().detach().numpy().reshape(output.size(0), -1))\n",
        "    #y_test = y_test + labels_ids\n",
        "\n",
        "    cnt=cnt+batch\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 11,
      "id": "1fae404b",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "1fae404b",
        "outputId": "30d442ef-6cc3-4992-8e3d-63fc6a009e1f"
      },
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.28511558109858404"
            ]
          },
          "metadata": {},
          "execution_count": 11
        }
      ],
      "source": [
        "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
        "y_test1 = [i.detach().numpy().flatten() for i in y_test]\n",
        "\n",
        "mse = mean_squared_error(y_test1, y_pred)\n",
        "mse"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.11.4"
    },
    "colab": {
      "provenance": [],
      "gpuType": "T4"
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "e6ea54fa204f420abc9f819282db1296": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_4c4534e14e5042dd82da2adddd3d1e05",
              "IPY_MODEL_8716801d7dd64129a8fdb2565abece36",
              "IPY_MODEL_eb798316f734455b84d2fe4a3f97bde2"
            ],
            "layout": "IPY_MODEL_a07175c34b8147359c110edd1a0bf77c"
          }
        },
        "4c4534e14e5042dd82da2adddd3d1e05": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_ef2730c3d0be4e8a8c3116a8f7572e1e",
            "placeholder": "​",
            "style": "IPY_MODEL_2ad8c039f15a4f69acea32560f9142fb",
            "value": "100%"
          }
        },
        "8716801d7dd64129a8fdb2565abece36": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8cd998c846c1484b94b6303bf4da4dfa",
            "max": 68577,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_2f6e423409b9428fbc507343e7818f60",
            "value": 68577
          }
        },
        "eb798316f734455b84d2fe4a3f97bde2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_bd7ebe4158de456994de17fc28039eca",
            "placeholder": "​",
            "style": "IPY_MODEL_bcf2168e5c5e4834b1100834fa058ffa",
            "value": " 68577/68577 [00:00&lt;00:00, 625066.08it/s]"
          }
        },
        "a07175c34b8147359c110edd1a0bf77c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ef2730c3d0be4e8a8c3116a8f7572e1e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2ad8c039f15a4f69acea32560f9142fb": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "8cd998c846c1484b94b6303bf4da4dfa": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "2f6e423409b9428fbc507343e7818f60": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "bd7ebe4158de456994de17fc28039eca": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "bcf2168e5c5e4834b1100834fa058ffa": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}