{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "3eeaaae3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import reservoirtransformers\n",
    "import math\n",
    "import os\n",
    "import warnings\n",
    "from dataclasses import dataclass\n",
    "from typing import List, Optional, Tuple, Union\n",
    "\n",
    "import torch\n",
    "import torch.utils.checkpoint\n",
    "from torch import nn\n",
    "from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss\n",
    "import torch.nn.functional as F"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "7eb58cec",
   "metadata": {},
   "outputs": [],
   "source": [
    "from configuration import ReservoirTConfig\n",
    "\n",
    "configuration = ReservoirTConfig()\n",
    "\n",
    "configuration.output_size=862\n",
    "configuration.re_output_size=16\n",
    "configuration.max_sequence_length=3399\n",
    "configuration.sequence_length=384\n",
    "configuration.pred_len=336\n",
    "configuration.hidden_size=864\n",
    "configuration.num_attention_heads=4\n",
    "configuration.hidden_dropout_prob=0.0\n",
    "configuration.num_hidden_layers=16\n",
    "configuration.num_reservoirs = 10\n",
    "configuration.intermediate_size=128\n",
    "configuration.reservoir_size = [30, 15, 20, 25, 30, 35, 40, 45, 50, 50]\n",
    "configuration.spectral_radius = [0.6, 0.8, 0.55, 0.6, 0.5, 0.4, 0.3, 0.2, 0.81, 0.05]\n",
    "configuration.sparsity = [0.6, 0.55, 0.5, 0.45, 0.4, 0.35, 0.3, 0.25, 0.2, 0.15]\n",
    "configuration.leaky = [0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39]\n",
    "configuration.attention_probs_dropout_prob=0.0\n",
    "#bert_model = TabularBertForRegression(config=configuration).to(\"cpu\", dtype=float)\n",
    "model = reservoirtransformers.ReservoirTTimeSeries(config=configuration).to(\"cpu\", dtype=float)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0f6bf41e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"\\nzero_col = np.zeros((X.shape[0], 1))\\n\\n# Concatenate the original array with the zero column along the second axis (columns)\\nX = np.hstack((X, zero_col))\\n#X =  dataset.drop(['ate'], axis = 1).values\\n\\n#X_train, X_test, y_train, y_test =train_test_split(X.values, y, test_size=0.2, shuffle=False)\\n\""
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "# prepare data for lstm\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from pandas import read_csv\n",
    "from pandas import DataFrame\n",
    "import random\n",
    "from sklearn.model_selection import train_test_split\n",
    "from pandas import concat\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "dataset= read_csv('traffic.csv')\n",
    "dataset=dataset.dropna()\n",
    "dataset = dataset.drop(['date'], axis = 1)\n",
    "dataset = dataset.dropna()\n",
    "#data = dataset.values[0:14000]\n",
    "\n",
    "\n",
    "y = dataset.OT.values\n",
    "\n",
    "\n",
    "X = dataset.values\n",
    "\n",
    "scaler = StandardScaler()\n",
    "X = scaler.fit_transform(X)\n",
    "\n",
    "\n",
    "\n",
    "#X=X[1:]\n",
    "\n",
    "#Reservoir_id = np.array([[0] * len(X[0])] + X[:-1].tolist())\n",
    "# Create a zero column of shape (100, 1)\n",
    "'''\n",
    "zero_col = np.zeros((X.shape[0], 1))\n",
    "\n",
    "# Concatenate the original array with the zero column along the second axis (columns)\n",
    "X = np.hstack((X, zero_col))\n",
    "#X =  dataset.drop(['ate'], axis = 1).values\n",
    "\n",
    "#X_train, X_test, y_train, y_test =train_test_split(X.values, y, test_size=0.2, shuffle=False)\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a0f3d60a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3000, 862)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "6d0706c5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "76f61b899c4e4f388615c772dfbc2722",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/1897 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\826414745.py:9: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at ..\\torch\\csrc\\utils\\tensor_new.cpp:248.)\n",
      "  return torch.tensor(sequences), torch.tensor(targets)\n"
     ]
    }
   ],
   "source": [
    "from tqdm.auto import tqdm\n",
    "# 1. Preprocess the data into the required format\n",
    "def create_sequences(data, seq_length, pred_length):\n",
    "    sequences = []\n",
    "    targets = []\n",
    "    for i in tqdm(range(len(data) - seq_length - pred_length + 1)):\n",
    "        sequences.append(data[i:i+seq_length])\n",
    "        targets.append(data[i+seq_length:i+seq_length+pred_length])\n",
    "    return torch.tensor(sequences), torch.tensor(targets)\n",
    "\n",
    "X, y = create_sequences(X, seq_length=configuration.sequence_length, pred_length=configuration.pred_len)\n",
    "# Zeros tensor of shape [16941, 384, 1]\n",
    "\n",
    "zeros = torch.zeros((X.size(0), X.size(1), 2), dtype=X.dtype)\n",
    "\n",
    "# Concatenate along the last dimension\n",
    "X = torch.cat((X, zeros), dim=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "3e13e0d0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(torch.Size([1897, 384, 864]), torch.Size([1897, 720, 862]))"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.shape, y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "7955f82a",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "batch=32\n",
    "indices = np.arange(len(X)) \n",
    "barrier = int(len(indices)/batch)*batch\n",
    "indices = indices[0:barrier]\n",
    "soft_border = int((configuration.sequence_length/batch))+8\n",
    "\n",
    "indices = [indices[i:i+batch] for i in range(0, len(indices), batch)]\n",
    "\n",
    "border1 = int(len(indices)*0.7)\n",
    "border2 = border1+int(len(indices)*0.1)\n",
    "border3 = border2+int(len(indices)*0.2)\n",
    "\n",
    "train_ind = indices[0:border1]\n",
    "val_ind = indices[border1-soft_border: border2]\n",
    "test_ind = indices[border2-soft_border: border3]\n",
    "\n",
    "random.shuffle(train_ind)\n",
    "random.shuffle(val_ind)\n",
    "#random.shuffle(test_ind)\n",
    "\n",
    "\n",
    "X_train = [X[item] for sublist in train_ind for item in sublist]\n",
    "y_train = [y[item] for sublist in train_ind for item in sublist]\n",
    "\n",
    "X_val = [X[item] for sublist in val_ind for item in sublist]\n",
    "y_val = [y[item] for sublist in val_ind for item in sublist]\n",
    "\n",
    "X_test = [X[item] for sublist in test_ind for item in sublist]\n",
    "y_test = [y[item] for sublist in test_ind for item in sublist]\n",
    "\n",
    "#train_indices, test_indices =train_test_split(indices,  test_size=0.2, shuffle=False)\n",
    "#indices = [item for sublist in indices for item in sublist]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "26d5bda1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch.utils.data import Dataset, DataLoader\n",
    "\n",
    "class CustomDataset(Dataset):\n",
    "    def __init__(self, tokenized_inputs,  labels=None, pos=None):\n",
    "        self.tokenized_inputs = tokenized_inputs\n",
    "        self.labels = labels\n",
    "        self.pos = pos\n",
    "        self.id_list = None\n",
    "        self.re = None\n",
    "\n",
    "    def __len__(self):\n",
    "        return len(self.tokenized_inputs)\n",
    "\n",
    "    def __getitem__(self, idx):\n",
    "        if self.labels is not None:\n",
    "            return {\n",
    "                \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
    "                \"labels_ids\": torch.tensor(self.labels[idx]),\n",
    "                #\"id\": torch.tensor(self.id_list[idx]),  # Include the id directly\n",
    "                #\"reservoir_ids\": torch.tensor(self.re[idx]),\n",
    "            }\n",
    "        else:\n",
    "            return {\n",
    "                \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
    "            }\n",
    "\n",
    "# Assuming you have X_train, y_train, X_test, y_test, trainpos, and testpos defined\n",
    "\n",
    "\n",
    "train_dataset = CustomDataset(X_train, y_train)\n",
    "\n",
    "test_dataset = CustomDataset(X_test, y_test)\n",
    "\n",
    "val_dataset = CustomDataset(X_val, y_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "13806390",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import Trainer, TrainingArguments\n",
    "from torch.utils.data import DataLoader\n",
    "from torch.cuda.amp import GradScaler\n",
    "class CustomTrainer(Trainer):\n",
    "    def __init__(self, *args, gradient_accumulation_steps=1, **kwargs):\n",
    "        super().__init__(*args, **kwargs)\n",
    "        self.gradient_accumulation_steps = gradient_accumulation_steps\n",
    "        self.scaler = GradScaler()\n",
    "\n",
    "    def training_step(self, model, inputs):\n",
    "        model.train()\n",
    "        inputs = self._prepare_inputs(inputs)\n",
    "        loss = self.compute_loss(model, inputs)\n",
    "\n",
    "        if self.args.n_gpu > 1:\n",
    "            loss = loss.mean()  # mean() to average on multi-gpu parallel training\n",
    "\n",
    "        loss = loss / self.gradient_accumulation_steps\n",
    "        self.scaler.scale(loss).backward()\n",
    "\n",
    "        return loss.detach()\n",
    "\n",
    "\n",
    "    def get_train_dataloader(self) -> DataLoader:\n",
    "        \"\"\"\n",
    "        Returns the training [`~torch.utils.data.DataLoader`].\n",
    "        Will use no sampler if `train_dataset` does not implement `__len__`, a random sampler (adapted to distributed\n",
    "        training if necessary) otherwise.\n",
    "        Subclass and override this method if you want to inject some custom behavior.\n",
    "        \"\"\"\n",
    "        if self.train_dataset is None:\n",
    "            raise ValueError(\"Trainer: training requires a train_dataset.\")\n",
    "\n",
    "        train_dataset = self.train_dataset\n",
    "\n",
    "\n",
    "        loader =  DataLoader(\n",
    "            train_dataset,\n",
    "            batch_size=self._train_batch_size,\n",
    "            drop_last=self.args.dataloader_drop_last,\n",
    "            shuffle = False,\n",
    "        )\n",
    "        return loader\n",
    "\n",
    "#bert_model = TabularBertForSequenceClassification(config=configuration).to(\"cpu\", dtype=float)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "cb4d10de",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n",
      "Could not estimate the number of tokens of the input, floating-point operations will not be computed\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.9463, 'learning_rate': 4.9796747967479676e-05, 'epoch': 0.61}\n",
      "{'eval_loss': 0.9189189431931412, 'eval_r2_score': 0.01628871094205031, 'eval_mse': 0.9189189431931457, 'eval_runtime': 99.6319, 'eval_samples_per_second': 6.905, 'eval_steps_per_second': 0.432, 'epoch': 0.61}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.9324, 'learning_rate': 4.959349593495935e-05, 'epoch': 1.22}\n",
      "{'eval_loss': 0.9139789159854286, 'eval_r2_score': 0.021577056087710433, 'eval_mse': 0.9139789159854227, 'eval_runtime': 94.4889, 'eval_samples_per_second': 7.281, 'eval_steps_per_second': 0.455, 'epoch': 1.22}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.9298, 'learning_rate': 4.9390243902439024e-05, 'epoch': 1.83}\n",
      "{'eval_loss': 0.912665332019612, 'eval_r2_score': 0.022983259960071778, 'eval_mse': 0.9126653320196009, 'eval_runtime': 91.4031, 'eval_samples_per_second': 7.527, 'eval_steps_per_second': 0.47, 'epoch': 1.83}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.7884, 'learning_rate': 4.9186991869918704e-05, 'epoch': 2.44}\n",
      "{'eval_loss': 0.540036803462277, 'eval_r2_score': 0.4218855710748436, 'eval_mse': 0.5400368034622796, 'eval_runtime': 93.1906, 'eval_samples_per_second': 7.383, 'eval_steps_per_second': 0.461, 'epoch': 2.44}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.5221, 'learning_rate': 4.898373983739837e-05, 'epoch': 3.05}\n",
      "{'eval_loss': 0.47051958594256743, 'eval_r2_score': 0.496304400030974, 'eval_mse': 0.4705195859425666, 'eval_runtime': 92.967, 'eval_samples_per_second': 7.4, 'eval_steps_per_second': 0.463, 'epoch': 3.05}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.4751, 'learning_rate': 4.878048780487805e-05, 'epoch': 3.66}\n",
      "{'eval_loss': 0.434861336787101, 'eval_r2_score': 0.5344768879333057, 'eval_mse': 0.4348613367871099, 'eval_runtime': 93.2593, 'eval_samples_per_second': 7.377, 'eval_steps_per_second': 0.461, 'epoch': 3.66}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.4463, 'learning_rate': 4.8577235772357725e-05, 'epoch': 4.27}\n",
      "{'eval_loss': 0.4041676312430797, 'eval_r2_score': 0.5673347856514219, 'eval_mse': 0.4041676312430831, 'eval_runtime': 93.4154, 'eval_samples_per_second': 7.365, 'eval_steps_per_second': 0.46, 'epoch': 4.27}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.4112, 'learning_rate': 4.8373983739837406e-05, 'epoch': 4.88}\n",
      "{'eval_loss': 0.3767060358412765, 'eval_r2_score': 0.5967326793529371, 'eval_mse': 0.37670603584127466, 'eval_runtime': 93.3276, 'eval_samples_per_second': 7.372, 'eval_steps_per_second': 0.461, 'epoch': 4.88}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.3946, 'learning_rate': 4.817073170731707e-05, 'epoch': 5.49}\n",
      "{'eval_loss': 0.36889112969838767, 'eval_r2_score': 0.605098609180186, 'eval_mse': 0.36889112969839044, 'eval_runtime': 94.1758, 'eval_samples_per_second': 7.305, 'eval_steps_per_second': 0.457, 'epoch': 5.49}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.3839, 'learning_rate': 4.796747967479675e-05, 'epoch': 6.1}\n",
      "{'eval_loss': 0.35900467331865404, 'eval_r2_score': 0.6156821528339174, 'eval_mse': 0.3590046733186544, 'eval_runtime': 94.5451, 'eval_samples_per_second': 7.277, 'eval_steps_per_second': 0.455, 'epoch': 6.1}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.3756, 'learning_rate': 4.776422764227643e-05, 'epoch': 6.71}\n",
      "{'eval_loss': 0.3541691157219363, 'eval_r2_score': 0.6208586622877896, 'eval_mse': 0.35416911572193616, 'eval_runtime': 93.3233, 'eval_samples_per_second': 7.372, 'eval_steps_per_second': 0.461, 'epoch': 6.71}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.3699, 'learning_rate': 4.75609756097561e-05, 'epoch': 7.32}\n",
      "{'eval_loss': 0.3459118455648494, 'eval_r2_score': 0.629698146913171, 'eval_mse': 0.34591184556484944, 'eval_runtime': 92.9501, 'eval_samples_per_second': 7.402, 'eval_steps_per_second': 0.463, 'epoch': 7.32}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.3612, 'learning_rate': 4.7357723577235774e-05, 'epoch': 7.93}\n",
      "{'eval_loss': 0.34515533469892995, 'eval_r2_score': 0.6305079988425615, 'eval_mse': 0.34515533469893156, 'eval_runtime': 92.887, 'eval_samples_per_second': 7.407, 'eval_steps_per_second': 0.463, 'epoch': 7.93}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.3531, 'learning_rate': 4.715447154471545e-05, 'epoch': 8.54}\n",
      "{'eval_loss': 0.33396273579962005, 'eval_r2_score': 0.6424897802311238, 'eval_mse': 0.3339627357996214, 'eval_runtime': 92.8816, 'eval_samples_per_second': 7.407, 'eval_steps_per_second': 0.463, 'epoch': 8.54}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.3433, 'learning_rate': 4.695121951219512e-05, 'epoch': 9.15}\n",
      "{'eval_loss': 0.3249191380625405, 'eval_r2_score': 0.6521710358560739, 'eval_mse': 0.3249191380625441, 'eval_runtime': 93.5833, 'eval_samples_per_second': 7.352, 'eval_steps_per_second': 0.459, 'epoch': 9.15}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.3309, 'learning_rate': 4.6747967479674795e-05, 'epoch': 9.76}\n",
      "{'eval_loss': 0.31651308154416624, 'eval_r2_score': 0.6611697976672664, 'eval_mse': 0.31651308154416596, 'eval_runtime': 93.7729, 'eval_samples_per_second': 7.337, 'eval_steps_per_second': 0.459, 'epoch': 9.76}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.319, 'learning_rate': 4.6544715447154476e-05, 'epoch': 10.37}\n",
      "{'eval_loss': 0.3058003593240807, 'eval_r2_score': 0.6726378666003343, 'eval_mse': 0.3058003593240794, 'eval_runtime': 93.019, 'eval_samples_per_second': 7.396, 'eval_steps_per_second': 0.462, 'epoch': 10.37}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.3083, 'learning_rate': 4.634146341463415e-05, 'epoch': 10.98}\n",
      "{'eval_loss': 0.2979443534618666, 'eval_r2_score': 0.6810477940599999, 'eval_mse': 0.29794435346186376, 'eval_runtime': 93.5305, 'eval_samples_per_second': 7.356, 'eval_steps_per_second': 0.46, 'epoch': 10.98}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.2978, 'learning_rate': 4.613821138211382e-05, 'epoch': 11.59}\n",
      "{'eval_loss': 0.28886376828353866, 'eval_r2_score': 0.6907686450853705, 'eval_mse': 0.28886376828353794, 'eval_runtime': 93.166, 'eval_samples_per_second': 7.385, 'eval_steps_per_second': 0.462, 'epoch': 11.59}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.2912, 'learning_rate': 4.59349593495935e-05, 'epoch': 12.2}\n",
      "{'eval_loss': 0.28191552552101756, 'eval_r2_score': 0.698206803690367, 'eval_mse': 0.28191552552101756, 'eval_runtime': 93.1732, 'eval_samples_per_second': 7.384, 'eval_steps_per_second': 0.462, 'epoch': 12.2}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.2819, 'learning_rate': 4.573170731707318e-05, 'epoch': 12.8}\n",
      "{'eval_loss': 0.27707183758524684, 'eval_r2_score': 0.703392016747937, 'eval_mse': 0.2770718375852447, 'eval_runtime': 92.9989, 'eval_samples_per_second': 7.398, 'eval_steps_per_second': 0.462, 'epoch': 12.8}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.2734, 'learning_rate': 4.5528455284552844e-05, 'epoch': 13.41}\n",
      "{'eval_loss': 0.27036003838831424, 'eval_r2_score': 0.7105770603133333, 'eval_mse': 0.2703600383883142, 'eval_runtime': 94.5132, 'eval_samples_per_second': 7.279, 'eval_steps_per_second': 0.455, 'epoch': 13.41}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.2679, 'learning_rate': 4.5325203252032525e-05, 'epoch': 14.02}\n",
      "{'eval_loss': 0.2657049853691887, 'eval_r2_score': 0.715560337935369, 'eval_mse': 0.2657049853691867, 'eval_runtime': 93.3618, 'eval_samples_per_second': 7.369, 'eval_steps_per_second': 0.461, 'epoch': 14.02}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.2604, 'learning_rate': 4.51219512195122e-05, 'epoch': 14.63}\n",
      "{'eval_loss': 0.2567057366181816, 'eval_r2_score': 0.7251941175575864, 'eval_mse': 0.2567057366181822, 'eval_runtime': 94.0346, 'eval_samples_per_second': 7.316, 'eval_steps_per_second': 0.457, 'epoch': 14.63}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.2543, 'learning_rate': 4.491869918699187e-05, 'epoch': 15.24}\n",
      "{'eval_loss': 0.24811867042454736, 'eval_r2_score': 0.734386651912371, 'eval_mse': 0.2481186704245485, 'eval_runtime': 95.1254, 'eval_samples_per_second': 7.233, 'eval_steps_per_second': 0.452, 'epoch': 15.24}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_28908\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[10], line 68\u001b[0m\n\u001b[0;32m     37\u001b[0m training_args \u001b[38;5;241m=\u001b[39m TrainingArguments(\n\u001b[0;32m     38\u001b[0m     output_dir\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m./results_task1\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m     39\u001b[0m     num_train_epochs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m150\u001b[39m,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m     56\u001b[0m     load_best_model_at_end\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[0;32m     57\u001b[0m )\n\u001b[0;32m     59\u001b[0m trainer \u001b[38;5;241m=\u001b[39m CustomTrainer(\n\u001b[0;32m     60\u001b[0m     model\u001b[38;5;241m=\u001b[39mmodel,\n\u001b[0;32m     61\u001b[0m     args\u001b[38;5;241m=\u001b[39mtraining_args,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m     65\u001b[0m     callbacks \u001b[38;5;241m=\u001b[39m [EarlyStoppingCallback(early_stopping_patience\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m50\u001b[39m)]\n\u001b[0;32m     66\u001b[0m )\n\u001b[1;32m---> 68\u001b[0m trainer\u001b[38;5;241m.\u001b[39mtrain()\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\trainer.py:1555\u001b[0m, in \u001b[0;36mTrainer.train\u001b[1;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[0;32m   1553\u001b[0m         hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[0;32m   1554\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1555\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m inner_training_loop(\n\u001b[0;32m   1556\u001b[0m         args\u001b[38;5;241m=\u001b[39margs,\n\u001b[0;32m   1557\u001b[0m         resume_from_checkpoint\u001b[38;5;241m=\u001b[39mresume_from_checkpoint,\n\u001b[0;32m   1558\u001b[0m         trial\u001b[38;5;241m=\u001b[39mtrial,\n\u001b[0;32m   1559\u001b[0m         ignore_keys_for_eval\u001b[38;5;241m=\u001b[39mignore_keys_for_eval,\n\u001b[0;32m   1560\u001b[0m     )\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\trainer.py:1837\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[1;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[0;32m   1834\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_step_begin(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n\u001b[0;32m   1836\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maccelerator\u001b[38;5;241m.\u001b[39maccumulate(model):\n\u001b[1;32m-> 1837\u001b[0m     tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining_step(model, inputs)\n\u001b[0;32m   1839\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m   1840\u001b[0m     args\u001b[38;5;241m.\u001b[39mlogging_nan_inf_filter\n\u001b[0;32m   1841\u001b[0m     \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torch_tpu_available()\n\u001b[0;32m   1842\u001b[0m     \u001b[38;5;129;01mand\u001b[39;00m (torch\u001b[38;5;241m.\u001b[39misnan(tr_loss_step) \u001b[38;5;129;01mor\u001b[39;00m torch\u001b[38;5;241m.\u001b[39misinf(tr_loss_step))\n\u001b[0;32m   1843\u001b[0m ):\n\u001b[0;32m   1844\u001b[0m     \u001b[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[0;32m   1845\u001b[0m     tr_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m tr_loss \u001b[38;5;241m/\u001b[39m (\u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_globalstep_last_logged)\n",
      "Cell \u001b[1;32mIn[9], line 19\u001b[0m, in \u001b[0;36mCustomTrainer.training_step\u001b[1;34m(self, model, inputs)\u001b[0m\n\u001b[0;32m     16\u001b[0m     loss \u001b[38;5;241m=\u001b[39m loss\u001b[38;5;241m.\u001b[39mmean()  \u001b[38;5;66;03m# mean() to average on multi-gpu parallel training\u001b[39;00m\n\u001b[0;32m     18\u001b[0m loss \u001b[38;5;241m=\u001b[39m loss \u001b[38;5;241m/\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgradient_accumulation_steps\n\u001b[1;32m---> 19\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscaler\u001b[38;5;241m.\u001b[39mscale(loss)\u001b[38;5;241m.\u001b[39mbackward()\n\u001b[0;32m     21\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m loss\u001b[38;5;241m.\u001b[39mdetach()\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\torch\\_tensor.py:487\u001b[0m, in \u001b[0;36mTensor.backward\u001b[1;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[0;32m    477\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_torch_function_unary(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m    478\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(\n\u001b[0;32m    479\u001b[0m         Tensor\u001b[38;5;241m.\u001b[39mbackward,\n\u001b[0;32m    480\u001b[0m         (\u001b[38;5;28mself\u001b[39m,),\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    485\u001b[0m         inputs\u001b[38;5;241m=\u001b[39minputs,\n\u001b[0;32m    486\u001b[0m     )\n\u001b[1;32m--> 487\u001b[0m torch\u001b[38;5;241m.\u001b[39mautograd\u001b[38;5;241m.\u001b[39mbackward(\n\u001b[0;32m    488\u001b[0m     \u001b[38;5;28mself\u001b[39m, gradient, retain_graph, create_graph, inputs\u001b[38;5;241m=\u001b[39minputs\n\u001b[0;32m    489\u001b[0m )\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\torch\\autograd\\__init__.py:200\u001b[0m, in \u001b[0;36mbackward\u001b[1;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[0;32m    195\u001b[0m     retain_graph \u001b[38;5;241m=\u001b[39m create_graph\n\u001b[0;32m    197\u001b[0m \u001b[38;5;66;03m# The reason we repeat same the comment below is that\u001b[39;00m\n\u001b[0;32m    198\u001b[0m \u001b[38;5;66;03m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[0;32m    199\u001b[0m \u001b[38;5;66;03m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[1;32m--> 200\u001b[0m Variable\u001b[38;5;241m.\u001b[39m_execution_engine\u001b[38;5;241m.\u001b[39mrun_backward(  \u001b[38;5;66;03m# Calls into the C++ engine to run the backward pass\u001b[39;00m\n\u001b[0;32m    201\u001b[0m     tensors, grad_tensors_, retain_graph, create_graph, inputs,\n\u001b[0;32m    202\u001b[0m     allow_unreachable\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, accumulate_grad\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "from transformers import BertForSequenceClassification, Trainer, TrainingArguments\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from transformers import Trainer, TrainingArguments\n",
    "from transformers import EarlyStoppingCallback, IntervalStrategy\n",
    "from sklearn.metrics import r2_score, accuracy_score\n",
    "import numpy as np\n",
    "\n",
    "def compute_metrics1(p):\n",
    "\n",
    "    preds = p.predictions.flatten()\n",
    "    labels = p.label_ids.flatten()\n",
    "\n",
    "    r2 = r2_score(labels, preds)\n",
    "    mse = mean_squared_error(labels, preds)\n",
    "    \n",
    "    return {\"r2_score\": r2, \"mse\": mse}\n",
    "\n",
    "def compute_metrics_classification(p):\n",
    "    preds = np.argmax(p.predictions , axis=1)\n",
    "    labels = p.label_ids\n",
    "    accuracy = accuracy_score(labels, preds)\n",
    "    return {\"accuracy_score\": accuracy}\n",
    "\n",
    "def compute_metrics(p):\n",
    "    prediction_scores, labels_ids = p\n",
    "    #print('here')\n",
    "    #print(prediction_scores)\n",
    "\n",
    "    mask = labels_ids != 100\n",
    "    #print(mask)\n",
    "    masked_predictions = prediction_scores[mask]\n",
    "    masked_labels = labels_ids[mask]\n",
    "\n",
    "    mse = mean_squared_error(masked_predictions, masked_labels)\n",
    "    return {\"mse\": mse}\n",
    "\n",
    "training_args = TrainingArguments(\n",
    "    output_dir='./results_task1',\n",
    "    num_train_epochs=150,\n",
    "    label_names=[\"labels_ids\"],\n",
    "    disable_tqdm = True,\n",
    "    #label_names=[\"labels_mask\"],\n",
    "    do_eval=True,\n",
    "    #learning_rate=0.001,\n",
    "    per_device_train_batch_size=batch,\n",
    "    per_device_eval_batch_size=batch,\n",
    "    logging_dir='./logs',\n",
    "    logging_strategy=\"steps\",\n",
    "    logging_steps=50,\n",
    "    evaluation_strategy=\"steps\",\n",
    "    eval_steps = 50,\n",
    "    save_strategy=\"steps\",\n",
    "    save_steps=50,\n",
    "\n",
    "    save_total_limit=2,\n",
    "    load_best_model_at_end=True,\n",
    ")\n",
    "\n",
    "trainer = CustomTrainer(\n",
    "    model=model,\n",
    "    args=training_args,\n",
    "    train_dataset=train_dataset,\n",
    "    eval_dataset=val_dataset,\n",
    "    compute_metrics=compute_metrics1, #compute_metrics1,#compute_metrics_classification,\n",
    "    callbacks = [EarlyStoppingCallback(early_stopping_patience=50)]\n",
    ")\n",
    "\n",
    "trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "cac7d37e",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = reservoirtransformers.ReservoirTTimeSeries.from_pretrained(\"results_task1/checkpoint-1200\", config=configuration).to(\"cuda\", dtype=float)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "827f3400",
   "metadata": {},
   "outputs": [],
   "source": [
    "cnt = 0\n",
    "ln = len(X_test)\n",
    "y_pred = []\n",
    "y_test1 = []\n",
    "while cnt < ln:\n",
    "    #print(cnt, ln)\n",
    "    input_ids = torch.stack(X_test[cnt:cnt+batch], dim=0)\n",
    "    #y_test1 = y_test1 + [k.detach().numpy().flatten() for k in y_test[cnt:cnt+64]]\n",
    "    \n",
    "    output = model(inputs_embeds = input_ids.to(model.device))['logits']\n",
    "    y_pred = y_pred + list(output.cpu().detach().numpy().reshape(output.size(0), -1))\n",
    "    #y_test = y_test + labels_ids\n",
    "    \n",
    "    cnt=cnt+batch\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "1fae404b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.282363448096736"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
    "y_test1 = [i.detach().numpy().flatten() for i in y_test]\n",
    "\n",
    "mse = mean_squared_error(y_test1, y_pred)\n",
    "mse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05eafb3b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
