{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "3eeaaae3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import reservoirtransformers\n",
    "import math\n",
    "import os\n",
    "import warnings\n",
    "from dataclasses import dataclass\n",
    "from typing import List, Optional, Tuple, Union\n",
    "\n",
    "import torch\n",
    "import torch.utils.checkpoint\n",
    "from torch import nn\n",
    "from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss\n",
    "import torch.nn.functional as F"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "7eb58cec",
   "metadata": {},
   "outputs": [],
   "source": [
    "from configuration import ReservoirTConfig\n",
    "\n",
    "configuration = ReservoirTConfig()\n",
    "\n",
    "configuration.output_size=7\n",
    "configuration.re_output_size=16\n",
    "configuration.max_sequence_length=1399\n",
    "configuration.sequence_length=384\n",
    "configuration.pred_len=96\n",
    "configuration.hidden_size=16\n",
    "configuration.num_attention_heads=16\n",
    "configuration.hidden_dropout_prob=0.0\n",
    "configuration.num_hidden_layers=16\n",
    "configuration.num_reservoirs = 10\n",
    "configuration.intermediate_size=128\n",
    "configuration.reservoir_size = [30, 15, 20, 25, 30, 35, 40, 45, 50, 50]\n",
    "configuration.spectral_radius = [0.6, 0.8, 0.55, 0.6, 0.5, 0.4, 0.3, 0.2, 0.81, 0.05]\n",
    "configuration.sparsity = [0.6, 0.55, 0.5, 0.45, 0.4, 0.35, 0.3, 0.25, 0.2, 0.15]\n",
    "configuration.leaky = [0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39]\n",
    "configuration.attention_probs_dropout_prob=0.0\n",
    "#bert_model = TabularBertForRegression(config=configuration).to(\"cpu\", dtype=float)\n",
    "model = reservoirtransformers.ReservoirTTimeSeries(config=configuration).to(\"cpu\", dtype=float)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "0f6bf41e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"\\nzero_col = np.zeros((X.shape[0], 1))\\n\\n# Concatenate the original array with the zero column along the second axis (columns)\\nX = np.hstack((X, zero_col))\\n#X =  dataset.drop(['ate'], axis = 1).values\\n\\n#X_train, X_test, y_train, y_test =train_test_split(X.values, y, test_size=0.2, shuffle=False)\\n\""
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "# prepare data for lstm\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from pandas import read_csv\n",
    "from pandas import DataFrame\n",
    "import random\n",
    "from sklearn.model_selection import train_test_split\n",
    "from pandas import concat\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "dataset= read_csv('ETTh1.csv')\n",
    "dataset=dataset.dropna()\n",
    "dataset = dataset.drop(['date'], axis = 1)\n",
    "dataset = dataset.dropna()\n",
    "#data = dataset.values[0:14000]\n",
    "\n",
    "\n",
    "y = dataset.OT.values\n",
    "\n",
    "\n",
    "X = dataset.values\n",
    "\n",
    "scaler = StandardScaler()\n",
    "X = scaler.fit_transform(X)\n",
    "\n",
    "\n",
    "\n",
    "#X=X[1:]\n",
    "\n",
    "#Reservoir_id = np.array([[0] * len(X[0])] + X[:-1].tolist())\n",
    "# Create a zero column of shape (100, 1)\n",
    "'''\n",
    "zero_col = np.zeros((X.shape[0], 1))\n",
    "\n",
    "# Concatenate the original array with the zero column along the second axis (columns)\n",
    "X = np.hstack((X, zero_col))\n",
    "#X =  dataset.drop(['ate'], axis = 1).values\n",
    "\n",
    "#X_train, X_test, y_train, y_test =train_test_split(X.values, y, test_size=0.2, shuffle=False)\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "6d0706c5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "dd85830941cd4a559e986ae944cfd131",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/16941 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([16941, 384, 7])\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\1869634036.py:9: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at ..\\torch\\csrc\\utils\\tensor_new.cpp:248.)\n",
      "  return torch.tensor(sequences), torch.tensor(targets)\n"
     ]
    }
   ],
   "source": [
    "from tqdm.auto import tqdm\n",
    "# 1. Preprocess the data into the required format\n",
    "def create_sequences(data, seq_length, pred_length):\n",
    "    sequences = []\n",
    "    targets = []\n",
    "    for i in tqdm(range(len(data) - seq_length - pred_length + 1)):\n",
    "        sequences.append(data[i:i+seq_length])\n",
    "        targets.append(data[i+seq_length:i+seq_length+pred_length])\n",
    "    return torch.tensor(sequences), torch.tensor(targets)\n",
    "\n",
    "X, y = create_sequences(X, seq_length=configuration.sequence_length, pred_length=configuration.pred_len)\n",
    "# Zeros tensor of shape [16941, 384, 1]\n",
    "print(X.shape)\n",
    "zeros = torch.zeros((X.size(0), X.size(1), 9), dtype=X.dtype)\n",
    "\n",
    "# Concatenate along the last dimension\n",
    "X = torch.cat((X, zeros), dim=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "3e13e0d0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(torch.Size([16941, 384, 16]), torch.Size([16941, 96, 7]))"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.shape, y.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "7955f82a",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "batch=16\n",
    "indices = np.arange(len(X)) \n",
    "barrier = int(len(indices)/batch)*batch\n",
    "indices = indices[0:barrier]\n",
    "soft_border = int((configuration.sequence_length/batch))+8\n",
    "\n",
    "indices = [indices[i:i+batch] for i in range(0, len(indices), batch)]\n",
    "\n",
    "border1 = int(len(indices)*0.7)\n",
    "border2 = border1+int(len(indices)*0.1)\n",
    "border3 = border2+int(len(indices)*0.2)\n",
    "\n",
    "train_ind = indices[0:border1]\n",
    "val_ind = indices[border1-soft_border: border2]\n",
    "test_ind = indices[border2-soft_border: border3]\n",
    "\n",
    "random.shuffle(train_ind)\n",
    "random.shuffle(val_ind)\n",
    "#random.shuffle(test_ind)\n",
    "\n",
    "\n",
    "X_train = [X[item] for sublist in train_ind for item in sublist]\n",
    "y_train = [y[item] for sublist in train_ind for item in sublist]\n",
    "\n",
    "X_val = [X[item] for sublist in val_ind for item in sublist]\n",
    "y_val = [y[item] for sublist in val_ind for item in sublist]\n",
    "\n",
    "X_test = [X[item] for sublist in test_ind for item in sublist]\n",
    "y_test = [y[item] for sublist in test_ind for item in sublist]\n",
    "\n",
    "#train_indices, test_indices =train_test_split(indices,  test_size=0.2, shuffle=False)\n",
    "#indices = [item for sublist in indices for item in sublist]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "26d5bda1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch.utils.data import Dataset, DataLoader\n",
    "\n",
    "class CustomDataset(Dataset):\n",
    "    def __init__(self, tokenized_inputs,  labels=None, pos=None):\n",
    "        self.tokenized_inputs = tokenized_inputs\n",
    "        self.labels = labels\n",
    "        self.pos = pos\n",
    "        self.id_list = None\n",
    "        self.re = None\n",
    "\n",
    "    def __len__(self):\n",
    "        return len(self.tokenized_inputs)\n",
    "\n",
    "    def __getitem__(self, idx):\n",
    "        if self.labels is not None:\n",
    "            return {\n",
    "                \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
    "                \"labels_ids\": torch.tensor(self.labels[idx]),\n",
    "                #\"id\": torch.tensor(self.id_list[idx]),  # Include the id directly\n",
    "                #\"reservoir_ids\": torch.tensor(self.re[idx]),\n",
    "            }\n",
    "        else:\n",
    "            return {\n",
    "                \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
    "            }\n",
    "\n",
    "# Assuming you have X_train, y_train, X_test, y_test, trainpos, and testpos defined\n",
    "\n",
    "\n",
    "train_dataset = CustomDataset(X_train, y_train)\n",
    "\n",
    "test_dataset = CustomDataset(X_test, y_test)\n",
    "\n",
    "val_dataset = CustomDataset(X_val, y_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "13806390",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import Trainer, TrainingArguments\n",
    "from torch.utils.data import DataLoader\n",
    "from torch.cuda.amp import GradScaler\n",
    "class CustomTrainer(Trainer):\n",
    "    def __init__(self, *args, gradient_accumulation_steps=1, **kwargs):\n",
    "        super().__init__(*args, **kwargs)\n",
    "        self.gradient_accumulation_steps = gradient_accumulation_steps\n",
    "        self.scaler = GradScaler()\n",
    "\n",
    "    def training_step(self, model, inputs):\n",
    "        model.train()\n",
    "        inputs = self._prepare_inputs(inputs)\n",
    "        loss = self.compute_loss(model, inputs)\n",
    "\n",
    "        if self.args.n_gpu > 1:\n",
    "            loss = loss.mean()  # mean() to average on multi-gpu parallel training\n",
    "\n",
    "        loss = loss / self.gradient_accumulation_steps\n",
    "        self.scaler.scale(loss).backward()\n",
    "\n",
    "        return loss.detach()\n",
    "\n",
    "\n",
    "    def get_train_dataloader(self) -> DataLoader:\n",
    "        \"\"\"\n",
    "        Returns the training [`~torch.utils.data.DataLoader`].\n",
    "        Will use no sampler if `train_dataset` does not implement `__len__`, a random sampler (adapted to distributed\n",
    "        training if necessary) otherwise.\n",
    "        Subclass and override this method if you want to inject some custom behavior.\n",
    "        \"\"\"\n",
    "        if self.train_dataset is None:\n",
    "            raise ValueError(\"Trainer: training requires a train_dataset.\")\n",
    "\n",
    "        train_dataset = self.train_dataset\n",
    "\n",
    "\n",
    "        loader =  DataLoader(\n",
    "            train_dataset,\n",
    "            batch_size=self._train_batch_size,\n",
    "            drop_last=self.args.dataloader_drop_last,\n",
    "            shuffle = False,\n",
    "        )\n",
    "        return loader\n",
    "\n",
    "#bert_model = TabularBertForSequenceClassification(config=configuration).to(\"cpu\", dtype=float)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "cb4d10de",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n",
      "Could not estimate the number of tokens of the input, floating-point operations will not be computed\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 1.0143, 'learning_rate': 4.997747747747748e-05, 'epoch': 0.07}\n",
      "{'eval_loss': 0.8444902700115172, 'eval_r2_score': -0.16064035244776287, 'eval_mse': 0.8444902700115173, 'eval_runtime': 67.0768, 'eval_samples_per_second': 32.679, 'eval_steps_per_second': 2.042, 'epoch': 0.07}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.8291, 'learning_rate': 4.995495495495496e-05, 'epoch': 0.14}\n",
      "{'eval_loss': 1.0653037121809081, 'eval_r2_score': -0.4641192680084951, 'eval_mse': 1.065303712180908, 'eval_runtime': 66.4181, 'eval_samples_per_second': 33.003, 'eval_steps_per_second': 2.063, 'epoch': 0.14}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.9616, 'learning_rate': 4.9932432432432435e-05, 'epoch': 0.2}\n",
      "{'eval_loss': 1.1247786837230422, 'eval_r2_score': -0.5458597621074293, 'eval_mse': 1.1247786837230425, 'eval_runtime': 65.3763, 'eval_samples_per_second': 33.529, 'eval_steps_per_second': 2.096, 'epoch': 0.2}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.7592, 'learning_rate': 4.9909909909909917e-05, 'epoch': 0.27}\n",
      "{'eval_loss': 1.1289348881963628, 'eval_r2_score': -0.5515719162861779, 'eval_mse': 1.1289348881963623, 'eval_runtime': 65.2583, 'eval_samples_per_second': 33.59, 'eval_steps_per_second': 2.099, 'epoch': 0.27}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.859, 'learning_rate': 4.988738738738739e-05, 'epoch': 0.34}\n",
      "{'eval_loss': 1.1610217130165825, 'eval_r2_score': -0.5956710196041608, 'eval_mse': 1.1610217130165819, 'eval_runtime': 66.9499, 'eval_samples_per_second': 32.741, 'eval_steps_per_second': 2.046, 'epoch': 0.34}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.7412, 'learning_rate': 4.986486486486487e-05, 'epoch': 0.41}\n",
      "{'eval_loss': 1.0740880271539066, 'eval_r2_score': -0.47619214887914674, 'eval_mse': 1.0740880271539068, 'eval_runtime': 67.3216, 'eval_samples_per_second': 32.56, 'eval_steps_per_second': 2.035, 'epoch': 0.41}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.7519, 'learning_rate': 4.984234234234235e-05, 'epoch': 0.47}\n",
      "{'eval_loss': 0.9598534461735606, 'eval_r2_score': -0.3191918031807375, 'eval_mse': 0.9598534461735609, 'eval_runtime': 52.5809, 'eval_samples_per_second': 41.688, 'eval_steps_per_second': 2.606, 'epoch': 0.47}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.7211, 'learning_rate': 4.9819819819819824e-05, 'epoch': 0.54}\n",
      "{'eval_loss': 0.9046751808084267, 'eval_r2_score': -0.2433565642975557, 'eval_mse': 0.9046751808084262, 'eval_runtime': 68.8975, 'eval_samples_per_second': 31.815, 'eval_steps_per_second': 1.988, 'epoch': 0.54}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.7375, 'learning_rate': 4.97972972972973e-05, 'epoch': 0.61}\n",
      "{'eval_loss': 0.8516418399112199, 'eval_r2_score': -0.17046924083605952, 'eval_mse': 0.85164183991122, 'eval_runtime': 25.5968, 'eval_samples_per_second': 85.636, 'eval_steps_per_second': 5.352, 'epoch': 0.61}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.6114, 'learning_rate': 4.977477477477478e-05, 'epoch': 0.68}\n",
      "{'eval_loss': 0.6988224441715292, 'eval_r2_score': 0.03956083722481629, 'eval_mse': 0.6988224441715291, 'eval_runtime': 25.6182, 'eval_samples_per_second': 85.564, 'eval_steps_per_second': 5.348, 'epoch': 0.68}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.7054, 'learning_rate': 4.9752252252252256e-05, 'epoch': 0.74}\n",
      "{'eval_loss': 0.6406366499714751, 'eval_r2_score': 0.11952952731627542, 'eval_mse': 0.6406366499714751, 'eval_runtime': 25.6675, 'eval_samples_per_second': 85.4, 'eval_steps_per_second': 5.337, 'epoch': 0.74}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.5265, 'learning_rate': 4.972972972972974e-05, 'epoch': 0.81}\n",
      "{'eval_loss': 0.5995365927057521, 'eval_r2_score': 0.17601612833994473, 'eval_mse': 0.5995365927057525, 'eval_runtime': 25.1519, 'eval_samples_per_second': 87.151, 'eval_steps_per_second': 5.447, 'epoch': 0.81}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.4678, 'learning_rate': 4.9707207207207207e-05, 'epoch': 0.88}\n",
      "{'eval_loss': 0.526652442884281, 'eval_r2_score': 0.2761857671630118, 'eval_mse': 0.5266524428842813, 'eval_runtime': 25.5168, 'eval_samples_per_second': 85.904, 'eval_steps_per_second': 5.369, 'epoch': 0.88}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.5721, 'learning_rate': 4.968468468468468e-05, 'epoch': 0.95}\n",
      "{'eval_loss': 0.45672101875231774, 'eval_r2_score': 0.37229727446384697, 'eval_mse': 0.4567210187523177, 'eval_runtime': 68.1207, 'eval_samples_per_second': 32.178, 'eval_steps_per_second': 2.011, 'epoch': 0.95}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.3902, 'learning_rate': 4.9662162162162164e-05, 'epoch': 1.01}\n",
      "{'eval_loss': 0.42922517456132003, 'eval_r2_score': 0.410086681193486, 'eval_mse': 0.4292251745613203, 'eval_runtime': 67.8278, 'eval_samples_per_second': 32.317, 'eval_steps_per_second': 2.02, 'epoch': 1.01}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.472, 'learning_rate': 4.963963963963964e-05, 'epoch': 1.08}\n",
      "{'eval_loss': 0.4026221776672822, 'eval_r2_score': 0.446648987223184, 'eval_mse': 0.402622177667282, 'eval_runtime': 67.4283, 'eval_samples_per_second': 32.509, 'eval_steps_per_second': 2.032, 'epoch': 1.08}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.4739, 'learning_rate': 4.961711711711712e-05, 'epoch': 1.15}\n",
      "{'eval_loss': 0.3675831623874293, 'eval_r2_score': 0.4948054864606187, 'eval_mse': 0.36758316238742933, 'eval_runtime': 66.961, 'eval_samples_per_second': 32.735, 'eval_steps_per_second': 2.046, 'epoch': 1.15}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.4679, 'learning_rate': 4.9594594594594596e-05, 'epoch': 1.22}\n",
      "{'eval_loss': 0.34585899204241816, 'eval_r2_score': 0.5246624896982348, 'eval_mse': 0.3458589920424181, 'eval_runtime': 65.0706, 'eval_samples_per_second': 33.686, 'eval_steps_per_second': 2.105, 'epoch': 1.22}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.4346, 'learning_rate': 4.957207207207207e-05, 'epoch': 1.28}\n",
      "{'eval_loss': 0.33957944009976887, 'eval_r2_score': 0.5332929045635606, 'eval_mse': 0.3395794400997689, 'eval_runtime': 64.1531, 'eval_samples_per_second': 34.168, 'eval_steps_per_second': 2.136, 'epoch': 1.28}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.4954, 'learning_rate': 4.954954954954955e-05, 'epoch': 1.35}\n",
      "{'eval_loss': 0.3370961074339925, 'eval_r2_score': 0.5367059173628821, 'eval_mse': 0.3370961074339925, 'eval_runtime': 64.7169, 'eval_samples_per_second': 33.871, 'eval_steps_per_second': 2.117, 'epoch': 1.35}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.409, 'learning_rate': 4.952702702702703e-05, 'epoch': 1.42}\n",
      "{'eval_loss': 0.32888786490753424, 'eval_r2_score': 0.5479870627320943, 'eval_mse': 0.32888786490753424, 'eval_runtime': 64.7839, 'eval_samples_per_second': 33.836, 'eval_steps_per_second': 2.115, 'epoch': 1.42}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.4055, 'learning_rate': 4.95045045045045e-05, 'epoch': 1.49}\n",
      "{'eval_loss': 0.3306292852551221, 'eval_r2_score': 0.5455937104369204, 'eval_mse': 0.3306292852551221, 'eval_runtime': 65.6814, 'eval_samples_per_second': 33.373, 'eval_steps_per_second': 2.086, 'epoch': 1.49}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.5069, 'learning_rate': 4.9481981981981985e-05, 'epoch': 1.55}\n",
      "{'eval_loss': 0.3166789620512714, 'eval_r2_score': 0.564766587396008, 'eval_mse': 0.31667896205127133, 'eval_runtime': 65.8276, 'eval_samples_per_second': 33.299, 'eval_steps_per_second': 2.081, 'epoch': 1.55}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.449, 'learning_rate': 4.945945945945946e-05, 'epoch': 1.62}\n",
      "{'eval_loss': 0.31277325315130267, 'eval_r2_score': 0.5701344684897193, 'eval_mse': 0.3127732531513028, 'eval_runtime': 66.7032, 'eval_samples_per_second': 32.862, 'eval_steps_per_second': 2.054, 'epoch': 1.62}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.4403, 'learning_rate': 4.943693693693694e-05, 'epoch': 1.69}\n",
      "{'eval_loss': 0.3095499791752058, 'eval_r2_score': 0.574564432903166, 'eval_mse': 0.3095499791752057, 'eval_runtime': 66.848, 'eval_samples_per_second': 32.791, 'eval_steps_per_second': 2.049, 'epoch': 1.69}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.4437, 'learning_rate': 4.941441441441442e-05, 'epoch': 1.76}\n",
      "{'eval_loss': 0.30912877308949166, 'eval_r2_score': 0.5751433250433555, 'eval_mse': 0.3091287730894917, 'eval_runtime': 67.9837, 'eval_samples_per_second': 32.243, 'eval_steps_per_second': 2.015, 'epoch': 1.76}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.378, 'learning_rate': 4.939189189189189e-05, 'epoch': 1.82}\n",
      "{'eval_loss': 0.3112708485724438, 'eval_r2_score': 0.5721993251752819, 'eval_mse': 0.31127084857244386, 'eval_runtime': 68.8458, 'eval_samples_per_second': 31.839, 'eval_steps_per_second': 1.99, 'epoch': 1.82}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.4059, 'learning_rate': 4.9369369369369375e-05, 'epoch': 1.89}\n",
      "{'eval_loss': 0.318303596389335, 'eval_r2_score': 0.5625337420481231, 'eval_mse': 0.3183035963893353, 'eval_runtime': 68.7493, 'eval_samples_per_second': 31.884, 'eval_steps_per_second': 1.993, 'epoch': 1.89}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.3915, 'learning_rate': 4.934684684684685e-05, 'epoch': 1.96}\n",
      "{'eval_loss': 0.3008190115122893, 'eval_r2_score': 0.5865639949411741, 'eval_mse': 0.3008190115122892, 'eval_runtime': 68.4246, 'eval_samples_per_second': 32.035, 'eval_steps_per_second': 2.002, 'epoch': 1.96}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.3618, 'learning_rate': 4.9324324324324325e-05, 'epoch': 2.03}\n",
      "{'eval_loss': 0.300432200054208, 'eval_r2_score': 0.5870956162078493, 'eval_mse': 0.30043220005420784, 'eval_runtime': 67.8977, 'eval_samples_per_second': 32.284, 'eval_steps_per_second': 2.018, 'epoch': 2.03}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.4125, 'learning_rate': 4.930180180180181e-05, 'epoch': 2.09}\n",
      "{'eval_loss': 0.31230893766557893, 'eval_r2_score': 0.5707726088071798, 'eval_mse': 0.3123089376655789, 'eval_runtime': 67.6523, 'eval_samples_per_second': 32.401, 'eval_steps_per_second': 2.025, 'epoch': 2.09}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.4722, 'learning_rate': 4.927927927927928e-05, 'epoch': 2.16}\n",
      "{'eval_loss': 0.3056495653115645, 'eval_r2_score': 0.5799250366687098, 'eval_mse': 0.30564956531156434, 'eval_runtime': 66.8667, 'eval_samples_per_second': 32.782, 'eval_steps_per_second': 2.049, 'epoch': 2.16}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.4406, 'learning_rate': 4.9256756756756764e-05, 'epoch': 2.23}\n",
      "{'eval_loss': 0.30420999529038784, 'eval_r2_score': 0.581903535552692, 'eval_mse': 0.30420999529038767, 'eval_runtime': 64.6233, 'eval_samples_per_second': 33.92, 'eval_steps_per_second': 2.12, 'epoch': 2.23}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.3758, 'learning_rate': 4.923423423423424e-05, 'epoch': 2.3}\n",
      "{'eval_loss': 0.30136043718401606, 'eval_r2_score': 0.5858198768562508, 'eval_mse': 0.301360437184016, 'eval_runtime': 64.5268, 'eval_samples_per_second': 33.97, 'eval_steps_per_second': 2.123, 'epoch': 2.3}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.4416, 'learning_rate': 4.9211711711711714e-05, 'epoch': 2.36}\n",
      "{'eval_loss': 0.3023540688363855, 'eval_r2_score': 0.5844542613694153, 'eval_mse': 0.30235406883638577, 'eval_runtime': 65.1476, 'eval_samples_per_second': 33.647, 'eval_steps_per_second': 2.103, 'epoch': 2.36}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.3574, 'learning_rate': 4.9189189189189196e-05, 'epoch': 2.43}\n",
      "{'eval_loss': 0.30494009388150417, 'eval_r2_score': 0.5809001114562802, 'eval_mse': 0.3049400938815041, 'eval_runtime': 65.276, 'eval_samples_per_second': 33.581, 'eval_steps_per_second': 2.099, 'epoch': 2.43}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.3568, 'learning_rate': 4.9166666666666665e-05, 'epoch': 2.5}\n",
      "{'eval_loss': 0.3084702291293665, 'eval_r2_score': 0.5760484067489999, 'eval_mse': 0.30847022912936645, 'eval_runtime': 65.5473, 'eval_samples_per_second': 33.442, 'eval_steps_per_second': 2.09, 'epoch': 2.5}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.5419, 'learning_rate': 4.9144144144144147e-05, 'epoch': 2.57}\n",
      "{'eval_loss': 0.3054132941301785, 'eval_r2_score': 0.5802497602054688, 'eval_mse': 0.3054132941301787, 'eval_runtime': 65.7964, 'eval_samples_per_second': 33.315, 'eval_steps_per_second': 2.082, 'epoch': 2.57}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"inputs_embeds\": torch.tensor(self.tokenized_inputs[idx]),\n",
      "C:\\Users\\Kowsher\\AppData\\Local\\Temp\\ipykernel_25352\\2303645629.py:19: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n",
      "  \"labels_ids\": torch.tensor(self.labels[idx]),\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.3726, 'learning_rate': 4.912162162162162e-05, 'epoch': 2.64}\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[11], line 71\u001b[0m\n\u001b[0;32m     40\u001b[0m training_args \u001b[38;5;241m=\u001b[39m TrainingArguments(\n\u001b[0;32m     41\u001b[0m     output_dir\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m./results_task1\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[0;32m     42\u001b[0m     num_train_epochs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m150\u001b[39m,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m     59\u001b[0m     load_best_model_at_end\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[0;32m     60\u001b[0m )\n\u001b[0;32m     62\u001b[0m trainer \u001b[38;5;241m=\u001b[39m CustomTrainer(\n\u001b[0;32m     63\u001b[0m     model\u001b[38;5;241m=\u001b[39mmodel,\n\u001b[0;32m     64\u001b[0m     args\u001b[38;5;241m=\u001b[39mtraining_args,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m     68\u001b[0m     callbacks \u001b[38;5;241m=\u001b[39m [EarlyStoppingCallback(early_stopping_patience\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m50\u001b[39m)]\n\u001b[0;32m     69\u001b[0m )\n\u001b[1;32m---> 71\u001b[0m trainer\u001b[38;5;241m.\u001b[39mtrain()\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\trainer.py:1555\u001b[0m, in \u001b[0;36mTrainer.train\u001b[1;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[0;32m   1553\u001b[0m         hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[0;32m   1554\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1555\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m inner_training_loop(\n\u001b[0;32m   1556\u001b[0m         args\u001b[38;5;241m=\u001b[39margs,\n\u001b[0;32m   1557\u001b[0m         resume_from_checkpoint\u001b[38;5;241m=\u001b[39mresume_from_checkpoint,\n\u001b[0;32m   1558\u001b[0m         trial\u001b[38;5;241m=\u001b[39mtrial,\n\u001b[0;32m   1559\u001b[0m         ignore_keys_for_eval\u001b[38;5;241m=\u001b[39mignore_keys_for_eval,\n\u001b[0;32m   1560\u001b[0m     )\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\trainer.py:1929\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[1;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[0;32m   1926\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mepoch \u001b[38;5;241m=\u001b[39m epoch \u001b[38;5;241m+\u001b[39m (step \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m steps_skipped) \u001b[38;5;241m/\u001b[39m steps_in_epoch\n\u001b[0;32m   1927\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_step_end(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n\u001b[1;32m-> 1929\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval)\n\u001b[0;32m   1930\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m   1931\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_substep_end(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\trainer.py:2256\u001b[0m, in \u001b[0;36mTrainer._maybe_log_save_evaluate\u001b[1;34m(self, tr_loss, model, trial, epoch, ignore_keys_for_eval)\u001b[0m\n\u001b[0;32m   2254\u001b[0m         metrics\u001b[38;5;241m.\u001b[39mupdate(dataset_metrics)\n\u001b[0;32m   2255\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 2256\u001b[0m     metrics \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mevaluate(ignore_keys\u001b[38;5;241m=\u001b[39mignore_keys_for_eval)\n\u001b[0;32m   2257\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_report_to_hp_search(trial, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step, metrics)\n\u001b[0;32m   2259\u001b[0m \u001b[38;5;66;03m# Run delayed LR scheduler now that metrics are populated\u001b[39;00m\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\trainer.py:2972\u001b[0m, in \u001b[0;36mTrainer.evaluate\u001b[1;34m(self, eval_dataset, ignore_keys, metric_key_prefix)\u001b[0m\n\u001b[0;32m   2969\u001b[0m start_time \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mtime()\n\u001b[0;32m   2971\u001b[0m eval_loop \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprediction_loop \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39muse_legacy_prediction_loop \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mevaluation_loop\n\u001b[1;32m-> 2972\u001b[0m output \u001b[38;5;241m=\u001b[39m eval_loop(\n\u001b[0;32m   2973\u001b[0m     eval_dataloader,\n\u001b[0;32m   2974\u001b[0m     description\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEvaluation\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m   2975\u001b[0m     \u001b[38;5;66;03m# No point gathering the predictions if there are no metrics, otherwise we defer to\u001b[39;00m\n\u001b[0;32m   2976\u001b[0m     \u001b[38;5;66;03m# self.args.prediction_loss_only\u001b[39;00m\n\u001b[0;32m   2977\u001b[0m     prediction_loss_only\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompute_metrics \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m   2978\u001b[0m     ignore_keys\u001b[38;5;241m=\u001b[39mignore_keys,\n\u001b[0;32m   2979\u001b[0m     metric_key_prefix\u001b[38;5;241m=\u001b[39mmetric_key_prefix,\n\u001b[0;32m   2980\u001b[0m )\n\u001b[0;32m   2982\u001b[0m total_batch_size \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39meval_batch_size \u001b[38;5;241m*\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mworld_size\n\u001b[0;32m   2983\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmetric_key_prefix\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_jit_compilation_time\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m output\u001b[38;5;241m.\u001b[39mmetrics:\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\transformers\\trainer.py:3173\u001b[0m, in \u001b[0;36mTrainer.evaluation_loop\u001b[1;34m(self, dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix)\u001b[0m\n\u001b[0;32m   3171\u001b[0m     losses_host \u001b[38;5;241m=\u001b[39m losses \u001b[38;5;28;01mif\u001b[39;00m losses_host \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m nested_concat(losses_host, losses, padding_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m100\u001b[39m)\n\u001b[0;32m   3172\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m labels \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m-> 3173\u001b[0m     labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maccelerator\u001b[38;5;241m.\u001b[39mpad_across_processes(labels, dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m, pad_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m100\u001b[39m)\n\u001b[0;32m   3174\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m inputs_decode \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m   3175\u001b[0m     inputs_decode \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maccelerator\u001b[38;5;241m.\u001b[39mpad_across_processes(inputs_decode, dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m, pad_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m100\u001b[39m)\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\accelerate\\accelerator.py:2176\u001b[0m, in \u001b[0;36mAccelerator.pad_across_processes\u001b[1;34m(self, tensor, dim, pad_index, pad_first)\u001b[0m\n\u001b[0;32m   2143\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpad_across_processes\u001b[39m(\u001b[38;5;28mself\u001b[39m, tensor, dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, pad_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, pad_first\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[0;32m   2144\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m   2145\u001b[0m \u001b[38;5;124;03m    Recursively pad the tensors in a nested list/tuple/dictionary of tensors from all devices to the same size so\u001b[39;00m\n\u001b[0;32m   2146\u001b[0m \u001b[38;5;124;03m    they can safely be gathered.\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   2174\u001b[0m \u001b[38;5;124;03m    ```\u001b[39;00m\n\u001b[0;32m   2175\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[1;32m-> 2176\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m pad_across_processes(tensor, dim\u001b[38;5;241m=\u001b[39mdim, pad_index\u001b[38;5;241m=\u001b[39mpad_index, pad_first\u001b[38;5;241m=\u001b[39mpad_first)\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\accelerate\\utils\\operations.py:345\u001b[0m, in \u001b[0;36mchained_operation.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    342\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(function)\n\u001b[0;32m    343\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapper\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m    344\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 345\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m function(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m    346\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m DistributedOperationException \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m    347\u001b[0m         operation \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunction\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__module__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunction\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\accelerate\\utils\\operations.py:544\u001b[0m, in \u001b[0;36mpad_across_processes\u001b[1;34m(tensor, dim, pad_index, pad_first)\u001b[0m\n\u001b[0;32m    541\u001b[0m     new_tensor[indices] \u001b[38;5;241m=\u001b[39m tensor\n\u001b[0;32m    542\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m new_tensor\n\u001b[1;32m--> 544\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m recursively_apply(\n\u001b[0;32m    545\u001b[0m     _pad_across_processes, tensor, error_on_other_type\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, dim\u001b[38;5;241m=\u001b[39mdim, pad_index\u001b[38;5;241m=\u001b[39mpad_index, pad_first\u001b[38;5;241m=\u001b[39mpad_first\n\u001b[0;32m    546\u001b[0m )\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\accelerate\\utils\\operations.py:128\u001b[0m, in \u001b[0;36mrecursively_apply\u001b[1;34m(func, data, test_type, error_on_other_type, *args, **kwargs)\u001b[0m\n\u001b[0;32m    119\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(data)(\n\u001b[0;32m    120\u001b[0m         {\n\u001b[0;32m    121\u001b[0m             k: recursively_apply(\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    125\u001b[0m         }\n\u001b[0;32m    126\u001b[0m     )\n\u001b[0;32m    127\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m test_type(data):\n\u001b[1;32m--> 128\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m func(data, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m    129\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m error_on_other_type:\n\u001b[0;32m    130\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[0;32m    131\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnsupported types (\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(data)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) passed to `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunc\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m`. Only nested list/tuple/dicts of \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    132\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mobjects that are valid for `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtest_type\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` should be passed.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    133\u001b[0m     )\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\accelerate\\utils\\operations.py:524\u001b[0m, in \u001b[0;36mpad_across_processes.<locals>._pad_across_processes\u001b[1;34m(tensor, dim, pad_index, pad_first)\u001b[0m\n\u001b[0;32m    521\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m tensor\n\u001b[0;32m    523\u001b[0m \u001b[38;5;66;03m# Gather all sizes\u001b[39;00m\n\u001b[1;32m--> 524\u001b[0m size \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mtensor(tensor\u001b[38;5;241m.\u001b[39mshape, device\u001b[38;5;241m=\u001b[39mtensor\u001b[38;5;241m.\u001b[39mdevice)[\u001b[38;5;28;01mNone\u001b[39;00m]\n\u001b[0;32m    525\u001b[0m sizes \u001b[38;5;241m=\u001b[39m gather(size)\u001b[38;5;241m.\u001b[39mcpu()\n\u001b[0;32m    526\u001b[0m \u001b[38;5;66;03m# Then pad to the maximum size\u001b[39;00m\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "from transformers import BertForSequenceClassification, Trainer, TrainingArguments\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from transformers import Trainer, TrainingArguments\n",
    "from transformers import EarlyStoppingCallback, IntervalStrategy\n",
    "from sklearn.metrics import r2_score, accuracy_score\n",
    "import numpy as np\n",
    "\n",
    "def compute_metrics1(p):\n",
    "\n",
    "    preds = p.predictions.flatten()\n",
    "    labels = p.label_ids.flatten()\n",
    "\n",
    "    r2 = r2_score(labels, preds)\n",
    "    mse = mean_squared_error(labels, preds)\n",
    "    \n",
    "    return {\"r2_score\": r2, \"mse\": mse}\n",
    "\n",
    "def compute_metrics_classification(p):\n",
    "    preds = np.argmax(p.predictions , axis=1)\n",
    "    labels = p.label_ids\n",
    "    accuracy = accuracy_score(labels, preds)\n",
    "    return {\"accuracy_score\": accuracy}\n",
    "\n",
    "def compute_metrics(p):\n",
    "    prediction_scores, labels_ids = p\n",
    "    #print('here')\n",
    "    #print(prediction_scores)\n",
    "\n",
    "    mask = labels_ids != 100\n",
    "    #print(mask)\n",
    "    masked_predictions = prediction_scores[mask]\n",
    "    masked_labels = labels_ids[mask]\n",
    "\n",
    "    mse = mean_squared_error(masked_predictions, masked_labels)\n",
    "    return {\"mse\": mse}\n",
    "\n",
    "training_args = TrainingArguments(\n",
    "    output_dir='./results_task1',\n",
    "    num_train_epochs=150,\n",
    "    label_names=[\"labels_ids\"],\n",
    "    disable_tqdm = True,\n",
    "    #label_names=[\"labels_mask\"],\n",
    "    do_eval=True,\n",
    "    #learning_rate=0.001,\n",
    "    per_device_train_batch_size=batch,\n",
    "    per_device_eval_batch_size=batch,\n",
    "    logging_dir='./logs',\n",
    "    logging_strategy=\"steps\",\n",
    "    logging_steps=50,\n",
    "    evaluation_strategy=\"steps\",\n",
    "    eval_steps = 50,\n",
    "    save_strategy=\"steps\",\n",
    "    save_steps=50,\n",
    "\n",
    "    save_total_limit=2,\n",
    "    load_best_model_at_end=True,\n",
    ")\n",
    "\n",
    "trainer = CustomTrainer(\n",
    "    model=model,\n",
    "    args=training_args,\n",
    "    train_dataset=train_dataset,\n",
    "    eval_dataset=val_dataset,\n",
    "    compute_metrics=compute_metrics1, #compute_metrics1,#compute_metrics_classification,\n",
    "    callbacks = [EarlyStoppingCallback(early_stopping_patience=50)]\n",
    ")\n",
    "\n",
    "trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "cac7d37e",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = reservoirtransformers.ReservoirTTimeSeries.from_pretrained(\"results_task1/checkpoint-1500\", config=configuration).to(\"cuda\", dtype=float)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "827f3400",
   "metadata": {},
   "outputs": [],
   "source": [
    "cnt = 0\n",
    "ln = len(X_test)\n",
    "y_pred = []\n",
    "y_test1 = []\n",
    "while cnt < ln:\n",
    "    #print(cnt, ln)\n",
    "    input_ids = torch.stack(X_test[cnt:cnt+batch], dim=0)\n",
    "    #y_test1 = y_test1 + [k.detach().numpy().flatten() for k in y_test[cnt:cnt+64]]\n",
    "    \n",
    "    output = model(inputs_embeds = input_ids.to(model.device))['logits']\n",
    "    y_pred = y_pred + list(output.cpu().detach().numpy().reshape(output.size(0), -1))\n",
    "    #y_test = y_test + labels_ids\n",
    "    \n",
    "    cnt=cnt+batch\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "1fae404b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.43932385042187827"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
    "y_test1 = [i.detach().numpy().flatten() for i in y_test]\n",
    "\n",
    "mse = mean_squared_error(y_test1, y_pred)\n",
    "mse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dd788b05",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
