{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "7F-sOZwhKfun"
   },
   "source": [
    "Hey Guys , I have made a Tutorial for Seq2Seq Machine Translation from scratch. Hope you like it. Upvote :)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
    "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 218
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 4515,
     "status": "ok",
     "timestamp": 1600764020348,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "ykbPZi7zKfuo",
    "outputId": "1f681a7f-90a5-4b48-e6a4-f2903495901e"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: http://pypi.douban.com/simple/\n",
      "Requirement already satisfied: torchtext==0.6.0 in d:\\anaconda_download\\envs\\kan\\lib\\site-packages (0.6.0)\n",
      "Requirement already satisfied: tqdm in d:\\anaconda_download\\envs\\kan\\lib\\site-packages (from torchtext==0.6.0) (4.66.5)\n",
      "Requirement already satisfied: requests in d:\\anaconda_download\\envs\\kan\\lib\\site-packages (from torchtext==0.6.0) (2.32.3)\n",
      "Requirement already satisfied: torch in d:\\anaconda_download\\envs\\kan\\lib\\site-packages (from torchtext==0.6.0) (1.8.0)\n",
      "Requirement already satisfied: numpy in d:\\anaconda_download\\envs\\kan\\lib\\site-packages (from torchtext==0.6.0) (1.21.6)\n",
      "Requirement already satisfied: six in d:\\anaconda_download\\envs\\kan\\lib\\site-packages (from torchtext==0.6.0) (1.16.0)\n",
      "Requirement already satisfied: sentencepiece in d:\\anaconda_download\\envs\\kan\\lib\\site-packages (from torchtext==0.6.0) (0.2.0)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in d:\\anaconda_download\\envs\\kan\\lib\\site-packages (from requests->torchtext==0.6.0) (3.3.2)\n",
      "Requirement already satisfied: idna<4,>=2.5 in d:\\anaconda_download\\envs\\kan\\lib\\site-packages (from requests->torchtext==0.6.0) (3.8)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in d:\\anaconda_download\\envs\\kan\\lib\\site-packages (from requests->torchtext==0.6.0) (2.2.2)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in d:\\anaconda_download\\envs\\kan\\lib\\site-packages (from requests->torchtext==0.6.0) (2024.8.30)\n",
      "Requirement already satisfied: typing-extensions in d:\\anaconda_download\\envs\\kan\\lib\\site-packages (from torch->torchtext==0.6.0) (4.12.2)\n",
      "Requirement already satisfied: colorama in d:\\anaconda_download\\envs\\kan\\lib\\site-packages (from tqdm->torchtext==0.6.0) (0.4.6)\n"
     ]
    }
   ],
   "source": [
    "# Import Libraries\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "import torch.optim as optim\n",
    "from torchtext.datasets import Multi30k #German to English dataset\n",
    "from torchtext.data import Field, BucketIterator\n",
    "import numpy as np\n",
    "import spacy\n",
    "import random\n",
    "from torch.utils.tensorboard import SummaryWriter  # to print to tensorboard\n",
    "import torch\n",
    "import spacy\n",
    "!pip install torchtext==0.6.0\n",
    "import torchtext.data\n",
    "from torchtext.data.metrics import bleu_score\n",
    "import sys"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "y1fGgVBDKfus"
   },
   "source": [
    "![image.png](attachment:image.png)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "_kg_hide-output": true,
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 642
    },
    "colab_type": "code",
    "collapsed": true,
    "executionInfo": {
     "elapsed": 13144,
     "status": "ok",
     "timestamp": 1600764028986,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "-yeVJvpPKfus",
    "outputId": "fc30eac6-cf53-4635-a15a-ff541b33c08e"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: http://pypi.douban.com/simple/\n",
      "Collecting de-core-news-sm==3.8.0\n",
      "  Downloading https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-3.8.0/de_core_news_sm-3.8.0-py3-none-any.whl (14.6 MB)\n",
      "     ---------------------------------------- 0.0/14.6 MB ? eta -:--:--\n",
      "      --------------------------------------- 0.3/14.6 MB ? eta -:--:--\n",
      "     --- ------------------------------------ 1.3/14.6 MB 7.5 MB/s eta 0:00:02\n",
      "     ------- -------------------------------- 2.6/14.6 MB 5.8 MB/s eta 0:00:03\n",
      "     ---------- ----------------------------- 3.7/14.6 MB 5.5 MB/s eta 0:00:03\n",
      "     --------------- ------------------------ 5.8/14.6 MB 6.5 MB/s eta 0:00:02\n",
      "     ---------------------- ----------------- 8.1/14.6 MB 7.5 MB/s eta 0:00:01\n",
      "     ---------------------------- ----------- 10.5/14.6 MB 8.2 MB/s eta 0:00:01\n",
      "     ----------------------------------- ---- 13.1/14.6 MB 8.7 MB/s eta 0:00:01\n",
      "     ---------------------------------------- 14.6/14.6 MB 8.8 MB/s eta 0:00:00\n",
      "Installing collected packages: de-core-news-sm\n",
      "Successfully installed de-core-news-sm-3.8.0\n",
      "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
      "You can now load the package via spacy.load('de_core_news_sm')\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING: The repository located at pypi.douban.com is not a trusted or secure host and is being ignored. If this repository is available via HTTPS we recommend you use HTTPS instead, otherwise you may silence this warning and allow it anyway with '--trusted-host pypi.douban.com'.\n"
     ]
    }
   ],
   "source": [
    "!python -m spacy download de_core_news_sm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: http://pypi.douban.com/simple/\n",
      "Collecting en-core-web-sm==3.8.0\n",
      "  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)\n",
      "     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--\n",
      "     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--\n",
      "     ---- ----------------------------------- 1.6/12.8 MB 7.0 MB/s eta 0:00:02\n",
      "     ------ --------------------------------- 2.1/12.8 MB 5.1 MB/s eta 0:00:03\n",
      "     ---------- ----------------------------- 3.4/12.8 MB 5.2 MB/s eta 0:00:02\n",
      "     ----------------- ---------------------- 5.5/12.8 MB 6.3 MB/s eta 0:00:02\n",
      "     ------------------------ --------------- 7.9/12.8 MB 7.3 MB/s eta 0:00:01\n",
      "     ------------------------------- -------- 10.2/12.8 MB 7.9 MB/s eta 0:00:01\n",
      "     -------------------------------------- - 12.3/12.8 MB 8.4 MB/s eta 0:00:01\n",
      "     ---------------------------------------- 12.8/12.8 MB 8.0 MB/s eta 0:00:00\n",
      "Installing collected packages: en-core-web-sm\n",
      "Successfully installed en-core-web-sm-3.8.0\n",
      "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
      "You can now load the package via spacy.load('en_core_web_sm')\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING: The repository located at pypi.douban.com is not a trusted or secure host and is being ignored. If this repository is available via HTTPS we recommend you use HTTPS instead, otherwise you may silence this warning and allow it anyway with '--trusted-host pypi.douban.com'.\n"
     ]
    }
   ],
   "source": [
    "!python -m spacy download en_core_web_sm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
    "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a",
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 15287,
     "status": "ok",
     "timestamp": 1600764031134,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "vc95bSrWKfuw"
   },
   "outputs": [],
   "source": [
    "# Loading Tokeniser in german and English\n",
    "spacy_ger = spacy.load(\"de_core_news_sm\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "spacy_eng = spacy.load(\"en_core_web_sm\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 15284,
     "status": "ok",
     "timestamp": 1600764031135,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "JinX1f6QKfuz"
   },
   "outputs": [],
   "source": [
    "# Tokenization of German Language\n",
    "def tokenize_ger(text):\n",
    "    return [tok.text for tok in spacy_ger.tokenizer(text)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 15279,
     "status": "ok",
     "timestamp": 1600764031135,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "Avd2uijgKfu2"
   },
   "outputs": [],
   "source": [
    "# Tokenization of English Language\n",
    "\n",
    "def tokenize_eng(text):\n",
    "    return [tok.text for tok in spacy_eng.tokenizer(text)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "colab_type": "text",
    "id": "-zP-N5ToKfu6"
   },
   "source": [
    "## Preprocessing of Text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 15275,
     "status": "ok",
     "timestamp": 1600764031136,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "o8t7cvVcKfu7"
   },
   "outputs": [],
   "source": [
    "# Applyling Tokenization , lowercase and special Tokens for preprocessing\n",
    "german = Field(tokenize = tokenize_ger,lower = True,init_token = '<sos>',eos_token = '<eos>')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 15271,
     "status": "ok",
     "timestamp": 1600764031136,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "9PkvjAIqKfu-"
   },
   "outputs": [],
   "source": [
    "english = Field(tokenize = tokenize_eng,lower = True,init_token = '<sos>',eos_token = '<eos>')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Data loaded successfully!\n"
     ]
    }
   ],
   "source": [
    "from torchtext.datasets import Multi30k\n",
    "\n",
    "# 检查数据集加载\n",
    "try:\n",
    "    train_data, valid_data, test_data = Multi30k.splits(\n",
    "        exts=(\".de\", \".en\"), fields=(german, english)\n",
    "    )\n",
    "    print(\"Data loaded successfully!\")\n",
    "except FileNotFoundError as e:\n",
    "    print(f\"Error: {e}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 27404,
     "status": "ok",
     "timestamp": 1600764043278,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "fJF0wjBmKfvE"
   },
   "outputs": [],
   "source": [
    "# Creating vocabulary in each language\n",
    "german.build_vocab(train_data,max_size = 10000,min_freq = 2)\n",
    "english.build_vocab(train_data,max_size = 10000,min_freq = 2)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 27400,
     "status": "ok",
     "timestamp": 1600764043278,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "96Sm0iJJKfvG"
   },
   "outputs": [],
   "source": [
    "\n",
    "# Defining the Encoder part of the model\n",
    "class Encoder(nn.Module):\n",
    "    \n",
    "    def __init__(self, input_size, embedding_size, hidden_size, num_layers, p):\n",
    "        super(Encoder, self).__init__()\n",
    "        self.dropout = nn.Dropout(p)\n",
    "        self.hidden_size = hidden_size\n",
    "        self.num_layers = num_layers\n",
    "\n",
    "        self.embedding = nn.Embedding(input_size, embedding_size)\n",
    "        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=p)\n",
    "\n",
    "    def forward(self, x):\n",
    "        # x shape: (seq_length, N) where N is batch size\n",
    "\n",
    "        embedding = self.dropout(torch.relu(self.embedding(x)))\n",
    "        # embedding shape: (seq_length, N, embedding_size)\n",
    "\n",
    "        outputs, (hidden, cell) = self.rnn(embedding)\n",
    "        # outputs shape: (seq_length, N, hidden_size)\n",
    "\n",
    "        return hidden, cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 27396,
     "status": "ok",
     "timestamp": 1600764043278,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "aiyK-YsTKfvJ"
   },
   "outputs": [],
   "source": [
    "# Defining the Decoder part\n",
    "\n",
    "class Decoder(nn.Module):\n",
    "    def __init__(\n",
    "        self, input_size, embedding_size, hidden_size, output_size, num_layers, p):\n",
    "        super(Decoder, self).__init__()\n",
    "        self.dropout = nn.Dropout(p)\n",
    "        self.hidden_size = hidden_size\n",
    "        self.num_layers = num_layers\n",
    "\n",
    "        self.embedding = nn.Embedding(input_size, embedding_size)\n",
    "        self.rnn = nn.LSTM(embedding_size, hidden_size, num_layers, dropout=p)\n",
    "        self.fc = nn.Linear(hidden_size, output_size)\n",
    "\n",
    "    def forward(self, x, hidden, cell):\n",
    "        # x shape: (N) where N is for batch size, we want it to be (1, N), seq_length\n",
    "        # is 1 here because we are sending in a single word and not a sentence\n",
    "        x = x.unsqueeze(0)\n",
    "\n",
    "        embedding = self.dropout(torch.relu(self.embedding(x)))\n",
    "        # embedding shape: (1, N, embedding_size)\n",
    "\n",
    "        outputs, (hidden, cell) = self.rnn(embedding, (hidden, cell))\n",
    "        # outputs shape: (1, N, hidden_size)\n",
    "\n",
    "        predictions = self.fc(outputs)\n",
    "\n",
    "        # predictions shape: (1, N, length_target_vocabulary) to send it to\n",
    "        # loss function we want it to be (N, length_target_vocabulary) so we're\n",
    "        # just gonna remove the first dim\n",
    "        predictions = predictions.squeeze(0)\n",
    "\n",
    "        return predictions, hidden, cell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 27720,
     "status": "ok",
     "timestamp": 1600764043607,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "mdJJXlGyKfvM"
   },
   "outputs": [],
   "source": [
    "# Defining the complete model\n",
    "class Seq2Seq(nn.Module):\n",
    "    def __init__(self, encoder, decoder):\n",
    "        super(Seq2Seq, self).__init__()\n",
    "        self.encoder = encoder\n",
    "        self.decoder = decoder\n",
    "\n",
    "    def forward(self, source, target, teacher_force_ratio=0.5):\n",
    "        batch_size = source.shape[1]\n",
    "        target_len = target.shape[0]\n",
    "        target_vocab_size = len(english.vocab)\n",
    "\n",
    "        outputs = torch.zeros(target_len, batch_size, target_vocab_size).to(device)\n",
    "\n",
    "        hidden, cell = self.encoder(source)\n",
    "\n",
    "        # Grab the first input to the Decoder which will be <SOS> token\n",
    "        x = target[0]\n",
    "\n",
    "        for t in range(1, target_len):\n",
    "            # Use previous hidden, cell as context from encoder at start\n",
    "            output, hidden, cell = self.decoder(x, hidden, cell)\n",
    "\n",
    "            # Store next output prediction\n",
    "            outputs[t] = output\n",
    "\n",
    "            # Get the best word the Decoder predicted (index in the vocabulary)\n",
    "            best_guess = output.argmax(1)\n",
    "\n",
    "            # With probability of teacher_force_ratio we take the actual next word\n",
    "            # otherwise we take the word that the Decoder predicted it to be.\n",
    "            # Teacher Forcing is used so that the model gets used to seeing\n",
    "            # similar inputs at training and testing time, if teacher forcing is 1\n",
    "            # then inputs at test time might be completely different than what the\n",
    "            # network is used to. This was a long comment.\n",
    "            x = target[t] if random.random() < teacher_force_ratio else best_guess\n",
    "\n",
    "        return outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 27717,
     "status": "ok",
     "timestamp": 1600764043608,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "5ziLyXFxKfvP"
   },
   "outputs": [],
   "source": [
    "# Hyperparameters\n",
    "num_epochs = 50\n",
    "learning_rate = 0.001\n",
    "batch_size = 256\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 27712,
     "status": "ok",
     "timestamp": 1600764043609,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "Vr3ZyNckKfvS"
   },
   "outputs": [],
   "source": [
    "# Model hyperparameters\n",
    "load_model = False\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else 'cpu')\n",
    "input_size_encoder = len(german.vocab)\n",
    "input_size_decoder = len(english.vocab)\n",
    "output_size = len(english.vocab)\n",
    "encoder_embedding_size = 300\n",
    "decoder_embedding_size = 300\n",
    "\n",
    "hidden_size = 1024\n",
    "num_layers = 1\n",
    "enc_dropout = 0.5\n",
    "dec_dropout = 0.5\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 29520,
     "status": "ok",
     "timestamp": 1600764045421,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "6DZ7zj8LKfvU"
   },
   "outputs": [],
   "source": [
    "# Tensorboard to get nice loss plot\n",
    "writer = SummaryWriter(f'runs/Loss_plot')\n",
    "step = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 29517,
     "status": "ok",
     "timestamp": 1600764045422,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "z6g5kRsiKfvX"
   },
   "outputs": [],
   "source": [
    "train_iterator, validation_iterator, test_iterator = BucketIterator.splits(\n",
    "    (train_data, valid_data, test_data),\n",
    "     batch_size = batch_size, sort_within_batch = True, \n",
    "     sort_key = lambda x:len(x.src),\n",
    "     device = device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 70
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 43431,
     "status": "ok",
     "timestamp": 1600764059341,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "lwapcC4mKfva",
    "outputId": "8579d0fc-8857-4a6f-cf7f-b673f04f35a2"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\anaconda_download\\envs\\kan\\lib\\site-packages\\torch\\nn\\modules\\rnn.py:60: UserWarning: dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.5 and num_layers=1\n",
      "  warnings.warn(\"dropout option adds dropout after all but last \"\n"
     ]
    }
   ],
   "source": [
    "encoder_net = Encoder(input_size_encoder, \n",
    "                      encoder_embedding_size,\n",
    "                      hidden_size,num_layers, \n",
    "                      enc_dropout).to(device)\n",
    "\n",
    "\n",
    "decoder_net = Decoder(input_size_decoder, \n",
    "                      decoder_embedding_size,\n",
    "                      hidden_size,output_size,num_layers, \n",
    "                      dec_dropout).to(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 43428,
     "status": "ok",
     "timestamp": 1600764059343,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "Liy7CquOKfvd"
   },
   "outputs": [],
   "source": [
    "model = Seq2Seq(encoder_net, decoder_net).to(device)\n",
    "optimizer = optim.Adam(model.parameters(), lr=learning_rate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 43425,
     "status": "ok",
     "timestamp": 1600764059344,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "7ex-6pH0Kfvf"
   },
   "outputs": [],
   "source": [
    "pad_idx = english.vocab.stoi['<pad>']\n",
    "criterion = nn.CrossEntropyLoss(ignore_index = pad_idx)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 43422,
     "status": "ok",
     "timestamp": 1600764059345,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "f6ioRsy5Kfvh"
   },
   "outputs": [],
   "source": [
    "def translate_sentence(model, sentence, german, english, device, max_length=50):\n",
    "    # print(sentence)\n",
    "\n",
    "    # sys.exit()\n",
    "\n",
    "    # Load german tokenizer\n",
    "    spacy_ger = spacy.load(\"de\")\n",
    "\n",
    "    # Create tokens using spacy and everything in lower case (which is what our vocab is)\n",
    "    if type(sentence) == str:\n",
    "        tokens = [token.text.lower() for token in spacy_ger(sentence)]\n",
    "    else:\n",
    "        tokens = [token.lower() for token in sentence]\n",
    "\n",
    "    # print(tokens)\n",
    "\n",
    "    # sys.exit()\n",
    "    # Add <SOS> and <EOS> in beginning and end respectively\n",
    "    tokens.insert(0, german.init_token)\n",
    "    tokens.append(german.eos_token)\n",
    "\n",
    "    # Go through each german token and convert to an index\n",
    "    text_to_indices = [german.vocab.stoi[token] for token in tokens]\n",
    "\n",
    "    # Convert to Tensor\n",
    "    sentence_tensor = torch.LongTensor(text_to_indices).unsqueeze(1).to(device)\n",
    "\n",
    "    # Build encoder hidden, cell state\n",
    "    with torch.no_grad():\n",
    "        hidden, cell = model.encoder(sentence_tensor)\n",
    "\n",
    "    outputs = [english.vocab.stoi[\"<sos>\"]]\n",
    "\n",
    "    for _ in range(max_length):\n",
    "        previous_word = torch.LongTensor([outputs[-1]]).to(device)\n",
    "\n",
    "        with torch.no_grad():\n",
    "            output, hidden, cell = model.decoder(previous_word, hidden, cell)\n",
    "            best_guess = output.argmax(1).item()\n",
    "\n",
    "        outputs.append(best_guess)\n",
    "\n",
    "        # Model predicts it's the end of the sentence\n",
    "        if output.argmax(1).item() == english.vocab.stoi[\"<eos>\"]:\n",
    "            break\n",
    "\n",
    "    translated_sentence = [english.vocab.itos[idx] for idx in outputs]\n",
    "\n",
    "    # remove start token\n",
    "    return translated_sentence[1:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 43417,
     "status": "ok",
     "timestamp": 1600764059345,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "rm9yIYvIKfvj"
   },
   "outputs": [],
   "source": [
    "def save_checkpoint(state, filename=\"my_checkpoint.pth.tar\"):\n",
    "    print(\"=> Saving checkpoint\")\n",
    "    torch.save(state, filename)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 43413,
     "status": "ok",
     "timestamp": 1600764059345,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "WbNauSlDKfvm"
   },
   "outputs": [],
   "source": [
    "def load_checkpoint(checkpoint, model, optimizer):\n",
    "    print(\"=> Loading checkpoint\")\n",
    "    model.load_state_dict(checkpoint[\"state_dict\"])\n",
    "    optimizer.load_state_dict(checkpoint[\"optimizer\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 43409,
     "status": "ok",
     "timestamp": 1600764059346,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "CiNGFIGGKfvo"
   },
   "outputs": [],
   "source": [
    "if load_model:\n",
    "    load_checkpoint(torch.load(\"my_checkpoint.pth.tar\"), model, optimizer)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 43405,
     "status": "ok",
     "timestamp": 1600764059347,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "rOiYTY4DKfvq"
   },
   "outputs": [],
   "source": [
    "sentence = \"Cristiano Ronaldo ist ein großartiger Fußballspieler mit erstaunlichen Fähigkeiten und Talenten.\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1089375,
     "status": "ok",
     "timestamp": 1600765105321,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "ujHJNl4oKfvt",
    "outputId": "ee6aa103-9c46-41d2-9865-ebf30c967e3e"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[Epoch 0 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 1 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 2 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 3 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 4 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 5 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 6 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 7 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 8 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 9 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 10 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 11 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 12 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 13 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 14 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 15 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 16 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 17 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 18 / 50]\n",
      "=> Saving checkpoint\n",
      "[Epoch 19 / 50]\n",
      "=> Saving checkpoint\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[33], line 38\u001b[0m\n\u001b[0;32m     35\u001b[0m loss \u001b[38;5;241m=\u001b[39m criterion(output, target)\n\u001b[0;32m     37\u001b[0m \u001b[38;5;66;03m# Back prop\u001b[39;00m\n\u001b[1;32m---> 38\u001b[0m \u001b[43mloss\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m     40\u001b[0m \u001b[38;5;66;03m# Clip to avoid exploding gradient issues, makes sure grads are\u001b[39;00m\n\u001b[0;32m     41\u001b[0m \u001b[38;5;66;03m# within a healthy range\u001b[39;00m\n\u001b[0;32m     42\u001b[0m torch\u001b[38;5;241m.\u001b[39mnn\u001b[38;5;241m.\u001b[39mutils\u001b[38;5;241m.\u001b[39mclip_grad_norm_(model\u001b[38;5;241m.\u001b[39mparameters(), max_norm\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n",
      "File \u001b[1;32md:\\anaconda_download\\envs\\kan\\lib\\site-packages\\torch\\tensor.py:245\u001b[0m, in \u001b[0;36mTensor.backward\u001b[1;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[0;32m    236\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_torch_function_unary(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m    237\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(\n\u001b[0;32m    238\u001b[0m         Tensor\u001b[38;5;241m.\u001b[39mbackward,\n\u001b[0;32m    239\u001b[0m         (\u001b[38;5;28mself\u001b[39m,),\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    243\u001b[0m         create_graph\u001b[38;5;241m=\u001b[39mcreate_graph,\n\u001b[0;32m    244\u001b[0m         inputs\u001b[38;5;241m=\u001b[39minputs)\n\u001b[1;32m--> 245\u001b[0m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mautograd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgradient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32md:\\anaconda_download\\envs\\kan\\lib\\site-packages\\torch\\autograd\\__init__.py:145\u001b[0m, in \u001b[0;36mbackward\u001b[1;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[0;32m    142\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m retain_graph \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m    143\u001b[0m     retain_graph \u001b[38;5;241m=\u001b[39m create_graph\n\u001b[1;32m--> 145\u001b[0m \u001b[43mVariable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execution_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_backward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    146\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtensors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrad_tensors_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    147\u001b[0m \u001b[43m    \u001b[49m\u001b[43mallow_unreachable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maccumulate_grad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "# Train for num_epochs passes over train_iterator, logging every batch loss to\n",
    "# TensorBoard and writing a checkpoint once each epoch's updates are done.\n",
    "for epoch in range(num_epochs):\n",
    "    print(f\"[Epoch {epoch} / {num_epochs}]\")\n",
    "\n",
    "    # Per-epoch sample translation (currently disabled)\n",
    "    model.eval()\n",
    "\n",
    "    #translated_sentence = translate_sentence(\n",
    "    #    model, sentence, german, english, device, max_length=50\n",
    "    #)\n",
    "\n",
    "    #print(f\"Translated example sentence: \\n {translated_sentence}\")\n",
    "\n",
    "    model.train()\n",
    "\n",
    "    for batch in train_iterator:\n",
    "        # Move the source and target batches to the training device\n",
    "        inp_data = batch.src.to(device)\n",
    "        target = batch.trg.to(device)\n",
    "\n",
    "        # Forward prop\n",
    "        output = model(inp_data, target)\n",
    "\n",
    "        # Output is of shape (trg_len, batch_size, output_dim), but cross entropy\n",
    "        # wants (N, num_classes) predictions and (N,) targets. Drop the first\n",
    "        # time step (the start token) and flatten the remaining time and batch\n",
    "        # dimensions together.\n",
    "        output = output[1:].reshape(-1, output.shape[2])\n",
    "        target = target[1:].reshape(-1)\n",
    "\n",
    "        optimizer.zero_grad()\n",
    "        loss = criterion(output, target)\n",
    "\n",
    "        # Back prop\n",
    "        loss.backward()\n",
    "\n",
    "        # Clip the gradient norm to guard against exploding gradients\n",
    "        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)\n",
    "\n",
    "        # Gradient descent step\n",
    "        optimizer.step()\n",
    "\n",
    "        # Log a plain Python float to TensorBoard rather than the loss tensor\n",
    "        writer.add_scalar(\"Training loss\", loss.item(), global_step=step)\n",
    "        step += 1\n",
    "\n",
    "    # Checkpoint after the epoch's updates so the saved weights include them;\n",
    "    # saving before training would leave the final epoch's progress unsaved.\n",
    "    checkpoint = {\"state_dict\": model.state_dict(), \"optimizer\": optimizer.state_dict()}\n",
    "    save_checkpoint(checkpoint)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1089361,
     "status": "ok",
     "timestamp": 1600765105327,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "Ir5lIeOwKfvv"
   },
   "outputs": [],
   "source": [
    "def bleu(data, model, german, english, device):\n",
    "    \"\"\"Compute the BLEU score of the model's translations over `data`.\n",
    "\n",
    "    Args:\n",
    "        data: Iterable of examples that expose `src` and `trg` attributes.\n",
    "        model: Passed through to `translate_sentence`.\n",
    "        german: Passed through to `translate_sentence`.\n",
    "        english: Passed through to `translate_sentence`; its `eos_token`\n",
    "            attribute, when present and set, names the end-of-sentence token.\n",
    "        device: Passed through to `translate_sentence`.\n",
    "\n",
    "    Returns:\n",
    "        The result of `bleu_score(outputs, targets)`, where every example\n",
    "        contributes one prediction and a single reference translation.\n",
    "    \"\"\"\n",
    "    # Fall back to the literal \"<eos>\" when `english` does not carry the token\n",
    "    eos_token = getattr(english, \"eos_token\", None) or \"<eos>\"\n",
    "\n",
    "    targets = []\n",
    "    outputs = []\n",
    "\n",
    "    for example in data:\n",
    "        src = vars(example)[\"src\"]\n",
    "        trg = vars(example)[\"trg\"]\n",
    "\n",
    "        prediction = translate_sentence(model, src, german, english, device)\n",
    "        # Strip the trailing <eos> only when it is really there. translate_sentence\n",
    "        # has a max_length cap, so presumably an output can end without <eos>;\n",
    "        # slicing unconditionally would then throw away a real token.\n",
    "        if prediction and prediction[-1] == eos_token:\n",
    "            prediction = prediction[:-1]\n",
    "\n",
    "        targets.append([trg])\n",
    "        outputs.append(prediction)\n",
    "\n",
    "    return bleu_score(outputs, targets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1089355,
     "status": "ok",
     "timestamp": 1600765105328,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "hvYMk4hYKfvy"
   },
   "outputs": [],
   "source": [
    "import torch\n",
    "import spacy\n",
    "from torchtext.data.metrics import bleu_score\n",
    "import sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1218846,
     "status": "ok",
     "timestamp": 1600765234827,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "f-bW5z8oKfv0",
    "outputId": "c8f390ee-151e-415f-d581-3b9e39c77a98"
   },
   "outputs": [
    {
     "ename": "OSError",
     "evalue": "[E941] Can't find model 'de'. It looks like you're trying to load a model from a shortcut, which is obsolete as of spaCy v3.0. To load the model, use its full name instead:\n\nnlp = spacy.load(\"de_core_news_sm\")\n\nFor more details on the available models, see the models directory: https://spacy.io/models and if you want to create a blank model, use spacy.blank: nlp = spacy.blank(\"de\")",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mOSError\u001b[0m                                   Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[36], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m score \u001b[38;5;241m=\u001b[39m \u001b[43mbleu\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtest_data\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m:\u001b[49m\u001b[38;5;241;43m100\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgerman\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43menglish\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m      2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBleu score \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mscore\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m100\u001b[39m\u001b[38;5;132;01m:\u001b[39;00m\u001b[38;5;124m.2f\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
      "Cell \u001b[1;32mIn[34], line 9\u001b[0m, in \u001b[0;36mbleu\u001b[1;34m(data, model, german, english, device)\u001b[0m\n\u001b[0;32m      6\u001b[0m src \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mvars\u001b[39m(example)[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msrc\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m      7\u001b[0m trg \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mvars\u001b[39m(example)[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrg\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m----> 9\u001b[0m prediction \u001b[38;5;241m=\u001b[39m \u001b[43mtranslate_sentence\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msrc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgerman\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43menglish\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m     10\u001b[0m prediction \u001b[38;5;241m=\u001b[39m prediction[:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]  \u001b[38;5;66;03m# remove <eos> token\u001b[39;00m\n\u001b[0;32m     12\u001b[0m targets\u001b[38;5;241m.\u001b[39mappend([trg])\n",
      "Cell \u001b[1;32mIn[28], line 7\u001b[0m, in \u001b[0;36mtranslate_sentence\u001b[1;34m(model, sentence, german, english, device, max_length)\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mtranslate_sentence\u001b[39m(model, sentence, german, english, device, max_length\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m50\u001b[39m):\n\u001b[0;32m      2\u001b[0m     \u001b[38;5;66;03m# print(sentence)\u001b[39;00m\n\u001b[0;32m      3\u001b[0m \n\u001b[0;32m      4\u001b[0m     \u001b[38;5;66;03m# sys.exit()\u001b[39;00m\n\u001b[0;32m      5\u001b[0m \n\u001b[0;32m      6\u001b[0m     \u001b[38;5;66;03m# Load german tokenizer\u001b[39;00m\n\u001b[1;32m----> 7\u001b[0m     spacy_ger \u001b[38;5;241m=\u001b[39m \u001b[43mspacy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mde\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m      9\u001b[0m     \u001b[38;5;66;03m# Create tokens using spacy and everything in lower case (which is what our vocab is)\u001b[39;00m\n\u001b[0;32m     10\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(sentence) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mstr\u001b[39m:\n",
      "File \u001b[1;32md:\\anaconda_download\\envs\\kan\\lib\\site-packages\\spacy\\__init__.py:51\u001b[0m, in \u001b[0;36mload\u001b[1;34m(name, vocab, disable, enable, exclude, config)\u001b[0m\n\u001b[0;32m     27\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload\u001b[39m(\n\u001b[0;32m     28\u001b[0m     name: Union[\u001b[38;5;28mstr\u001b[39m, Path],\n\u001b[0;32m     29\u001b[0m     \u001b[38;5;241m*\u001b[39m,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m     34\u001b[0m     config: Union[Dict[\u001b[38;5;28mstr\u001b[39m, Any], Config] \u001b[38;5;241m=\u001b[39m util\u001b[38;5;241m.\u001b[39mSimpleFrozenDict(),\n\u001b[0;32m     35\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Language:\n\u001b[0;32m     36\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Load a spaCy model from an installed package or a local path.\u001b[39;00m\n\u001b[0;32m     37\u001b[0m \n\u001b[0;32m     38\u001b[0m \u001b[38;5;124;03m    name (str): Package name or model path.\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m     49\u001b[0m \u001b[38;5;124;03m    RETURNS (Language): The loaded nlp object.\u001b[39;00m\n\u001b[0;32m     50\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[1;32m---> 51\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mutil\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m     52\u001b[0m \u001b[43m        \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m     53\u001b[0m \u001b[43m        \u001b[49m\u001b[43mvocab\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvocab\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m     54\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdisable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdisable\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m     55\u001b[0m \u001b[43m        
\u001b[49m\u001b[43menable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menable\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m     56\u001b[0m \u001b[43m        \u001b[49m\u001b[43mexclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexclude\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m     57\u001b[0m \u001b[43m        \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m     58\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32md:\\anaconda_download\\envs\\kan\\lib\\site-packages\\spacy\\util.py:471\u001b[0m, in \u001b[0;36mload_model\u001b[1;34m(name, vocab, disable, enable, exclude, config)\u001b[0m\n\u001b[0;32m    469\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m load_model_from_path(name, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[0;32m    470\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m OLD_MODEL_SHORTCUTS:\n\u001b[1;32m--> 471\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIOError\u001b[39;00m(Errors\u001b[38;5;241m.\u001b[39mE941\u001b[38;5;241m.\u001b[39mformat(name\u001b[38;5;241m=\u001b[39mname, full\u001b[38;5;241m=\u001b[39mOLD_MODEL_SHORTCUTS[name]))  \u001b[38;5;66;03m# type: ignore[index]\u001b[39;00m\n\u001b[0;32m    472\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIOError\u001b[39;00m(Errors\u001b[38;5;241m.\u001b[39mE050\u001b[38;5;241m.\u001b[39mformat(name\u001b[38;5;241m=\u001b[39mname))\n",
      "\u001b[1;31mOSError\u001b[0m: [E941] Can't find model 'de'. It looks like you're trying to load a model from a shortcut, which is obsolete as of spaCy v3.0. To load the model, use its full name instead:\n\nnlp = spacy.load(\"de_core_news_sm\")\n\nFor more details on the available models, see the models directory: https://spacy.io/models and if you want to create a blank model, use spacy.blank: nlp = spacy.blank(\"de\")"
     ]
    }
   ],
   "source": [
    "# Score test examples 1 through 99 and print BLEU scaled to the 0-100 range.\n",
    "# NOTE(review): the slice starts at index 1, so the first test example is\n",
    "# skipped - confirm that this is intended.\n",
    "eval_examples = test_data[1:100]\n",
    "score = bleu(eval_examples, model, german, english, device)\n",
    "print(f\"Bleu score {score*100:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "executionInfo": {
     "elapsed": 1218840,
     "status": "ok",
     "timestamp": 1600765234828,
     "user": {
      "displayName": "Shiv Ram Dubey",
      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14Gjtd_B-VgS3jkucANDIMEI21-5JajxJBYL4BagKOA=s64",
      "userId": "14553895138990175535"
     },
     "user_tz": -330
    },
    "id": "-PN1sqf5Kfv2"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [],
   "name": "ReLU_Seq2Seq-MachineTranslation1.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "TS_LIB",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
