{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "80406b79-eb7c-4bbf-92eb-0c48f14afd9a",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-08-19 01:40:50.345535: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
      "To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import torch\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import gensim\n",
    "import ast\n",
    "import umap\n",
    "from sentence_transformers import SentenceTransformer\n",
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "from sklearn.cluster import KMeans\n",
    "from sklearn.decomposition import PCA\n",
    "from embedding4bert import Embedding4BERT\n",
    "from sklearn.metrics import adjusted_rand_score\n",
    "from sklearn.manifold import TSNE\n",
    "import matplotlib.pyplot as plt\n",
    "from embedding4bert import Embedding4BERT\n",
    "import csv\n",
    "from scipy.spatial import distance\n",
    "from torch import nn\n",
    "from torch.utils.data import Dataset, DataLoader\n",
    "from torchvision import transforms, utils\n",
    "import torch.nn.functional as F\n",
    "import pickle\n",
    "import random\n",
    "import math\n",
    "from tqdm import tqdm\n",
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "1d6f6eda-106f-4b5e-b2f8-bd4793b580ef",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mon Aug 19 01:40:52 2024       \n",
      "+-----------------------------------------------------------------------------+\n",
      "| NVIDIA-SMI 525.125.06   Driver Version: 525.125.06   CUDA Version: 12.0     |\n",
      "|-------------------------------+----------------------+----------------------+\n",
      "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
      "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
      "|                               |                      |               MIG M. |\n",
      "|===============================+======================+======================|\n",
      "|   0  Tesla V100-PCIE...  Off  | 00000000:3B:00.0 Off |                    0 |\n",
      "| N/A   38C    P0    39W / 250W |    828MiB / 32768MiB |      0%      Default |\n",
      "|                               |                      |                  N/A |\n",
      "+-------------------------------+----------------------+----------------------+\n",
      "|   1  Tesla V100-PCIE...  Off  | 00000000:86:00.0 Off |                  Off |\n",
      "| N/A   36C    P0    37W / 250W |    572MiB / 16384MiB |      0%      Default |\n",
      "|                               |                      |                  N/A |\n",
      "+-------------------------------+----------------------+----------------------+\n",
      "                                                                               \n",
      "+-----------------------------------------------------------------------------+\n",
      "| Processes:                                                                  |\n",
      "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
      "|        ID   ID                                                   Usage      |\n",
      "|=============================================================================|\n",
      "|    0   N/A  N/A     16574      C   ...condaNewPython/bin/python      824MiB |\n",
      "|    1   N/A  N/A     16574      C   ...condaNewPython/bin/python      568MiB |\n",
      "+-----------------------------------------------------------------------------+\n"
     ]
    }
   ],
   "source": [
    "!nvidia-smi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "be851207-0f57-40f2-9505-0e91d7c4069d",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0.5</th>\n",
       "      <th>Unnamed: 0.4</th>\n",
       "      <th>Unnamed: 0.3</th>\n",
       "      <th>Unnamed: 0.2</th>\n",
       "      <th>Unnamed: 0.1</th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>Recipe_id</th>\n",
       "      <th>Region</th>\n",
       "      <th>Sub_region</th>\n",
       "      <th>Continent</th>\n",
       "      <th>ingredients</th>\n",
       "      <th>Instructions</th>\n",
       "      <th>Ingredient_List</th>\n",
       "      <th>Ing List Sent</th>\n",
       "      <th>Ing Inst</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3697</td>\n",
       "      <td>Indian Subcontinent</td>\n",
       "      <td>Bangladeshi</td>\n",
       "      <td>Asian</td>\n",
       "      <td>['dungeness crab', 'turmeric', 'salt', 'mustar...</td>\n",
       "      <td>rub the crabs with  teaspoon of the turmeric a...</td>\n",
       "      <td>['Indian_Subcontinent', 'dungeness_crab', 'tur...</td>\n",
       "      <td>This recipe from Indian_Subcontinent cuisine c...</td>\n",
       "      <td>This recipe from Indian_Subcontinent cuisine c...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3698</td>\n",
       "      <td>Indian Subcontinent</td>\n",
       "      <td>Bangladeshi</td>\n",
       "      <td>Asian</td>\n",
       "      <td>['tomato', 'cumin', 'turmeric', 'salt', 'water...</td>\n",
       "      <td>bring the tomatoes  cumin  turmeric  salt  and...</td>\n",
       "      <td>['Indian_Subcontinent', 'tomato', 'cumin', 'tu...</td>\n",
       "      <td>This recipe from Indian_Subcontinent cuisine c...</td>\n",
       "      <td>This recipe from Indian_Subcontinent cuisine c...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3699</td>\n",
       "      <td>Indian Subcontinent</td>\n",
       "      <td>Bangladeshi</td>\n",
       "      <td>Asian</td>\n",
       "      <td>['vegetable oil', 'onion', 'tomato', 'green ch...</td>\n",
       "      <td>heat  tablespoon oil in a skillet over medium ...</td>\n",
       "      <td>['Indian_Subcontinent', 'vegetable_oil', 'onio...</td>\n",
       "      <td>This recipe from Indian_Subcontinent cuisine c...</td>\n",
       "      <td>This recipe from Indian_Subcontinent cuisine c...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3700</td>\n",
       "      <td>Indian Subcontinent</td>\n",
       "      <td>Bangladeshi</td>\n",
       "      <td>Asian</td>\n",
       "      <td>['lamb shoulder', 'garam masala', 'salt', 'but...</td>\n",
       "      <td>season the lamb with garam_masala and salt   h...</td>\n",
       "      <td>['Indian_Subcontinent', 'lamb_shoulder', 'gara...</td>\n",
       "      <td>This recipe from Indian_Subcontinent cuisine c...</td>\n",
       "      <td>This recipe from Indian_Subcontinent cuisine c...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3701</td>\n",
       "      <td>Indian Subcontinent</td>\n",
       "      <td>Bangladeshi</td>\n",
       "      <td>Asian</td>\n",
       "      <td>['potato', 'pea', 'vegetable oil', 'cumin seed...</td>\n",
       "      <td>bring a medium saucepan of lightly salted wate...</td>\n",
       "      <td>['Indian_Subcontinent', 'potato', 'pea', 'vege...</td>\n",
       "      <td>This recipe from Indian_Subcontinent cuisine c...</td>\n",
       "      <td>This recipe from Indian_Subcontinent cuisine c...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0.5  Unnamed: 0.4  Unnamed: 0.3  Unnamed: 0.2  Unnamed: 0.1  \\\n",
       "0             0             0             0             0             0   \n",
       "1             1             1             1             1             1   \n",
       "2             2             2             2             2             2   \n",
       "3             3             3             3             3             3   \n",
       "4             4             4             4             4             4   \n",
       "\n",
       "   Unnamed: 0  Recipe_id               Region   Sub_region Continent  \\\n",
       "0           0       3697  Indian Subcontinent  Bangladeshi     Asian   \n",
       "1           1       3698  Indian Subcontinent  Bangladeshi     Asian   \n",
       "2           2       3699  Indian Subcontinent  Bangladeshi     Asian   \n",
       "3           3       3700  Indian Subcontinent  Bangladeshi     Asian   \n",
       "4           4       3701  Indian Subcontinent  Bangladeshi     Asian   \n",
       "\n",
       "                                         ingredients  \\\n",
       "0  ['dungeness crab', 'turmeric', 'salt', 'mustar...   \n",
       "1  ['tomato', 'cumin', 'turmeric', 'salt', 'water...   \n",
       "2  ['vegetable oil', 'onion', 'tomato', 'green ch...   \n",
       "3  ['lamb shoulder', 'garam masala', 'salt', 'but...   \n",
       "4  ['potato', 'pea', 'vegetable oil', 'cumin seed...   \n",
       "\n",
       "                                        Instructions  \\\n",
       "0  rub the crabs with  teaspoon of the turmeric a...   \n",
       "1  bring the tomatoes  cumin  turmeric  salt  and...   \n",
       "2  heat  tablespoon oil in a skillet over medium ...   \n",
       "3  season the lamb with garam_masala and salt   h...   \n",
       "4  bring a medium saucepan of lightly salted wate...   \n",
       "\n",
       "                                     Ingredient_List  \\\n",
       "0  ['Indian_Subcontinent', 'dungeness_crab', 'tur...   \n",
       "1  ['Indian_Subcontinent', 'tomato', 'cumin', 'tu...   \n",
       "2  ['Indian_Subcontinent', 'vegetable_oil', 'onio...   \n",
       "3  ['Indian_Subcontinent', 'lamb_shoulder', 'gara...   \n",
       "4  ['Indian_Subcontinent', 'potato', 'pea', 'vege...   \n",
       "\n",
       "                                       Ing List Sent  \\\n",
       "0  This recipe from Indian_Subcontinent cuisine c...   \n",
       "1  This recipe from Indian_Subcontinent cuisine c...   \n",
       "2  This recipe from Indian_Subcontinent cuisine c...   \n",
       "3  This recipe from Indian_Subcontinent cuisine c...   \n",
       "4  This recipe from Indian_Subcontinent cuisine c...   \n",
       "\n",
       "                                            Ing Inst  \n",
       "0  This recipe from Indian_Subcontinent cuisine c...  \n",
       "1  This recipe from Indian_Subcontinent cuisine c...  \n",
       "2  This recipe from Indian_Subcontinent cuisine c...  \n",
       "3  This recipe from Indian_Subcontinent cuisine c...  \n",
       "4  This recipe from Indian_Subcontinent cuisine c...  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df= pd.read_csv(\"Recipe DB Modified for BERT.csv\")\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "f56d1a9d-eb66-4334-be57-478d5127b0d0",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "df['Ingredient_List']= df['Ingredient_List'].apply(ast.literal_eval)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "5a725178-1907-46f9-a1e3-59371b3a3790",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Canadian'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"Ingredient_List\"][51348][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "323f9d46-2cc3-4599-84be-525ea75a1a37",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "it_ings = [item for sublist in df[df[\"Region\"]==\"Italian\"][\"Ingredient_List\"].tolist() for item in sublist]\n",
    "mex_ings = [item for sublist in df[df[\"Region\"]==\"Mexican\"][\"Ingredient_List\"].tolist() for item in sublist]\n",
    "ca_ings = [item for sublist in df[df[\"Region\"]==\"Canadian\"][\"Ingredient_List\"].tolist() for item in sublist]\n",
    "in_ings = [item for sublist in df[df[\"Region\"]==\"Indian Subcontinent\"][\"Ingredient_List\"].tolist() for item in sublist]\n",
    "sa_ings = [item for sublist in df[df[\"Region\"]==\"South American\"][\"Ingredient_List\"].tolist() for item in sublist]\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "it_ings= list(set(it_ings))\n",
    "mex_ings= list(set(mex_ings))\n",
    "ca_ings= list(set(ca_ings))\n",
    "in_ings= list(set(in_ings))\n",
    "sa_ings= list(set(sa_ings))\n",
    "\n",
    "\n",
    "it_ings.remove(\"Italian\")\n",
    "mex_ings.remove(\"Mexican\")\n",
    "ca_ings.remove(\"Canadian\")\n",
    "in_ings.remove(\"Indian_Subcontinent\")\n",
    "sa_ings.remove(\"South_American\")\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "a6d102ae-1af4-449d-98cd-01b7167ba769",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "\n",
    "#it_ings.remove(\"italian\")\n",
    "#mex_ings.remove(\"mexican\")\n",
    "#ca_ings.remove(\"canadian\")\n",
    "#in_ings.remove(\"indian_subcontinent\")\n",
    "#sa_ings.remove(\"south_american\")\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "cf29a762-d9de-45bb-9722-252244a1ad72",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "it_ings.remove(\"italian\")\n",
    "mex_ings.remove(\"mexican\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "ceefa321-3d42-4d19-8256-4887e53c200b",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5264\n",
      "5071\n",
      "3408\n",
      "2657\n",
      "3497\n"
     ]
    }
   ],
   "source": [
    "#Number of italian ings\n",
    "\n",
    "print(len(it_ings))\n",
    "print(len(mex_ings))\n",
    "print(len(ca_ings))\n",
    "print(len(in_ings))\n",
    "print(len(sa_ings))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "faa91329-8719-4c68-8751-57026d5b8b7c",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5071"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Number of mexican ings\n",
    "\n",
    "len(mex_ings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "067f3d80-73a4-435f-9d8a-10115d39ad41",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5265\n",
      "5072\n"
     ]
    }
   ],
   "source": [
    "print(len(it_ings))\n",
    "print(len(mex_ings))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "81a5f467-b3b5-4a52-b287-6a50035d5271",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "str"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(df_test[\"Ing List Sent\"][731])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "fb9b341a-c37b-4d08-b0a3-8a8adfd69a5d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "#pip install --upgrade embedding4bert"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "3a6a5d33-caf0-4025-8ed7-7cbb45b387e7",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "#Creating a list of all Italian Ing_Inst sentences\n",
    "it_ing_inst=df[df[\"Region\"]==\"Italian\"][\"Ing Inst\"].tolist()\n",
    "\n",
    "#Creating a list of all Mexican Ing_Inst sentences\n",
    "mex_ing_inst=df[df[\"Region\"]==\"Mexican\"][\"Ing Inst\"].tolist()\n",
    "\n",
    "#Creating a list of all Mexican Ing_Inst sentences\n",
    "ca_ing_inst=df[df[\"Region\"]==\"Canadian\"][\"Ing Inst\"].tolist()\n",
    "\n",
    "#Creating a list of all Mexican Ing_Inst sentences\n",
    "in_ing_inst=df[df[\"Region\"]==\"Indian Subcontinent\"][\"Ing Inst\"].tolist()\n",
    "\n",
    "#Creating a list of all Mexican Ing_Inst sentences\n",
    "sa_ing_inst=df[df[\"Region\"]==\"South American\"][\"Ing Inst\"].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "adc7c34f-acb4-4405-96d7-7ce5936702ef",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "16574\n",
      "14447\n",
      "6694\n",
      "6463\n",
      "7171\n"
     ]
    }
   ],
   "source": [
    "print(len(it_ing_inst))\n",
    "print(len(mex_ing_inst))\n",
    "print(len(ca_ing_inst))\n",
    "print(len(in_ing_inst))\n",
    "print(len(sa_ing_inst))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "c8cb6779-f475-489e-9107-6b4e71ef96e2",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "51349"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_sent=df[\"Ing Inst\"].tolist()\n",
    "len(all_sent)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f8299173-9d03-486f-8a93-70b17b818a47",
   "metadata": {},
   "source": [
    "## Generating Cuisine Embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "ff01835c-1c5e-4336-8928-9f9d651b450b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "model = SentenceTransformer('all-MiniLM-L6-v2')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "fdb28b8a-f4e1-49e0-b80a-f12c97e78cca",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "all_embs=model.encode(all_sent)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "c7a92fa2-2893-45ed-b6d0-7864eadcd881",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(51349, 384)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sbert_df = pd.DataFrame(all_embs) \n",
    "sbert_df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "9f9f0dea-42a4-4e0f-a3e4-7a9061849912",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "384"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(all_embs[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "d94d2081-b529-4411-9543-67e92494cbc7",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "categories= df[\"Region\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "dd66dc9b-9a41-4747-a31f-028c5d3fe272",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "cuisine_emb_dict={}\n",
    "for cuisine in categories:\n",
    "    indices= df[df[\"Region\"]==cuisine].index\n",
    "    selected_rows=sbert_df.loc[indices]\n",
    "    cuisine_emb_dict[cuisine]=np.mean(selected_rows)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "fe57e099-e563-485b-baf5-299bbd40e121",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "384"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(cuisine_emb_dict[\"Italian\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "45c95392-b5da-4f72-87be-5886ecd1ab02",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "cuisine_emb_dict = {key: value.to_numpy() for key, value in cuisine_emb_dict.items()}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "31391faf-a5dc-42b3-b12b-10028ba622df",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "it_embs=model.encode(it_ing_inst)\n",
    "mex_embs=model.encode(mex_ing_inst)\n",
    "ca_embs=model.encode(ca_ing_inst)\n",
    "in_embs=model.encode(in_ing_inst)\n",
    "sa_embs=model.encode(sa_ing_inst)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "cd3a5939-5b67-4bcb-ab47-d393bb758ab4",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "numpy.ndarray"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(it_embs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "e359d451-b7df-470b-aff1-b4c11d1f227c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "it_mean= np.mean(it_embs,axis=0)\n",
    "mex_mean = np.mean(mex_embs,axis=0)\n",
    "ca_mean = np.mean(ca_embs,axis=0)\n",
    "in_mean = np.mean(in_embs,axis=0)\n",
    "sa_mean = np.mean(sa_embs,axis=0)\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "a51e1bd6-4155-4077-b44e-3e2995a768e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "cuisine_emb_dict={}\n",
    "temp=np.zeros(384)\n",
    "cuisine_emb_dict[\"Italian\"]=np.hstack((it_mean,temp))\n",
    "cuisine_emb_dict[\"Mexican\"]=np.hstack((mex_mean,temp))\n",
    "cuisine_emb_dict[\"Canadian\"]=np.hstack((ca_mean,temp))\n",
    "cuisine_emb_dict[\"Indian Subcontinent\"]=np.hstack((in_mean,temp))\n",
    "cuisine_emb_dict[\"South American\"]=np.hstack((sa_mean,temp))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "2be8ea40-92e0-40dd-925f-84f5bc14644f",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "numpy.ndarray"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(cuisine_emb_dict[\"Italian\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "da90dcc2-cb0e-44b7-b62d-829c8e45a042",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "768\n",
      "768\n",
      "768\n",
      "768\n",
      "768\n"
     ]
    }
   ],
   "source": [
    "for key in cuisine_emb_dict.keys():\n",
    "    \n",
    "\n",
    "    print(len(cuisine_emb_dict[key]))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "9c11b533-a023-4f32-93bd-a562c9239226",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Dumping the SBERT Cuisine Embeddings as pickle file\n",
    "\n",
    "with open(\"All_SBERT_Pooling_V1.pkl\", \"wb\") as file:\n",
    "  pickle.dump(cuisine_emb_dict, file)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "1f4c6765-5e49-40ed-835a-2992b50ecee4",
   "metadata": {},
   "outputs": [],
   "source": [
    "f= open(\"All_SBERT_Pooling_V1.pkl\",\"rb\")\n",
    "old_embs= pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "6e929918-9877-4c75-9510-eb5d53896c91",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.8564218808017292"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cosine_similarity(old_embs[\"Mexican\"].reshape(1,-1),old_embs[\"Canadian\"].reshape(1,-1))[0][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "4de5873d-03f4-4645-9290-3f44ac2741f9",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.85642177"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cosine_similarity(cuisine_emb_dict[\"Mexican\"].reshape(1,-1),cuisine_emb_dict[\"Canadian\"].reshape(1,-1))[0][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "28cad4f3-c5bf-432f-ac38-12f8a11916c3",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
