{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "60bb400a-3551-47ce-8926-48c8b31ba033",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "from gensim.models import Word2Vec\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import re\n",
    "import gensim\n",
    "import ast\n",
    "import pickle\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "f57486fd-9a5a-4b74-aa35-c7c2e0b865f3",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "f=open(\"Cuisine Fusion Data/tdata.data\",'rb')\n",
    "tdata=pickle.load(f)\n",
    "\n",
    "df= pd.read_csv(\"Recipe DB Modified for BERT.csv\")\n",
    "df.head()\n",
    "\n",
    "df_full= pd.read_csv(\"Cuisine Fusion Data/full_region.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "a99d4ff8-e366-4bd3-b976-71c3d31a158c",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Recipe_id</th>\n",
       "      <th>Region</th>\n",
       "      <th>Sub_region</th>\n",
       "      <th>Continent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2610</td>\n",
       "      <td>Middle Eastern</td>\n",
       "      <td>Egyptian</td>\n",
       "      <td>African</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2611</td>\n",
       "      <td>Middle Eastern</td>\n",
       "      <td>Egyptian</td>\n",
       "      <td>African</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2612</td>\n",
       "      <td>Middle Eastern</td>\n",
       "      <td>Egyptian</td>\n",
       "      <td>African</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2613</td>\n",
       "      <td>Middle Eastern</td>\n",
       "      <td>Egyptian</td>\n",
       "      <td>African</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2614</td>\n",
       "      <td>Middle Eastern</td>\n",
       "      <td>Egyptian</td>\n",
       "      <td>African</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Recipe_id          Region Sub_region Continent\n",
       "0       2610  Middle Eastern   Egyptian   African\n",
       "1       2611  Middle Eastern   Egyptian   African\n",
       "2       2612  Middle Eastern   Egyptian   African\n",
       "3       2613  Middle Eastern   Egyptian   African\n",
       "4       2614  Middle Eastern   Egyptian   African"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_full.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "49f1d41f-57fc-4897-a64f-cab78b648eaf",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "51349"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(tdata)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d562b980-1f15-4a33-a028-db17ab8ad4f0",
   "metadata": {},
   "outputs": [],
   "source": [
    "tdata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "4f3e467f-a495-41cb-b929-eb798121e058",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'This recipe from Indian_Subcontinent cuisine contains dungeness_crab turmeric salt mustard_seed water mustard_oil red_onion potato_lengthwise clove cinnamon green_cardamom_pod black_peppercorn tomato thai_green_chile garlic_paste ginger_paste cayenne_pepper white_sugar garnish wedge_lemon cilantro as ingredients rub the crabs with  teaspoon of the turmeric and  teaspoon salt  let them marinate for  hour   combine the mustard_seed and hot_water in a small bowl and let stand for  minutes   use a mortar and pestle to grind the seeds into a coarse paste   heat the oil in a wok or kadhai over medium heat   add the crabs and stir fry until they change color  about  minutes   remove the crabs from the oil and set aside   add the sliced onions to the wok and cook and stir over medium heat until the onions are translucent  about  minutes   raise the heat to high  add the potatoes  and cook  stirring constantly  for about  minutes   add the cloves  cinnamon stick  cardamom_pods  and peppercorns  and stir for thirty seconds   stir in the tomatoes  ginger_paste  and garlic_paste   halve three of the chiles and add them to the wok   cook and stir for an additional minute or two over high heat   reduce the heat to medium  add the remaining  teaspoon turmeric  the cayenne_pepper  and the mustard_paste and stir to combine   add the crabs to the wok and pour in just enough water to cover the vegetables   bring the water to a boil and stir in the sugar and salt to taste   cover the wok  reduce the heat  and simmer until the potatoes are tender and the water is reduced by half  about  minutes   remove the lid  stir  and simmer until the gravy is thickened  about  minutes more   squeeze the lemon_wedge over the finished dish   garnish with chopped cilantro and sliced green_chile and serve hot  with rice '"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"Ing Inst\"][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "ae3de101-2187-476a-9b60-498c115e8193",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "df['Ingredient_List']= df['Ingredient_List'].apply(ast.literal_eval)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "8d0b22a1-4228-45c1-b4d0-3baadb90ade4",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of ingredients in Italian: 5264\n",
      "Number of ingredients in Mexican: 5071\n",
      "Number of ingredients in Canadian: 3407\n",
      "Number of ingredients in South American: 3496\n",
      "Number of ingredients in Indian Subcontinent: 2657\n"
     ]
    }
   ],
   "source": [
    "cuisine_list= [\"Italian\",\"Mexican\",\"Canadian\",\"South American\",\"Indian Subcontinent\"]\n",
    "ing_dict={}\n",
    "for cuisine in cuisine_list:\n",
    "    ing_dict[cuisine] = list(set([item for sublist in df[df[\"Region\"]==cuisine][\"Ingredient_List\"].tolist() for item in sublist if item not in [\"Italian\",\"italian\",\"Mexican\",\"mexican\",\"Canadian\",\"canadian\",\"South_American\",\"south_american\",\"Indian_Subcontinent\",\"indian_subcontinent\"]]))\n",
    "    print(f\"Number of ingredients in {cuisine}: {len(ing_dict[cuisine])}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "db5cb924-fc22-4530-ba4f-6111c67fdf68",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\"italian\" in ing_dict[\"Italian\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "9e100e25-fd4f-4fc9-9a3b-7e5e6575606f",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'This recipe from Indian_Subcontinent cuisine contains dungeness_crab turmeric salt mustard_seed water mustard_oil red_onion potato_lengthwise clove cinnamon green_cardamom_pod black_peppercorn tomato thai_green_chile garlic_paste ginger_paste cayenne_pepper white_sugar garnish wedge_lemon cilantro as ingredients rub the crabs with  teaspoon of the turmeric and  teaspoon salt  let them marinate for  hour   combine the mustard_seed and hot_water in a small bowl and let stand for  minutes   use a mortar and pestle to grind the seeds into a coarse paste   heat the oil in a wok or kadhai over medium heat   add the crabs and stir fry until they change color  about  minutes   remove the crabs from the oil and set aside   add the sliced onions to the wok and cook and stir over medium heat until the onions are translucent  about  minutes   raise the heat to high  add the potatoes  and cook  stirring constantly  for about  minutes   add the cloves  cinnamon stick  cardamom_pods  and peppercorns  and stir for thirty seconds   stir in the tomatoes  ginger_paste  and garlic_paste   halve three of the chiles and add them to the wok   cook and stir for an additional minute or two over high heat   reduce the heat to medium  add the remaining  teaspoon turmeric  the cayenne_pepper  and the mustard_paste and stir to combine   add the crabs to the wok and pour in just enough water to cover the vegetables   bring the water to a boil and stir in the sugar and salt to taste   cover the wok  reduce the heat  and simmer until the potatoes are tender and the water is reduced by half  about  minutes   remove the lid  stir  and simmer until the gravy is thickened  about  minutes more   squeeze the lemon_wedge over the finished dish   garnish with chopped cilantro and sliced green_chile and serve hot  with rice '"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"Ing Inst\"][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "fb0b99f0-7c0e-48f7-905d-dfa07574ec39",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['This recipe from Italian cuisine contains extra_beef egg breadcrumb parmesan_cheese basil_leaf italian_flat_leaf_parsley green_onion chicken_broth escarole lemon orzo as ingredients mix together the meat  egg  bread_crumbs  cheese  basil  parsley  and green_onions  shape into  inch balls   pour broth into a large saucepan over high heat   when boiling  drop in meatballs   stir in escarole  lemon_zest and orzo   return to a boil  reduce heat to medium   cook at a slow boil for  minutes or until orzo is tender  stirring frequently   serve sprinkled with cheese ', 'This recipe from Italian cuisine contains olive_oil garlic onion celery carrot chicken_broth water tomato_sauce red_wine kidney_bean green_bean baby_spinach zucchini oregano basil salt_pepper seashell_pasta parmesan_cheese as ingredients in a large stock pot  over medium low heat  heat olive_oil and saute garlic for  to  minutes   add onion and saute for  to  minutes   add celery and carrots  saute for  to  minutes   add chicken_broth  water and tomato_sauce  bring to boil  stirring frequently   if desired add red_wine at this point   reduce heat to low and add kidney_beans  green_beans  spinach leaves  zucchini  oregano  basil  salt and pepper   simmer for  to  minutes  the longer the better   fill a medium saucepan with water and bring to a boil   add macaroni and cook until tender   drain water and set aside   once pasta is cooked and soup is heated through place  tablespoons cooked pasta into individual serving bowls   ladle soup on top of pasta and sprinkle parmesan_cheese on top   spray with olive_oil and serve ']\n",
      "Number of recipes in Italian: 16574\n",
      "['This recipe from Mexican cuisine contains cumin chili_powder buttery tilapia_fillet torn_romaine_lettuce_leaf tomato red_onion as ingredients mix cumin and chili_powder together and sprinkle over tilapia chunks to coat   heat non stick skillet over medium high heat and melt buttery spread or oil   saute tilapia on each side until cooked through   to  minutes per side   remove from pan and set aside   in same skillet  prepare knorr fiesta sidestm mexican_rice according to package directions   transfer tilapia to the rice mixture   divide lettuce between four salad bowls or plates   divide the rice and tilapia mixture among the dishes   top with tomatoes and onions   serve with your favorite light dressing  such as ranch ', 'This recipe from Mexican cuisine contains mango_salsa mango avocado tomato red_onion red_pepper flat_leaf_parsley canola_oil brown_sugar lime cider_vinegar pepper_sauce salt_black_pepper taco_shell mahi_fillet black_pepper paprika salt olive_oil as ingredients stir mango  avocado  tomato  red_onion  red_pepper  parsley  canola_oil  brown_sugar  lime_juice  cider_vinegar  hot_pepper_sauce  salt  and ground black_pepper together in a bowl   refrigerate salsa for at least  hour   preheat oven to  degrees f   heat taco_shells in preheated oven until crisp  about  minutes   season mahi mahi with  teaspoon ground black_pepper  paprika  and  teaspoon salt   heat olive_oil in a skillet over medium high heat   cook mahi mahi in olive_oil until fish flakes easily  about  minutes per side   place mahi mahi slices in taco_shells and top with mango_salsa ']\n",
      "Number of recipes in Mexican: 14447\n",
      "['This recipe from Canadian cuisine contains pearl_barley water canola_oil onion celery carrot white_mushroom garlic tomato_paste beef_stock oregano salt black_pepper worcestershire_sauce as ingredients combine pearl_barley and boiling water in a bowl  set aside to soak for  minutes   heat canola_oil in a large saucepan over medium heat   cook and stir onion  celery  and carrot in hot oil until softened   to  minutes   stir mushrooms into the onion mixture  cook and stir until softened and brown  about  minutes   drain the barley  discarding any remaining water from soaking   stir the barley into the mushroom mixture   stir the garlic and tomato_paste into the mixture  stir until combined  then pour the beef_stock into the sauce pan   season with the oregano  salt  black_pepper  and worcestershire_sauce   bring the soup to a boil  reduce heat to low  and cook at a simmer for  hour ', 'This recipe from Canadian cuisine contains molasses brown_sugar ketchup lemon_juice oregano garlic_powder onion_powder ring_onion as ingredients combine the molasses  brown_sugar  ketchup  lemon_juice  oregano  garlic_powder  onion_powder  and onion in a bowl  stir to combine   pour marinade over meat and allow to sit in the refrigerator overnight before cooking ']\n",
      "Number of recipes in Canadian: 6694\n",
      "['This recipe from South_American cuisine contains egg peel_lime milk rum nutmeg as ingredients whisk eggs and lime_peel together in a large bowl until light and fluffy   stir in evaporated milk and condensed milk to taste   add rum  bitters  and nutmeg   remove lime_peel and serve over crushed ice ', \"This recipe from South_American cuisine contains milk vanilla_extract purpose_flour confectioner_sugar shortening butter salt egg_yolk heavy_cream confectioner_'_sugar as ingredients to make filling  pour the condensed milk into a heavy saucepan   cook over medium heat until hot   stirring constantly  lower the heat to low and continue to cook the milk takes on a golden color  about   minutes   remove from heat and add vanilla   transfer to a bowl and cover directly with plastic wrap   set aside and let cool to room temperature   preheat oven to  degrees f   combine the flour   tablespoons confectioners sugar  and the salt in a bowl   cut in the shortening and the butter until crumbly   beat in the yolk and the cream   mix to form a dough  adding more cream if necessary   roll dough on a floured counter to  inch thick   cut into   inch rounds and place on ungreased sheets   prick the top of the rounds a few times with a fork   bake for   minutes or until the edges are light brown   cool on sheet  minute and remove to rack   spread the filling on one half and sandwich with another round   dust cookies with confectioners sugar \"]\n",
      "Number of recipes in South American: 7171\n",
      "['This recipe from Indian_Subcontinent cuisine contains dungeness_crab turmeric salt mustard_seed water mustard_oil red_onion potato_lengthwise clove cinnamon green_cardamom_pod black_peppercorn tomato thai_green_chile garlic_paste ginger_paste cayenne_pepper white_sugar garnish wedge_lemon cilantro as ingredients rub the crabs with  teaspoon of the turmeric and  teaspoon salt  let them marinate for  hour   combine the mustard_seed and hot_water in a small bowl and let stand for  minutes   use a mortar and pestle to grind the seeds into a coarse paste   heat the oil in a wok or kadhai over medium heat   add the crabs and stir fry until they change color  about  minutes   remove the crabs from the oil and set aside   add the sliced onions to the wok and cook and stir over medium heat until the onions are translucent  about  minutes   raise the heat to high  add the potatoes  and cook  stirring constantly  for about  minutes   add the cloves  cinnamon stick  cardamom_pods  and peppercorns  and stir for thirty seconds   stir in the tomatoes  ginger_paste  and garlic_paste   halve three of the chiles and add them to the wok   cook and stir for an additional minute or two over high heat   reduce the heat to medium  add the remaining  teaspoon turmeric  the cayenne_pepper  and the mustard_paste and stir to combine   add the crabs to the wok and pour in just enough water to cover the vegetables   bring the water to a boil and stir in the sugar and salt to taste   cover the wok  reduce the heat  and simmer until the potatoes are tender and the water is reduced by half  about  minutes   remove the lid  stir  and simmer until the gravy is thickened  about  minutes more   squeeze the lemon_wedge over the finished dish   garnish with chopped cilantro and sliced green_chile and serve hot  with rice ', 'This recipe from Indian_Subcontinent cuisine contains tomato cumin turmeric salt water vegetable_oil whitefish_fillet mustard_seed cumin_seed black_cumin_seed fennel_seed 
fenugreek_seed as ingredients bring the tomatoes  cumin  turmeric  salt  and water to a boil in a four quart saucepan  reduce heat to medium and maintain a simmer   heat the oil in a skillet over medium high heat  cook the fish in the oiled skillet until golden brown   to  minutes per side   transfer the fish to the saucepan   heat a separate skillet over medium heat  and toast the mustard_seeds  cumin_seeds  black_cumin_seeds  fennel_seeds  and fenugreek_seeds until fragrant  just a few seconds   stir the spices into the saucepan   simmer until the flavors integrate  about  minutes more   serve hot ']\n",
      "Number of recipes in Indian Subcontinent: 6463\n"
     ]
    }
   ],
   "source": [
    "cuisine_list= [\"Italian\",\"Mexican\",\"Canadian\",\"South American\",\"Indian Subcontinent\"]\n",
    "ing_inst_dict={}\n",
    "for cuisine in cuisine_list:\n",
    "    ing_inst_dict[cuisine] = [item for item in df[df[\"Region\"]==cuisine][\"Ing Inst\"]]\n",
    "    print(ing_inst_dict[cuisine][:2])\n",
    "    print(f\"Number of recipes in {cuisine}: {len(ing_inst_dict[cuisine])}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "a689121c-3cc6-4ddc-a665-cf101d0244dd",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of Recipes in Italian 16574\n",
      "Number of Recipes in Mexican 14447\n",
      "Number of Recipes in Canadian 6694\n",
      "Number of Recipes in South American 7171\n",
      "Number of Recipes in Indian Subcontinent 6463\n"
     ]
    }
   ],
   "source": [
    "cuisine_list= [\"Italian\",\"Mexican\",\"Canadian\",\"South American\",\"Indian Subcontinent\"]\n",
    "sentences_dict={}\n",
    "for cuisine in cuisine_list:\n",
    "    sentences_dict[cuisine]=[item.split() for item in ing_inst_dict[cuisine]]\n",
    "cuisine_list= [\"Italian\",\"Mexican\",\"Canadian\",\"South American\",\"Indian Subcontinent\"]\n",
    "    print(f\"Number of Recipes in {cuisine} {len(sentences_dict[cuisine])}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "747394e9-9621-4f69-9c53-72000cd91b23",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "cuisine_list= [\"Italian\",\"Mexican\",\"Canadian\",\"South American\",\"Indian Subcontinent\"]\n",
    "\n",
    "#Combined Bag of Words (CBoW) model \n",
    "model_w2v_sg={}\n",
    "model_w2v_cbow={}\n",
    "#Model for italian cuisine\n",
    "for cuisine in cuisine_list:\n",
    "    model_w2v_sg[cuisine] =Word2Vec(sentences_dict[cuisine], vector_size=100, window=10,workers=4, epochs=10, min_count=1,sg=1)\n",
    "    model_w2v_cbow[cuisine] =Word2Vec(sentences_dict[cuisine], vector_size=100, window=10,workers=4, epochs=10, min_count=1,sg=0)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "d422584c-0f7d-4dc1-93ef-aff0502c73fa",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Specify the file path where you want to save the pickle file\n",
    "file_path = \"w2v_model_ing_inst_sg.pkl\"\n",
    "\n",
    "# Open the file in binary write mode and save the dictionary\n",
    "with open(file_path, 'wb') as file:\n",
    "    pickle.dump(model_w2v_sg, file)\n",
    "    \n",
    "# Specify the file path where you want to save the pickle file\n",
    "file_path = \"w2v_model_ing_inst_cbow.pkl\"\n",
    "\n",
    "# Open the file in binary write mode and save the dictionary\n",
    "with open(file_path, 'wb') as file:\n",
    "    pickle.dump(model_w2v_cbow, file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "e5d0fae9-1de5-4667-8f49-c671d1830ffa",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "f= open(\"w2v_model_ing_inst_sg.pkl\",\"rb\")\n",
    "model_w2v_sg= pickle.load(f)\n",
    "\n",
    "f= open(\"w2v_model_ing_inst_cbow.pkl\",\"rb\")\n",
    "model_w2v_cbow= pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "41296142-d052-49af-9e7c-06cca8da447e",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "#Renaming old keys in model_w2v_sg to new keys\n",
    "\n",
    "#model_w2v_sg[\"South_American\"]=model_w2v_sg.pop(\"South American\")\n",
    "#model_w2v_sg[\"Indian_Subcontinent\"]=model_w2v_sg.pop(\"Indian Subcontinent\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "7110e30a-4328-45a5-9737-5818c5797027",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "#Renaming old keys in model_w2v_cbow to new keys\n",
    "\n",
    "#model_w2v_cbow[\"South_American\"]=model_w2v_cbow.pop(\"South American\")\n",
    "#model_w2v_cbow[\"Indian_Subcontinent\"]=model_w2v_cbow.pop(\"Indian Subcontinent\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "64f5126a-391b-471f-b2bc-d4b5feb6b0a4",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('sea_salt', 0.7925021052360535), ('kosher_salt', 0.7550740838050842), ('red_currant_jelly', 0.6913102269172668), ('thyme', 0.6853688359260559), ('chipotle_chile_powder', 0.6740067005157471), ('red_pear_tomato', 0.6648855805397034), ('browncrisp_bacon', 0.6634172797203064), ('tomato_marzano', 0.6624129414558411), ('red_hot_pepper', 0.6623046398162842), ('nut_almond', 0.6606847643852234)]\n",
      "[('sea_salt', 0.7475336790084839), ('kosher_salt', 0.7240864634513855), ('gorgazola', 0.48040276765823364), ('seasoning_salt', 0.46795520186424255), ('anaheim', 0.44233712553977966), ('garlic_salt', 0.4410187900066376), ('classico_cheese_pasta_sauce', 0.4230610430240631), ('wheat_baguette', 0.4029676020145416), ('seafodd', 0.3931335508823395), ('turkey_total', 0.38992637395858765)]\n"
     ]
    }
   ],
   "source": [
    "print(model_w2v_sg[\"Italian\"].wv.most_similar('salt'))\n",
    "print(model_w2v_cbow[\"Italian\"].wv.most_similar('salt'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "36c22faa-9dab-4eb3-b417-89cedd67322a",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['and', 'the', 'to']"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(model_w2v_sg[\"South_American\"].wv.key_to_index.keys())[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "23cf7b98-1b8b-412b-9a3b-15819c419b01",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cosine similarity between Italian and Mexican: [[0.15546444]]\n",
      "Cosine similarity between Italian and Canadian: [[0.34845704]]\n",
      "Cosine similarity between Italian and South_American: [[0.02999489]]\n",
      "Cosine similarity between Italian and Indian_Subcontinent: [[0.20211038]]\n",
      "Cosine similarity between Mexican and Italian: [[0.15546444]]\n",
      "Cosine similarity between Mexican and Canadian: [[0.2172256]]\n",
      "Cosine similarity between Mexican and South_American: [[0.09514782]]\n",
      "Cosine similarity between Mexican and Indian_Subcontinent: [[0.23721802]]\n",
      "Cosine similarity between Canadian and Italian: [[0.34845704]]\n",
      "Cosine similarity between Canadian and Mexican: [[0.2172256]]\n",
      "Cosine similarity between Canadian and South_American: [[0.24870455]]\n",
      "Cosine similarity between Canadian and Indian_Subcontinent: [[0.18537205]]\n",
      "Cosine similarity between South_American and Italian: [[0.02999489]]\n",
      "Cosine similarity between South_American and Mexican: [[0.09514782]]\n",
      "Cosine similarity between South_American and Canadian: [[0.24870455]]\n",
      "Cosine similarity between South_American and Indian_Subcontinent: [[0.34993562]]\n",
      "Cosine similarity between Indian_Subcontinent and Italian: [[0.20211038]]\n",
      "Cosine similarity between Indian_Subcontinent and Mexican: [[0.23721802]]\n",
      "Cosine similarity between Indian_Subcontinent and Canadian: [[0.18537205]]\n",
      "Cosine similarity between Indian_Subcontinent and South_American: [[0.34993562]]\n"
     ]
    }
   ],
   "source": [
    "cuisine=[\"Italian\",\"Mexican\",\"Canadian\",\"South_American\",\"Indian_Subcontinent\"]\n",
    "for c1 in cuisine:\n",
    "    for c2 in cuisine:\n",
    "        \n",
    "        if c1 !=c2:\n",
    "            print(f\"Cosine similarity between {c1} and {c2}: {cosine_similarity(model_w2v_sg[c1].wv[c1].reshape(1,-1),model_w2v_sg[c2].wv[c2].reshape(1,-1))}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "4aa0720e-221d-45b3-aab6-46578215cba3",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "'italian'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[27], line 6\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m c2 \u001b[38;5;129;01min\u001b[39;00m cuisine:\n\u001b[1;32m      5\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m c1 \u001b[38;5;241m!=\u001b[39mc2:\n\u001b[0;32m----> 6\u001b[0m         \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCosine similarity between \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mc1\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m and \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mc2\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcosine_similarity(model_w2v_cbow[c1]\u001b[38;5;241m.\u001b[39mwv[c1]\u001b[38;5;241m.\u001b[39mreshape(\u001b[38;5;241m1\u001b[39m,\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m),model_w2v_cbow[c2]\u001b[38;5;241m.\u001b[39mwv[c2]\u001b[38;5;241m.\u001b[39mreshape(\u001b[38;5;241m1\u001b[39m,\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m))\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
      "\u001b[0;31mKeyError\u001b[0m: 'italian'"
     ]
    }
   ],
   "source": [
    "cuisine=[\"Italian\",\"Mexican\",\"Canadian\",\"South_American\",\"Indian_Subcontinent\"]\n",
    "for c1 in cuisine:\n",
    "    for c2 in cuisine:\n",
    "        \n",
    "        if c1 !=c2:\n",
    "            print(f\"Cosine similarity between {c1} and {c2}: {cosine_similarity(model_w2v_cbow[c1].wv[c1].reshape(1,-1),model_w2v_[c2].wv[c2].reshape(1,-1))}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "dc1e3ebf-6dad-4dfd-8179-8afcf97066af",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['Italian', 'Mexican', 'Canadian', 'South American', 'Indian Subcontinent'])"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ing_dict.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "78422c76-19e4-4758-8de1-2fd56c27e811",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['Italian', 'Mexican', 'Canadian', 'South_American', 'Indian_Subcontinent'])"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_w2v_sg.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "c66c76b5-7fab-4b45-a0a4-d430900aedc1",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['Italian', 'Mexican', 'Canadian', 'South_American', 'Indian_Subcontinent'])"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model_w2v_cbow.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "0b258db3-2387-48a1-9a59-4dbcb7a002d4",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3496\n",
      "2657\n"
     ]
    }
   ],
   "source": [
    "print(len(ing_dict[\"South_American\"]))\n",
    "print(len(ing_dict[\"Indian_Subcontinent\"]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "b529293a-0e05-4cd1-b53b-584316d18b53",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.17631292, -0.07728941,  0.22843763,  0.01542629,  0.01119746,\n",
       "       -0.03960142, -0.03401047,  0.04264101, -0.1758497 , -0.08173409,\n",
       "       -0.24703893, -0.3144137 ,  0.04020148,  0.13387072, -0.15018801,\n",
       "        0.12688826, -0.08932471, -0.04732473,  0.04999096, -0.13533483,\n",
       "        0.19290411, -0.05077958,  0.04999002,  0.07160948, -0.05498168,\n",
       "        0.07892894,  0.04809561,  0.0034171 ,  0.05629408, -0.03905073,\n",
       "       -0.09045357,  0.09742434,  0.04532578,  0.16490534, -0.1804091 ,\n",
       "        0.23288277,  0.11219794, -0.00678533, -0.01992148, -0.11115509,\n",
       "        0.06943585, -0.10069323, -0.12791045,  0.2506861 ,  0.03376681,\n",
       "       -0.01354546, -0.16404209,  0.02708535,  0.1689996 ,  0.11612594,\n",
       "        0.21412326, -0.16916487, -0.05746803, -0.07487226, -0.02657647,\n",
       "       -0.13035108,  0.10949836, -0.08885775, -0.06598619,  0.10036502,\n",
       "       -0.12039863, -0.05903397,  0.04643947,  0.11567812, -0.21525697,\n",
       "        0.00682932,  0.03377649,  0.06845801, -0.20442936,  0.02335701,\n",
       "       -0.06005627,  0.07636696,  0.15103991,  0.05632348,  0.10943571,\n",
       "        0.03831354, -0.02665849, -0.01113594, -0.17685951,  0.09175619,\n",
       "       -0.1316375 , -0.08380696, -0.08939804,  0.0913296 ,  0.04397869,\n",
       "        0.0271841 ,  0.04347844, -0.03792846,  0.14027174,  0.10987166,\n",
       "        0.17394201,  0.13280357,  0.00655592,  0.11001629,  0.06878895,\n",
       "        0.09077041, -0.00519384,  0.03375584, -0.004575  ,  0.06118499],\n",
       "      dtype=float32)"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spot-check: fetch the entry stored under 'extra_dark_roast_coffee' in the `wv` store of\n",
    "# the 'Italian' model in model_w2v_sg. The recorded output is a 100-element float32 array.\n",
    "model_w2v_sg[\"Italian\"].wv[\"extra_dark_roast_coffee\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "89410f57-9409-4360-98a3-c3c491607f70",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of ingredients in Italian 5264\n",
      "Number of ingredients in Italian 5264\n",
      "Number of ingredients in Mexican 5071\n",
      "Number of ingredients in Mexican 5071\n",
      "Number of ingredients in Canadian 3407\n",
      "Number of ingredients in Canadian 3407\n",
      "Number of ingredients in South_American 3496\n",
      "Number of ingredients in South_American 3496\n",
      "Number of ingredients in Indian_Subcontinent 2657\n",
      "Number of ingredients in Indian_Subcontinent 2657\n"
     ]
    }
   ],
   "source": [
    "# Collect, for every cuisine, a {key: vector} lookup from each of the two per-cuisine\n",
    "# model dictionaries (model_w2v_sg and model_w2v_cbow; presumably the skip-gram and\n",
    "# CBOW Word2Vec variants -- confirm against the training cell).\n",
    "cuisine_list = [\n",
    "    \"Italian\",\n",
    "    \"Mexican\",\n",
    "    \"Canadian\",\n",
    "    \"South_American\",\n",
    "    \"Indian_Subcontinent\",\n",
    "]\n",
    "ing_emb_dict_sg = {}\n",
    "ing_emb_dict_cbow = {}\n",
    "for cuisine in cuisine_list:\n",
    "    # Fresh inner dicts, registered under the cuisine name before they are filled.\n",
    "    sg_lookup = ing_emb_dict_sg[cuisine] = {}\n",
    "    cbow_lookup = ing_emb_dict_cbow[cuisine] = {}\n",
    "    for ing in ing_dict[cuisine]:\n",
    "        # The same key is looked up in the `wv` store of both models.\n",
    "        sg_lookup[ing] = model_w2v_sg[cuisine].wv[ing]\n",
    "        cbow_lookup[ing] = model_w2v_cbow[cuisine].wv[ing]\n",
    "    # Both lookups are filled from the same keys, so the two printed counts are equal.\n",
    "    print(f\"Number of ingredients in {cuisine} {len(ing_emb_dict_sg[cuisine])}\")\n",
    "    print(f\"Number of ingredients in {cuisine} {len(ing_emb_dict_cbow[cuisine])}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "8aaa3b37-5df1-4185-a51e-e283c7e9a16b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Persist both embedding lookups as pickle files, using paths relative to the working\n",
    "# directory: ing_emb_dict_sg is written first, then ing_emb_dict_cbow.\n",
    "for file_path, embeddings in (\n",
    "    (\"ing_emb_dict_sg.pkl\", ing_emb_dict_sg),\n",
    "    (\"ing_emb_dict_cbow.pkl\", ing_emb_dict_cbow),\n",
    "):\n",
    "    # Binary write mode because pickle emits bytes; the context manager closes the file.\n",
    "    with open(file_path, 'wb') as file:\n",
    "        pickle.dump(embeddings, file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4ed6190-5882-4f78-939a-9d93a62b9703",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
