{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys \n",
    "from bpemb import BPEmb\n",
    "from tqdm import tqdm\n",
    "import numpy as np\n",
    "import os\n",
    "import torch\n",
    "from gensim.models import KeyedVectors\n",
    "from collections import defaultdict\n",
    "\n",
    "sys.path.append('../datasets')\n",
    "sys.path.append(\"..\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create BP embedding for MUSE (SINGLE language)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def read(file, threshold=0, vocabulary=None, dtype='float'):\n",
    "    header = file.readline().split(' ')\n",
    "    count = int(header[0]) if threshold <= 0 else min(threshold, int(header[0]))\n",
    "    dim = int(header[1])\n",
    "    words = []\n",
    "    matrix = np.empty((count, dim),  dtype=dtype) if vocabulary is None else []\n",
    "    for i in tqdm(range(count)):\n",
    "        word, vec = file.readline().split(' ', 1)\n",
    "        if vocabulary is None:\n",
    "            words.append(word)\n",
    "            matrix[i] = np.fromstring(vec, sep=' ',  dtype=dtype)\n",
    "        elif word in vocabulary:\n",
    "            words.append(word)\n",
    "            matrix.append(np.fromstring(vec, sep=' ',  dtype=dtype))\n",
    "    return (words, matrix) if vocabulary is None else (words, torch.tensor(matrix,  dtype=dtype))\n",
    "\n",
    "def get_dict(dict_path, source, target):\n",
    "\n",
    "    dictf = open(dict_path, encoding='utf-8', errors='surrogateescape')\n",
    "    src2trg = defaultdict(set)\n",
    "\n",
    "    vocab = set()\n",
    "\n",
    "    for line in dictf:\n",
    "        splitted = line.split()\n",
    "        if len(splitted) > 2:\n",
    "            src, trg = splitted[:2]\n",
    "        elif len(splitted) == 2:\n",
    "            src, trg = splitted\n",
    "\n",
    "        src_ind = source.key_to_index[src]\n",
    "        trg_ind = target.key_to_index[trg]\n",
    "        src2trg[src_ind].add(trg_ind)\n",
    "        vocab.add(src)\n",
    "    return vocab, src2trg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████| 200000/200000 [00:18<00:00, 11045.80it/s]\n",
      "100%|████████████████████████████████████████████████████████████████████████| 200000/200000 [00:17<00:00, 11136.83it/s]\n",
      "100%|████████████████████████████████████████████████████████████████████████| 200000/200000 [00:16<00:00, 12253.08it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of non valid indices: 80832\n",
      "Number of valid indices: 119168\n"
     ]
    }
   ],
   "source": [
    "data_path = '../datasets'\n",
    "\n",
    "dataset_name = 'muse'\n",
    "lang = 'en'\n",
    "emb_dim_source = 100  \n",
    "emb_dim_target = 50    \n",
    "vs             = 200000\n",
    "\n",
    "path_source = os.path.join(data_path, f'muse/embeddings/wiki.multi.{lang}.vec')\n",
    "path_target = os.path.join(data_path, f'muse/embeddings/wiki.multi.{lang}.vec')\n",
    "\n",
    "model_source = open(path_source, encoding='utf-8', errors='surrogateescape')\n",
    "model_target = open(path_target, encoding='utf-8', errors='surrogateescape')\n",
    "\n",
    "i2w_source, vectors_source = read(model_source)\n",
    "i2w_target, vectors_target = read(model_target)\n",
    "\n",
    "bpemb_source = BPEmb(lang=lang, dim=emb_dim_source, vs=vs)\n",
    "bpemb_target = BPEmb(lang=lang, dim=emb_dim_target, vs=vs)\n",
    "\n",
    "model_new_source = KeyedVectors(vector_size=emb_dim_source)\n",
    "model_new_target = KeyedVectors(vector_size=emb_dim_target)\n",
    "\n",
    "non_valid_indices = set()\n",
    "valid_indices = set(list(range(len(vectors_source))))\n",
    "\n",
    "model_new_source.vectors = np.zeros((vectors_source.shape[0], emb_dim_source))\n",
    "model_new_target.vectors = np.zeros((vectors_target.shape[0], emb_dim_target))\n",
    "\n",
    "for ix, word in enumerate(tqdm(i2w_source)):  \n",
    "    word_embed_source = bpemb_source.embed(word)\n",
    "    word_embed_target = bpemb_target.embed(word)\n",
    "\n",
    "    if np.isnan(word_embed_source).any() or np.isnan(word_embed_target).any():\n",
    "        print(word)\n",
    "        non_valid_indices.add(ix)\n",
    "        \n",
    "    if word_embed_source.shape[0] != 1 or word_embed_target.shape[0] != 1:\n",
    "        non_valid_indices.add(ix)\n",
    "        continue \n",
    "\n",
    "    model_new_source.vectors[ix] = word_embed_source[:]\n",
    "    model_new_target.vectors[ix] = word_embed_target[:]\n",
    "    \n",
    "   \n",
    "valid_indices = valid_indices - non_valid_indices\n",
    "print('Number of non valid indices:', len(non_valid_indices))\n",
    "print('Number of valid indices:', len(valid_indices))\n",
    "\n",
    "valid_indices = list(valid_indices)\n",
    "\n",
    "model_new_source.vectors =  model_new_source.vectors[valid_indices]\n",
    "model_new_target.vectors =  model_new_target.vectors[valid_indices]\n",
    "\n",
    "model_new_source.index_to_key = [i2w_source[ix] for ix in valid_indices]\n",
    "model_new_target.index_to_key = [i2w_target[ix] for ix in valid_indices]\n",
    "\n",
    "model_new_source.key_to_index = {word:i for i, word in enumerate(model_new_source.index_to_key)}\n",
    "model_new_target.key_to_index = {word:i for i, word in enumerate(model_new_target.index_to_key)}\n",
    "\n",
    "assert len(model_new_source.vectors) == len(model_new_source.index_to_key)\n",
    "assert len(model_new_source.vectors) == len(model_new_source.key_to_index.keys())\n",
    "assert np.isnan(model_new_source.vectors).sum() == 0\n",
    "\n",
    "assert len(model_new_target.vectors) == len(model_new_target.index_to_key)\n",
    "assert len(model_new_target.vectors) == len(model_new_target.key_to_index.keys())\n",
    "assert np.isnan(model_new_target.vectors).sum() == 0\n",
    "assert len(model_new_source.vectors) == len(model_new_target.vectors)\n",
    "\n",
    "model_new_source.save(f'../datasets/{dataset_name}_{lang}_BP_{emb_dim_source}_{vs//1000}K.d2v')\n",
    "model_new_target.save(f'../datasets/{dataset_name}_{lang}_BP_{emb_dim_target}_{vs//1000}K.d2v')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create BP embedding for twitter/wiki-gigaword."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1193514, 100)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████| 1193514/1193514 [00:48<00:00, 24611.75it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of non valid indices: 1101177\n",
      "Number of valid indices: 92337\n"
     ]
    }
   ],
   "source": [
    "data_path = '../datasets'\n",
    "dataset_name = 'twitter'\n",
    "\n",
    "emb_dim_source = 100  \n",
    "emb_dim_target = 50   \n",
    "vs             = 200000\n",
    "\n",
    "path_source = f'../datasets/{dataset_name}_glove_{emb_dim_source}.d2v'\n",
    "path_target = f'../datasets/{dataset_name}_glove_{emb_dim_target}.d2v'\n",
    "\n",
    "model_source = KeyedVectors.load(path_source)\n",
    "model_target = KeyedVectors.load(path_target)\n",
    "\n",
    "bpemb_source = BPEmb(lang='en', dim=emb_dim_source, vs=vs)\n",
    "bpemb_target = BPEmb(lang='en', dim=emb_dim_target, vs=vs)\n",
    "\n",
    "model_new_source = KeyedVectors(vector_size=emb_dim_source)\n",
    "model_new_target = KeyedVectors(vector_size=emb_dim_target)\n",
    "\n",
    "model_new_source.vectors = np.zeros((model_source.vectors.shape[0], emb_dim_source))\n",
    "model_new_target.vectors = np.zeros((model_target.vectors.shape[0], emb_dim_target))\n",
    "\n",
    "print(model_new_source.vectors.shape)\n",
    "non_valid_indices = set()\n",
    "valid_indices = set(list(range(len(model_source.vectors))))\n",
    "\n",
    "for ix, word in enumerate(tqdm(model_source.index_to_key)):  \n",
    "    word_embed_source = bpemb_source.embed(word)\n",
    "    word_embed_target = bpemb_target.embed(word)\n",
    "\n",
    "    if word_embed_source.shape[0] != 1 or word_embed_target.shape[0] != 1:\n",
    "        non_valid_indices.add(ix)\n",
    "        continue\n",
    "    \n",
    "    if np.isnan(word_embed_source).any() or np.isnan(word_embed_target).any():\n",
    "        print(word)\n",
    "        non_valid_indices.add(ix)\n",
    "\n",
    "    model_new_source.vectors[ix] = word_embed_source[:]\n",
    "    model_new_target.vectors[ix] = word_embed_target[:]\n",
    "    \n",
    "   \n",
    "valid_indices = valid_indices - non_valid_indices\n",
    "print('Number of non valid indices:', len(non_valid_indices))\n",
    "print('Number of valid indices:', len(valid_indices))\n",
    "\n",
    "valid_indices = list(valid_indices)\n",
    "\n",
    "model_new_source.vectors =  model_new_source.vectors[valid_indices]\n",
    "model_new_target.vectors =  model_new_target.vectors[valid_indices]\n",
    "\n",
    "model_new_source.index_to_key = [model_source.index_to_key[ix] for ix in valid_indices]\n",
    "model_new_target.index_to_key = [model_target.index_to_key[ix] for ix in valid_indices]\n",
    "\n",
    "model_new_source.key_to_index = {word:i for i, word in enumerate(model_new_source.index_to_key)}\n",
    "model_new_target.key_to_index = {word:i for i, word in enumerate(model_new_target.index_to_key)}\n",
    "\n",
    "assert len(model_new_source.vectors) == len(model_new_source.index_to_key)\n",
    "assert len(model_new_source.vectors) == len(model_new_source.key_to_index.keys())\n",
    "assert np.isnan(model_new_source.vectors).sum() == 0\n",
    "\n",
    "assert len(model_new_target.vectors) == len(model_new_target.index_to_key)\n",
    "assert len(model_new_target.vectors) == len(model_new_target.key_to_index.keys())\n",
    "assert np.isnan(model_new_target.vectors).sum() == 0\n",
    "assert len(model_new_source.vectors) == len(model_new_target.vectors)\n",
    "\n",
    "model_new_source.save(f'../datasets/{dataset_name}_BP_{emb_dim_source}_{vs//1000}K.d2v')\n",
    "model_new_target.save(f'../datasets/{dataset_name}_BP_{emb_dim_target}_{vs//1000}K.d2v')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create BP embeddings for MUSE (Different languages - same dimensions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████| 200000/200000 [00:08<00:00, 22419.68it/s]\n",
      "100%|████████████████████████████████████████████████████████████████████████| 200000/200000 [00:08<00:00, 22314.52it/s]\n"
     ]
    }
   ],
   "source": [
    "data_path = '../datasets'\n",
    "\n",
    "source_lang = 'en'\n",
    "target_lang = 'fr'\n",
    "\n",
    "emb_dim_source  = 100\n",
    "emb_dim_target  = 100\n",
    "vs              = 200000\n",
    "\n",
    "source_path = os.path.join(data_path, f'muse/embeddings/wiki.multi.{source_lang}.vec')\n",
    "target_path = os.path.join(data_path, f'muse/embeddings/wiki.multi.{target_lang}.vec')\n",
    "vocab_path  = os.path.join(data_path, f'muse/dictionaries/{source_lang}-{target_lang}.txt') \n",
    "\n",
    "source_model = open(source_path, encoding='utf-8', errors='surrogateescape')\n",
    "target_model = open(target_path, encoding='utf-8', errors='surrogateescape')\n",
    "\n",
    "i2w_source, vectors_source = read(source_model)\n",
    "i2w_target, vectors_target = read(target_model)\n",
    "\n",
    "model_new_source = KeyedVectors(vector_size=emb_dim_source)\n",
    "model_new_target = KeyedVectors(vector_size=emb_dim_target)\n",
    "\n",
    "model_new_source.index_to_key = i2w_source[:]\n",
    "model_new_target.index_to_key = i2w_target[:]\n",
    "\n",
    "model_new_source.key_to_index = {word: i for i, word in enumerate(i2w_source)}\n",
    "model_new_target.key_to_index = {word: i for i, word in enumerate(i2w_target)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "93084\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████| 93084/93084 [00:05<00:00, 18023.85it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of non valid indices: 31315\n",
      "Number of valid indices: 61769\n"
     ]
    }
   ],
   "source": [
    "vocab, src2trg = get_dict(vocab_path, model_new_source, model_new_target)\n",
    "\n",
    "valid_keys = []\n",
    "for k in src2trg.keys():\n",
    "    if src2trg[k] != set():\n",
    "        valid_keys.append(k)\n",
    "\n",
    "print(len(valid_keys))\n",
    "\n",
    "indices_source = valid_keys[:]\n",
    "indices_target = [min(src2trg[ix]) for ix in indices_source]\n",
    "\n",
    "words_source = [model_new_source.index_to_key[ix] for ix in indices_source]\n",
    "words_target = [model_new_target.index_to_key[ix] for ix in indices_target]\n",
    "\n",
    "model_new_source.vectors = np.zeros((len(words_source), emb_dim_source))\n",
    "model_new_target.vectors = np.zeros((len(words_target), emb_dim_target))\n",
    "\n",
    "model_new_source.index_to_key = words_source\n",
    "model_new_target.index_to_key = words_target\n",
    "\n",
    "bpemb_source = BPEmb(lang=source_lang, dim=emb_dim_source, vs=vs)\n",
    "bpemb_target = BPEmb(lang=target_lang, dim=emb_dim_target, vs=vs)\n",
    "\n",
    "non_valid_indices = set()\n",
    "valid_indices = set(list(range(len(model_new_source.vectors))))\n",
    "\n",
    "for ix in tqdm(range(len(model_new_source.index_to_key))):#(tqdm(i2w_source)):  \n",
    "    word_source       = model_new_source.index_to_key[ix]\n",
    "    word_target       = model_new_target.index_to_key[ix]\n",
    "    \n",
    "    word_embed_source = bpemb_source.embed(word_source)\n",
    "    word_embed_target = bpemb_target.embed(word_target)\n",
    "\n",
    "    if np.isnan(word_embed_source).any() or np.isnan(word_embed_target).any():\n",
    "        print(word)\n",
    "        non_valid_indices.add(ix)\n",
    "        \n",
    "    if word_embed_source.shape[0] != 1 or word_embed_target.shape[0] != 1:\n",
    "        non_valid_indices.add(ix)\n",
    "        continue \n",
    "\n",
    "    model_new_source.vectors[ix] = word_embed_source[:]\n",
    "    model_new_target.vectors[ix] = word_embed_target[:]\n",
    "    \n",
    "   \n",
    "valid_indices = valid_indices - non_valid_indices\n",
    "print('Number of non valid indices:', len(non_valid_indices))\n",
    "print('Number of valid indices:', len(valid_indices))\n",
    "\n",
    "valid_indices = list(valid_indices)\n",
    "\n",
    "model_new_source.vectors =  model_new_source.vectors[valid_indices]\n",
    "model_new_target.vectors =  model_new_target.vectors[valid_indices]\n",
    "\n",
    "model_new_source.index_to_key = [i2w_source[ix] for ix in valid_indices]\n",
    "model_new_target.index_to_key = [i2w_target[ix] for ix in valid_indices]\n",
    "\n",
    "#model_new_source.key_to_index = {word:i for i, word in enumerate(model_new_source.index_to_key)}\n",
    "#model_new_target.key_to_index = {word:i for i, word in enumerate(model_new_target.index_to_key)}\n",
    "\n",
    "assert len(model_new_source.vectors) == len(model_new_source.index_to_key)\n",
    "#assert len(model_new_source.vectors) == len(model_new_source.key_to_index.keys())\n",
    "assert np.isnan(model_new_source.vectors).sum() == 0\n",
    "\n",
    "assert len(model_new_target.vectors) == len(model_new_target.index_to_key)\n",
    "#assert len(model_new_target.vectors) == len(model_new_target.key_to_index.keys())\n",
    "assert np.isnan(model_new_target.vectors).sum() == 0\n",
    "assert len(model_new_source.vectors) == len(model_new_target.vectors)\n",
    "\n",
    "#model_new_source.save(f'../datasets/{dataset_name}_{lang}_BP_{emb_dim_source}_{vs//1000}K.d2v')\n",
    "#model_new_target.save(f'../datasets/{dataset_name}_{lang}_BP_{emb_dim_target}_{vs//1000}K.d2v')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_source_target = KeyedVectors(vector_size=emb_dim_source)\n",
    "full_len = len(model_new_source.vectors)\n",
    "\n",
    "if emb_dim_source == emb_dim_target:\n",
    "    model_source_target.vectors = np.zeros((2*full_len, emb_dim_source), dtype=np.float32)\n",
    "\n",
    "    for ix in range(2*full_len):\n",
    "        if ix < full_len:\n",
    "            model_source_target.vectors[ix] = model_new_source.vectors[ix]\n",
    "            model_source_target.index_to_key.append(model_new_source.index_to_key[ix])\n",
    "            \n",
    "        else:\n",
    "            model_source_target.vectors[ix] = model_new_target.vectors[ix-full_len]\n",
    "            model_source_target.index_to_key.append(model_new_target.index_to_key[ix-full_len])\n",
    "    \n",
    "    model_source_target.save(f'../datasets/muse_{source_lang}(BP)({emb_dim_source})_{target_lang}(BP)({emb_dim_target}).d2v')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Extracting fastText embeddings from MUSE and saving into KeyedVectors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████| 200000/200000 [00:09<00:00, 21956.38it/s]\n",
      "100%|████████████████████████████████████████████████████████████████████████| 200000/200000 [00:09<00:00, 21911.19it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "94681\n"
     ]
    }
   ],
   "source": [
    "data_path = '../datasets'\n",
    "\n",
    "source_lang = 'en'\n",
    "target_lang = 'fr'\n",
    "\n",
    "emb_dim_source  = 300\n",
    "emb_dim_target  = 300\n",
    "vs              = 200000\n",
    "\n",
    "source_path = os.path.join(data_path, f'muse/embeddings/wiki.multi.{source_lang}.vec')\n",
    "target_path = os.path.join(data_path, f'muse/embeddings/wiki.multi.{target_lang}.vec')\n",
    "vocab_path  = os.path.join(data_path, f'muse/dictionaries/{source_lang}-{target_lang}.txt') \n",
    "\n",
    "source_model = open(source_path, encoding='utf-8', errors='surrogateescape')\n",
    "target_model = open(target_path, encoding='utf-8', errors='surrogateescape')\n",
    "\n",
    "i2w_source, vectors_source = read(source_model)\n",
    "i2w_target, vectors_target = read(target_model)\n",
    "\n",
    "model_new_source = KeyedVectors(vector_size=emb_dim_source)\n",
    "model_new_target = KeyedVectors(vector_size=emb_dim_target)\n",
    "\n",
    "model_new_source.index_to_key = i2w_source[:]\n",
    "model_new_target.index_to_key = i2w_target[:]\n",
    "\n",
    "model_new_source.key_to_index = {word: i for i, word in enumerate(i2w_source)}\n",
    "model_new_target.key_to_index = {word: i for i, word in enumerate(i2w_target)}\n",
    "\n",
    "vocab, src2trg = get_dict(vocab_path, model_new_source, model_new_target)\n",
    "\n",
    "valid_keys = []\n",
    "for k in src2trg.keys():\n",
    "    if src2trg[k] != set():\n",
    "        valid_keys.append(k)\n",
    "\n",
    "print(len(valid_keys))\n",
    "\n",
    "indices_source = valid_keys[:]\n",
    "indices_target = [min(src2trg[ix]) for ix in indices_source]\n",
    "\n",
    "words_source = [model_new_source.index_to_key[ix] for ix in indices_source]\n",
    "words_target = [model_new_target.index_to_key[ix] for ix in indices_target]\n",
    "\n",
    "model_new_source.vectors = vectors_source[indices_source]\n",
    "model_new_target.vectors = vectors_target[indices_target]\n",
    "\n",
    "model_new_source.index_to_key = words_source\n",
    "model_new_target.index_to_key = words_target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_source_target = KeyedVectors(vector_size=emb_dim_source)\n",
    "full_len = len(model_new_source.vectors)\n",
    "\n",
    "if emb_dim_source == emb_dim_target:\n",
    "    model_source_target.vectors = np.zeros((2*full_len, emb_dim_source), dtype=np.float32)\n",
    "\n",
    "    for ix in range(2*full_len):\n",
    "        if ix < full_len:\n",
    "            model_source_target.vectors[ix] = model_new_source.vectors[ix]\n",
    "            model_source_target.index_to_key.append(model_new_source.index_to_key[ix])\n",
    "            \n",
    "        else:\n",
    "            model_source_target.vectors[ix] = model_new_target.vectors[ix-full_len]\n",
    "            model_source_target.index_to_key.append(model_new_target.index_to_key[ix-full_len])\n",
    "    \n",
    "    model_source_target.save(f'../datasets/muse_{source_lang}(fT)({emb_dim_source})_{target_lang}(fT)({emb_dim_target}).d2v')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
