{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "282ea44b-4927-4ab2-a84a-bdd68ab3c19d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import embeddings_aux as aux\n",
    "import weat\n",
    "import web"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "386458f8-ebd8-4997-beb8-5e792c0a9544",
   "metadata": {},
   "outputs": [],
   "source": [
    "vec_arr_orig, vocab_orig = aux.load_glove('./data/umbc_orig_vecs.txt')\n",
    "vec_arr_proposed, vocab_proposed = aux.load_glove('./data/umbc_proposed_vecs.txt')\n",
    "vec_arr_cds, vocab_cds = aux.load_glove('./data/umbc_cds_vecs.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "91627e34-ca78-4cad-aaf1-44302c6ddf0d",
   "metadata": {},
   "outputs": [],
   "source": [
    "w2i_orig = {w: i for i, w in enumerate(vocab_orig)}\n",
    "w2i_proposed = {w: i for i, w in enumerate(vocab_proposed)}\n",
    "w2i_cds = {w: i for i, w in enumerate(vocab_cds)}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ce0ab371-4942-421f-be57-5f9e9d6a47df",
   "metadata": {},
   "source": [
    "### Original GloVe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eb5035d5-6313-4041-a4c0-6979bdfeaec3",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_orig = weat.weat(vec_arr_orig, w2i_orig, tests=['ma_mf', 'sa_mf', 'mf_ch', 'fi_pu', 'mw_pu', 'in_ja', 'in_ma', 'in_sa', 'in_ca'])\n",
    "df_orig"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1a860507-2d2c-46fe-9c15-48f570872205",
   "metadata": {},
   "source": [
    "### Bias Mitigated GloVe (proposed method)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dee0302e-681a-44d2-b557-ee94a2a9dfc5",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_proposed = weat.weat(vec_arr_proposed, w2i_proposed, tests=['ma_mf', 'sa_mf', 'mf_ch', 'fi_pu', 'mw_pu', 'in_ja', 'in_ma', 'in_sa', 'in_ca'])\n",
    "df_proposed"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3486500d-8b33-4f7f-9a97-f8424696061a",
   "metadata": {},
   "source": [
    "### CDS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "331b036e-add0-4798-8de1-76763a9c911c",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_cds = weat.weat(vec_arr_cds, w2i_cds,tests=['ma_mf', 'sa_mf', 'mf_ch', 'fi_pu', 'mw_pu', 'in_ja', 'in_ma', 'in_sa', 'in_ca'])\n",
    "df_cds"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "863ce0b5-13bd-4b94-86d0-f12158cd7f18",
   "metadata": {},
   "source": [
    "## WEB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "60a7c541-2750-48cd-bd2b-d5b0453f8183",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_web = web.eval_embedding(['./data/umbc_orig_vecs.txt',\n",
    "                             './data/umbc_debiased_vecs.txt',\n",
    "                             './data/umbc_cds_vecs.txt'])\n",
    "df_web"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
