{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "bf0a3117-77ac-4a92-8473-8e00db2bb247",
   "metadata": {},
   "source": [
    "# Seed Set Pairing\n",
    "\n",
    "This notebook contains the code for how we generate the `seed_set_pairings.csv` file."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "522a9609-bf69-4d6e-9d5a-34f74a6dbda8",
   "metadata": {},
   "source": [
    "## Set up"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "dda7593a-bfca-4e2b-8639-5f3df19b3df2",
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "bd225690-6a92-49e8-bda6-f35a6f14aabf",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "if os.path.isdir(\"../notebooks/\"):\n",
    "    os.chdir(\"../badseeds/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "630dc071-df5b-454d-8cd6-5ec45d57dddf",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import random\n",
    "import itertools\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "import seedbank"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "def4d011-2d79-420a-8873-628389540299",
   "metadata": {},
   "outputs": [],
   "source": [
    "# path to config json file containing paths to datasets. change if necessary\n",
    "CONFIG_PATH = \"../config.json\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "08c6fa7e-2c7f-4b99-b53a-6c4f11d404d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the project configuration (a dict of dataset paths, see CONFIG_PATH above).\n",
    "# The encoding is passed explicitly so that parsing the JSON file does not depend\n",
    "# on the platform's default locale encoding.\n",
    "with open(CONFIG_PATH, \"r\", encoding=\"utf-8\") as f:\n",
    "    config = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "c52e24e5-19bb-4b1c-a629-71ebd9d818db",
   "metadata": {},
   "outputs": [],
   "source": [
    "# for replicability\n",
    "np.random.seed(42)\n",
    "random.seed(42)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ffc82065-7a90-4169-be3c-f8f59acbeec0",
   "metadata": {},
   "source": [
    "## Code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e0c64eff-ff33-4039-97c8-33cc4915378d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# loading seeds df\n",
    "# os.path.join (instead of string concatenation) keeps the path valid whether or\n",
    "# not config[\"seeds\"][\"dir_path\"] ends with a path separator\n",
    "seeds = seedbank.seedbanking(\n",
    "    os.path.join(config[\"seeds\"][\"dir_path\"], \"seeds.json\"), index=False\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "c9efcbdb-8752-4fd5-957f-4edacdc66d62",
   "metadata": {},
   "outputs": [],
   "source": [
    "gathered_seeds = seeds[\"Seeds\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eb1c0702-ed2e-4b20-9e35-e44883ccccdc",
   "metadata": {},
   "source": [
    "We now manually go through each index in the dataframe. We group the gathered seed sets based on whether they originate from the same paper/dataset and whether the pairing between the seed sets makes sense (human judgement)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "9a24fd39-c87c-4fa6-8b21-bf754c4c61c0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Category                                            female (CNN Daily Mail)\n",
       "Seeds                     [actress, girl, girlfriend, girls, mother, mot...\n",
       "Source / Justification                       curated for the target dataset\n",
       "Source Categories                                                   curated\n",
       "Used in Paper             Identifying and Reducing Gender Bias in Word-L...\n",
       "Link                                                                   None\n",
       "Seeds ID                  female_words_CNN_DailyMail-Bordia_and_Bowman_2019\n",
       "Name: 158, dtype: object"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "seeds.iloc[158]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "56bbfcb2-2dd3-4a85-b670-52f7c250fefa",
   "metadata": {},
   "outputs": [],
   "source": [
    "# indices were found manually by inspecting seeds.iloc[num] (cell above) for each num\n",
    "group_indices = [\n",
    "    [0, 1],\n",
    "    [2, 3],\n",
    "    [4, 5],\n",
    "    [6, 7],\n",
    "    [8, 9],\n",
    "    [10, 11],\n",
    "    [12, 13],\n",
    "    [14, 15],\n",
    "    [18, 19],\n",
    "    [20, 21],\n",
    "    [22, 23],\n",
    "    # need to skip a few here\n",
    "    [28, 29],\n",
    "    [30, 31],\n",
    "    [32, 33],\n",
    "    [34, 35],\n",
    "    # skip \"equalize\", \"gender specific\"\n",
    "    [40, 41],\n",
    "    [42, 43],\n",
    "    # here we have black, caucasian and asian triplets\n",
    "    [45, 46, 47],\n",
    "    [48, 49, 50],\n",
    "    # and jew, christian, muslim triplets\n",
    "    [52, 53, 54],\n",
    "    # and jewish, christian, muslim attribute triplets\n",
    "    [55, 56, 57],\n",
    "    # skip some tests, now back to pairs\n",
    "    [60, 61],\n",
    "    # skip some weird ones\n",
    "    [66, 67],\n",
    "    # asian, black, chinese, hispanic, russian, white names\n",
    "    [68, 69, 70, 71, 72, 73],\n",
    "    # back to pairs\n",
    "    [78, 79],\n",
    "    # NOTE(review): in the saved output below, this pairs terrorism-Garg_et_al_2018 with\n",
    "    # occupations-Kozlowski_et_al_2019 (two different papers) - confirm this is intended\n",
    "    [80, 81],\n",
    "    # skip a few unpaired ones\n",
    "    [88, 89],\n",
    "    [90, 91],\n",
    "    [92, 93],\n",
    "    [94, 95],\n",
    "    [96, 97],\n",
    "    [98, 99],\n",
    "    [100, 101],\n",
    "    [102, 103],\n",
    "    [104, 105],\n",
    "    [106, 107],\n",
    "    [108, 109],\n",
    "    # skip 110 and 111 here: career vs violence doesn't seem an appropriate pair\n",
    "    [112, 113],\n",
    "    # white collar job, blue collar job, domestic work, occupation quadruplet\n",
    "    [115, 116, 117, 118],\n",
    "    [119, 120],\n",
    "    [121, 122],\n",
    "    [123, 124],\n",
    "    # male/female singular/plural; listed as explicit pairs, so 126-129 and 127-128\n",
    "    # are not paired\n",
    "    [126, 127],\n",
    "    [126, 128],\n",
    "    [128, 129],\n",
    "    [127, 129],\n",
    "    # back to normal pairs\n",
    "    [131, 132],\n",
    "    [133, 134],\n",
    "    [135, 136],\n",
    "    [137, 138],\n",
    "    # christianity, islam, atheism\n",
    "    [139, 140, 141],\n",
    "    [142, 143],\n",
    "    [144, 145],\n",
    "    [146, 147],\n",
    "    [148, 149],\n",
    "    [150, 151, 152],\n",
    "    [153, 154],\n",
    "    [155, 156],\n",
    "    [157, 158],\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "2b5ebf85-d03e-452b-87c0-2021952ae8c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Expand every group into all of its 2-element combinations (a pair yields itself,\n",
    "# a triplet yields three pairs, ...) and chain them into one flat list of [i, j] lists\n",
    "pair_indices = [\n",
    "    list(pair)\n",
    "    for pair in itertools.chain.from_iterable(\n",
    "        itertools.combinations(group, 2) for group in group_indices\n",
    "    )\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "c9d5ab0f-0b68-49f2-8416-b2a20cab345f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# look up the \"Seeds ID\" of both positional indices of every pair in the seeds dataframe\n",
    "pair_ids = [\n",
    "    seeds[\"Seeds ID\"].iloc[[first, second]].tolist() for first, second in pair_indices\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "16691f11-edac-4830-825c-bce16fbf6ebf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# one row per pairing: first seed set ID in ID_A, second in ID_B\n",
    "pair_df = pd.DataFrame(pair_ids, columns=[\"ID_A\", \"ID_B\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "a9d657bb-0679-423b-a5f2-82ad64a49fd3",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID_A</th>\n",
       "      <th>ID_B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>pleasant-Caliskan_et_al_2017</td>\n",
       "      <td>unpleasant-Caliskan_et_al_2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>flowers-Caliskan_et_al_2017</td>\n",
       "      <td>insects-Caliskan_et_al_2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>instruments-Caliskan_et_al_2017</td>\n",
       "      <td>weapons-Caliskan_et_al_2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>european_american_names-Caliskan_et_al_2017</td>\n",
       "      <td>african_american_names-Caliskan_et_al_2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>european_american_names_market_discrimination-...</td>\n",
       "      <td>african_american_names_market_discrimination-C...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85</th>\n",
       "      <td>high_morality_and_low/neutral_warmth-Bhatia_et...</td>\n",
       "      <td>high_competence-Bhatia_et_al_2018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>86</th>\n",
       "      <td>low/neutral_and_morality_high_warmth-Bhatia_et...</td>\n",
       "      <td>high_competence-Bhatia_et_al_2018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>87</th>\n",
       "      <td>male_words_Penn_Treebank-Bordia_and_Bowman_2019</td>\n",
       "      <td>female_words_Penn_Treebank-Bordia_and_Bowman_2019</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>male_words_WikiText_2-Bordia_and_Bowman_2019</td>\n",
       "      <td>female_words_WikiText_2-Bordia_and_Bowman_2019...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>89</th>\n",
       "      <td>male_words_CNN_DailyMail-Bordia_and_Bowman_2019</td>\n",
       "      <td>female_words_CNN_DailyMail-Bordia_and_Bowman_2019</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>90 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 ID_A  \\\n",
       "0                        pleasant-Caliskan_et_al_2017   \n",
       "1                         flowers-Caliskan_et_al_2017   \n",
       "2                     instruments-Caliskan_et_al_2017   \n",
       "3         european_american_names-Caliskan_et_al_2017   \n",
       "4   european_american_names_market_discrimination-...   \n",
       "..                                                ...   \n",
       "85  high_morality_and_low/neutral_warmth-Bhatia_et...   \n",
       "86  low/neutral_and_morality_high_warmth-Bhatia_et...   \n",
       "87    male_words_Penn_Treebank-Bordia_and_Bowman_2019   \n",
       "88       male_words_WikiText_2-Bordia_and_Bowman_2019   \n",
       "89    male_words_CNN_DailyMail-Bordia_and_Bowman_2019   \n",
       "\n",
       "                                                 ID_B  \n",
       "0                      unpleasant-Caliskan_et_al_2017  \n",
       "1                         insects-Caliskan_et_al_2017  \n",
       "2                         weapons-Caliskan_et_al_2017  \n",
       "3          african_american_names-Caliskan_et_al_2017  \n",
       "4   african_american_names_market_discrimination-C...  \n",
       "..                                                ...  \n",
       "85                  high_competence-Bhatia_et_al_2018  \n",
       "86                  high_competence-Bhatia_et_al_2018  \n",
       "87  female_words_Penn_Treebank-Bordia_and_Bowman_2019  \n",
       "88  female_words_WikiText_2-Bordia_and_Bowman_2019...  \n",
       "89  female_words_CNN_DailyMail-Bordia_and_Bowman_2019  \n",
       "\n",
       "[90 rows x 2 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# here's what it looks like\n",
    "pair_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "13d62403-a90b-41cf-896d-87c5f6543b3c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# save to disk (without the dataframe index). os.path.join keeps the path valid\n",
    "# whether or not config[\"pairs\"][\"dir_path\"] ends with a path separator\n",
    "pair_df.to_csv(\n",
    "    os.path.join(config[\"pairs\"][\"dir_path\"], \"seed_set_pairings.csv\"), index=False\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "83c6fe3a-4b95-4150-9bfd-5f5fd0332255",
   "metadata": {},
   "outputs": [],
   "source": [
    "# you can read it back from disk like this (same path construction as when saving)\n",
    "pair_df_new = pd.read_csv(\n",
    "    os.path.join(config[\"pairs\"][\"dir_path\"], \"seed_set_pairings.csv\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "3a9f3217-2f6f-41cd-a0d0-4a2de097935b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID_A</th>\n",
       "      <th>ID_B</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>pleasant-Caliskan_et_al_2017</td>\n",
       "      <td>unpleasant-Caliskan_et_al_2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>flowers-Caliskan_et_al_2017</td>\n",
       "      <td>insects-Caliskan_et_al_2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>instruments-Caliskan_et_al_2017</td>\n",
       "      <td>weapons-Caliskan_et_al_2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>european_american_names-Caliskan_et_al_2017</td>\n",
       "      <td>african_american_names-Caliskan_et_al_2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>european_american_names_market_discrimination-...</td>\n",
       "      <td>african_american_names_market_discrimination-C...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>85</th>\n",
       "      <td>high_morality_and_low/neutral_warmth-Bhatia_et...</td>\n",
       "      <td>high_competence-Bhatia_et_al_2018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>86</th>\n",
       "      <td>low/neutral_and_morality_high_warmth-Bhatia_et...</td>\n",
       "      <td>high_competence-Bhatia_et_al_2018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>87</th>\n",
       "      <td>male_words_Penn_Treebank-Bordia_and_Bowman_2019</td>\n",
       "      <td>female_words_Penn_Treebank-Bordia_and_Bowman_2019</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>male_words_WikiText_2-Bordia_and_Bowman_2019</td>\n",
       "      <td>female_words_WikiText_2-Bordia_and_Bowman_2019...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>89</th>\n",
       "      <td>male_words_CNN_DailyMail-Bordia_and_Bowman_2019</td>\n",
       "      <td>female_words_CNN_DailyMail-Bordia_and_Bowman_2019</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>90 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 ID_A  \\\n",
       "0                        pleasant-Caliskan_et_al_2017   \n",
       "1                         flowers-Caliskan_et_al_2017   \n",
       "2                     instruments-Caliskan_et_al_2017   \n",
       "3         european_american_names-Caliskan_et_al_2017   \n",
       "4   european_american_names_market_discrimination-...   \n",
       "..                                                ...   \n",
       "85  high_morality_and_low/neutral_warmth-Bhatia_et...   \n",
       "86  low/neutral_and_morality_high_warmth-Bhatia_et...   \n",
       "87    male_words_Penn_Treebank-Bordia_and_Bowman_2019   \n",
       "88       male_words_WikiText_2-Bordia_and_Bowman_2019   \n",
       "89    male_words_CNN_DailyMail-Bordia_and_Bowman_2019   \n",
       "\n",
       "                                                 ID_B  \n",
       "0                      unpleasant-Caliskan_et_al_2017  \n",
       "1                         insects-Caliskan_et_al_2017  \n",
       "2                         weapons-Caliskan_et_al_2017  \n",
       "3          african_american_names-Caliskan_et_al_2017  \n",
       "4   african_american_names_market_discrimination-C...  \n",
       "..                                                ...  \n",
       "85                  high_competence-Bhatia_et_al_2018  \n",
       "86                  high_competence-Bhatia_et_al_2018  \n",
       "87  female_words_Penn_Treebank-Bordia_and_Bowman_2019  \n",
       "88  female_words_WikiText_2-Bordia_and_Bowman_2019...  \n",
       "89  female_words_CNN_DailyMail-Bordia_and_Bowman_2019  \n",
       "\n",
       "[90 rows x 2 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# looks like you'd expect\n",
    "pair_df_new"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "c06a5d3e-e9f4-4a9c-934f-d5ac8f022b9f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('pleasant-Caliskan_et_al_2017', 'unpleasant-Caliskan_et_al_2017'),\n",
       " ('flowers-Caliskan_et_al_2017', 'insects-Caliskan_et_al_2017'),\n",
       " ('instruments-Caliskan_et_al_2017', 'weapons-Caliskan_et_al_2017'),\n",
       " ('european_american_names-Caliskan_et_al_2017',\n",
       "  'african_american_names-Caliskan_et_al_2017'),\n",
       " ('european_american_names_market_discrimination-Caliskan_et_al_2017',\n",
       "  'african_american_names_market_discrimination-Caliskan_et_al_2017'),\n",
       " ('pleasantness-Caliskan_et_al_2017', 'unpleasantness-Caliskan_et_al_2017'),\n",
       " ('male_names_1-Caliskan_et_al_2017', 'female_names_1-Caliskan_et_al_2017'),\n",
       " ('career-Caliskan_et_al_2017', 'family-Caliskan_et_al_2017'),\n",
       " ('male_1-Caliskan_et_al_2017', 'female_1-Caliskan_et_al_2017'),\n",
       " ('science_1-Caliskan_et_al_2017', 'arts_2-Caliskan_et_al_2017'),\n",
       " ('male_2-Caliskan_et_al_2017', 'female_2-Caliskan_et_al_2017'),\n",
       " ('temporary-Caliskan_et_al_2017', 'permanent-Caliskan_et_al_2017'),\n",
       " ('young_names-Caliskan_et_al_2017', 'old_names-Caliskan_et_al_2017'),\n",
       " ('pleasant_6-Caliskan_et_al_2017', 'unpleasant_6-Caliskan_et_al_2017'),\n",
       " ('definitional_female-Bolukbasi_et_al_2016',\n",
       "  'definitional_male-Bolukbasi_et_al_2016'),\n",
       " ('male-Manzini_et_al_2019', 'female-Manzini_et_al_2019'),\n",
       " ('male_roles-Manzini_et_al_2019', 'female_roles-Manzini_et_al_2019'),\n",
       " ('black-Manzini_et_al_2019', 'caucasian-Manzini_et_al_2019'),\n",
       " ('black-Manzini_et_al_2019', 'asian-Manzini_et_al_2019'),\n",
       " ('caucasian-Manzini_et_al_2019', 'asian-Manzini_et_al_2019'),\n",
       " ('black_roles-Manzini_et_al_2019', 'caucasian_roles-Manzini_et_al_2019'),\n",
       " ('black_roles-Manzini_et_al_2019', 'asian_roles-Manzini_et_al_2019'),\n",
       " ('caucasian_roles-Manzini_et_al_2019', 'asian_roles-Manzini_et_al_2019'),\n",
       " ('jew-Manzini_et_al_2019', 'christian-Manzini_et_al_2019'),\n",
       " ('jew-Manzini_et_al_2019', 'muslim-Manzini_et_al_2019'),\n",
       " ('christian-Manzini_et_al_2019', 'muslim-Manzini_et_al_2019'),\n",
       " ('jewish_attributes-Manzini_et_al_2019',\n",
       "  'christian_attributes-Manzini_et_al_2019'),\n",
       " ('jewish_attributes-Manzini_et_al_2019',\n",
       "  'muslim_attributes-Manzini_et_al_2019'),\n",
       " ('christian_attributes-Manzini_et_al_2019',\n",
       "  'muslim_attributes-Manzini_et_al_2019'),\n",
       " ('adjectives_appearance-Garg_et_al_2018',\n",
       "  'adjectives_intelligence-Garg_et_al_2018'),\n",
       " ('female_pairs-Garg_et_al_2018', 'male_pairs-Garg_et_al_2018'),\n",
       " ('names_asian-Garg_et_al_2018', 'names_black-Garg_et_al_2018'),\n",
       " ('names_asian-Garg_et_al_2018', 'names_chinese-Garg_et_al_2018'),\n",
       " ('names_asian-Garg_et_al_2018', 'names_hispanic-Garg_et_al_2018'),\n",
       " ('names_asian-Garg_et_al_2018', 'names_russian-Garg_et_al_2018'),\n",
       " ('names_asian-Garg_et_al_2018', 'names_white-Garg_et_al_2018'),\n",
       " ('names_black-Garg_et_al_2018', 'names_chinese-Garg_et_al_2018'),\n",
       " ('names_black-Garg_et_al_2018', 'names_hispanic-Garg_et_al_2018'),\n",
       " ('names_black-Garg_et_al_2018', 'names_russian-Garg_et_al_2018'),\n",
       " ('names_black-Garg_et_al_2018', 'names_white-Garg_et_al_2018'),\n",
       " ('names_chinese-Garg_et_al_2018', 'names_hispanic-Garg_et_al_2018'),\n",
       " ('names_chinese-Garg_et_al_2018', 'names_russian-Garg_et_al_2018'),\n",
       " ('names_chinese-Garg_et_al_2018', 'names_white-Garg_et_al_2018'),\n",
       " ('names_hispanic-Garg_et_al_2018', 'names_russian-Garg_et_al_2018'),\n",
       " ('names_hispanic-Garg_et_al_2018', 'names_white-Garg_et_al_2018'),\n",
       " ('names_russian-Garg_et_al_2018', 'names_white-Garg_et_al_2018'),\n",
       " ('christianity-Garg_et_al_2018', 'islam-Garg_et_al_2018'),\n",
       " ('terrorism-Garg_et_al_2018', 'occupations-Kozlowski_et_al_2019'),\n",
       " ('male-Kozlowski_et_al_2019', 'female-Kozlowski_et_al_2019'),\n",
       " ('upperclass-Kozlowski_et_al_2019', 'lowerclass-Kozlowski_et_al_2019'),\n",
       " ('black-Kozlowski_et_al_2019', 'white-Kozlowski_et_al_2019'),\n",
       " ('female_names-Gonen_&_Goldberg_2019', 'male_names-Gonen_&_Goldberg_2019'),\n",
       " ('family_words-Gonen_&_Goldberg_2019', 'career_words-Gonen_&_Goldberg_2019'),\n",
       " ('arts_words-Gonen_&_Goldberg_2019', 'math_words-Gonen_&_Goldberg_2019'),\n",
       " ('arts_words_2-Gonen_&_Goldberg_2019', 'science_words-Gonen_&_Goldberg_2019'),\n",
       " ('female_definition_words_1-Zhao_et_al_2018',\n",
       "  'male_definition_words_1-Zhao_et_al_2018'),\n",
       " ('male_definition_words_2-Zhao_et_al_2018',\n",
       "  'female_definition_words_2-Zhao_et_al_2018'),\n",
       " ('male_stereotype_words-Zhao_et_al_2018',\n",
       "  'female_stereotype_words-Zhao_et_al_2018'),\n",
       " ('female-Rudinger_et_al_2017', 'male-Rudinger_et_al_2017'),\n",
       " ('female_2-Rudinger_et_al_2017', 'male_2-Rudinger_et_al_2017'),\n",
       " ('white_collar_job-Fast_et_al_2016', 'blue_collar_job-Fast_et_al_2016'),\n",
       " ('white_collar_job-Fast_et_al_2016', 'domestic_work-Fast_et_al_2016'),\n",
       " ('white_collar_job-Fast_et_al_2016', 'occupation-Fast_et_al_2016'),\n",
       " ('blue_collar_job-Fast_et_al_2016', 'domestic_work-Fast_et_al_2016'),\n",
       " ('blue_collar_job-Fast_et_al_2016', 'occupation-Fast_et_al_2016'),\n",
       " ('domestic_work-Fast_et_al_2016', 'occupation-Fast_et_al_2016'),\n",
       " ('attractive-Fast_et_al_2016', 'ugliness-Fast_et_al_2016'),\n",
       " ('masculine-Fast_et_al_2016', 'feminine-Fast_et_al_2016'),\n",
       " ('positive_emotion-Fast_et_al_2016', 'negative_emotion-Fast_et_al_2016'),\n",
       " ('male_singular-Hoyle_et_al_2019', 'male_plural-Hoyle_et_al_2019'),\n",
       " ('male_singular-Hoyle_et_al_2019', 'female_singular-Hoyle_et_al_2019'),\n",
       " ('female_singular-Hoyle_et_al_2019', 'female_plural-Hoyle_et_al_2019'),\n",
       " ('male_plural-Hoyle_et_al_2019', 'female_plural-Hoyle_et_al_2019'),\n",
       " ('male_names-Knoche_et_al_2019', 'female_names-Knoche_et_al_2019'),\n",
       " ('male_terms-Knoche_et_al_2019', 'female_terms-Knoche_et_al_2019'),\n",
       " ('male-Knoche_et_al_2019', 'female-Knoche_et_al_2019'),\n",
       " ('white_names-Knoche_et_al_2019', 'black_names-Knoche_et_al_2019'),\n",
       " ('christianity_words-Knoche_et_al_2019', 'islam_words-Knoche_et_al_2019'),\n",
       " ('christianity_words-Knoche_et_al_2019', 'atheism_words-Knoche_et_al_2019'),\n",
       " ('islam_words-Knoche_et_al_2019', 'atheism_words-Knoche_et_al_2019'),\n",
       " ('pleasant-Knoche_et_al_2019', 'unpleasant-Knoche_et_al_2019'),\n",
       " ('science-Knoche_et_al_2019', 'art-Knoche_et_al_2019'),\n",
       " ('intellectual_words-Knoche_et_al_2019',\n",
       "  'appearance_words-Knoche_et_al_2019'),\n",
       " ('career-Knoche_et_al_2019', 'family-Knoche_et_al_2019'),\n",
       " ('high_morality_and_low/neutral_warmth-Bhatia_et_al_2018',\n",
       "  'low/neutral_and_morality_high_warmth-Bhatia_et_al_2018'),\n",
       " ('high_morality_and_low/neutral_warmth-Bhatia_et_al_2018',\n",
       "  'high_competence-Bhatia_et_al_2018'),\n",
       " ('low/neutral_and_morality_high_warmth-Bhatia_et_al_2018',\n",
       "  'high_competence-Bhatia_et_al_2018'),\n",
       " ('male_words_Penn_Treebank-Bordia_and_Bowman_2019',\n",
       "  'female_words_Penn_Treebank-Bordia_and_Bowman_2019'),\n",
       " ('male_words_WikiText_2-Bordia_and_Bowman_2019',\n",
       "  'female_words_WikiText_2-Bordia_and_Bowman_2019-Bordia_and_Bowman_2019'),\n",
       " ('male_words_CNN_DailyMail-Bordia_and_Bowman_2019',\n",
       "  'female_words_CNN_DailyMail-Bordia_and_Bowman_2019')]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# can convert it to a list of tuples for example\n",
    "[tuple(x) for x in pair_df_new.to_records(index=False)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0640e271-4782-49ba-876b-b46478785a7f",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
