{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import random\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Name pools used as race/gender signals: 20 first names per race and gender,\n",
    "# plus 20 surnames per race. Each pool is written as whitespace-separated text\n",
    "# and split into a list of str (no name contains a space).\n",
    "\n",
    "# White first names\n",
    "white_male_first_names = '''\n",
    "    James John Robert Michael William David Richard\n",
    "    Joseph Thomas Charles Christopher Daniel Matthew\n",
    "    Anthony Mark Donald Steven Paul Andrew Joshua\n",
    "'''.split()\n",
    "\n",
    "white_female_first_names = '''\n",
    "    Mary Patricia Jennifer Linda Elizabeth Barbara Susan\n",
    "    Jessica Sarah Karen Nancy Lisa Margaret Betty\n",
    "    Sandra Ashley Kimberly Emily Donna Michelle\n",
    "'''.split()\n",
    "\n",
    "# Asian first names\n",
    "# NOTE(review): 'Wei', 'Ying', 'Jia' and 'Li' are in both the male and the\n",
    "# female pool, and several entries ('Liu', 'Li', 'Chen', 'Tran', 'Kim') are\n",
    "# also in asian_surnames - confirm this is intended.\n",
    "asian_male_first_names = '''\n",
    "    Liu Wei Min Ying Hao Jia Jun Li Chen\n",
    "    Tuan Anh Tran Ngoc Sun Raj Arjun Ravi\n",
    "    Amit Kai Jin\n",
    "'''.split()\n",
    "\n",
    "asian_female_first_names = '''\n",
    "    Yumi Soo Lina Mei Hana Yuna Mina Jia Wei\n",
    "    Sakura Kim Mi Aya Li Trang An Hanh\n",
    "    Priya Aisha Ying\n",
    "'''.split()\n",
    "\n",
    "# Black first names\n",
    "black_male_first_names = '''\n",
    "    Darnell Jerome Leroy Tyrone Darius Malik Marquis\n",
    "    DeShawn Andre Jamal Maurice Tremayne Rashad\n",
    "    Trevon Dante Lamont Terrence Malcolm Kareem Cedric\n",
    "'''.split()\n",
    "\n",
    "black_female_first_names = '''\n",
    "    Aaliyah Keisha Latoya Tamika Monique Jasmine\n",
    "    Imani Ebony Shanice Tiana Kiara Nia Lashonda\n",
    "    Tanisha Desiree Tiara Ayanna Zaria Raven Kiana\n",
    "'''.split()\n",
    "\n",
    "# Hispanic first names\n",
    "hispanic_male_first_names = '''\n",
    "    Jose Luis Carlos Juan Jorge Miguel Angel\n",
    "    Francisco Pedro Alejandro Manuel Roberto Rafael\n",
    "    Fernando Ricardo Diego Eduardo Mario Julio Enrique\n",
    "'''.split()\n",
    "\n",
    "hispanic_female_first_names = '''\n",
    "    Maria Sofia Camila Valentina Isabella Martina Lucia\n",
    "    Victoria Ximena Fernanda Gabriela Daniela Natalia\n",
    "    Andrea Juliana Carolina Ariana Paola Alejandra Viviana\n",
    "'''.split()\n",
    "\n",
    "# Surnames (same for both genders)\n",
    "# NOTE(review): 'Garcia' and 'Martinez' are also in hispanic_surnames, and 15 of\n",
    "# the 20 white surnames are also in black_surnames - confirm the overlap is\n",
    "# intended if surnames are meant to signal race.\n",
    "white_surnames = '''\n",
    "    Smith Johnson Williams Brown Jones Miller Davis\n",
    "    Wilson Moore Taylor Anderson Thomas Jackson\n",
    "    White Harris Martin Thompson Garcia Martinez Robinson\n",
    "'''.split()\n",
    "\n",
    "asian_surnames = '''\n",
    "    Lee Kim Chen Wong Liu Yang Zhang Lin Huang\n",
    "    Wang Li Nguyen Tran Pham Chung Huynh Choi\n",
    "    Park Shah Singh\n",
    "'''.split()\n",
    "\n",
    "black_surnames = '''\n",
    "    Williams Johnson Smith Jones Brown Jackson Davis\n",
    "    Harris Robinson Thomas Walker White Taylor\n",
    "    Thompson Moore Anderson Lewis King Scott Green\n",
    "'''.split()\n",
    "\n",
    "hispanic_surnames = '''\n",
    "    Garcia Martinez Rodriguez Hernandez Lopez Gonzalez\n",
    "    Perez Sanchez Ramirez Torres Flores Rivera Gomez\n",
    "    Diaz Reyes Cruz Morales Ortiz Gutierrez Ramos\n",
    "'''.split()\n",
    "\n",
    "# Lookup table: race -> gender -> (first-name pool, surname pool).\n",
    "# Both genders of a race point at the same surname list.\n",
    "name_mapping = {\n",
    "    race: {\n",
    "        \"Male\": (male_first_names, surnames),\n",
    "        \"Female\": (female_first_names, surnames),\n",
    "    }\n",
    "    for race, male_first_names, female_first_names, surnames in (\n",
    "        (\"white\", white_male_first_names, white_female_first_names, white_surnames),\n",
    "        (\"asian\", asian_male_first_names, asian_female_first_names, asian_surnames),\n",
    "        (\"black\", black_male_first_names, black_female_first_names, black_surnames),\n",
    "        (\"hispanic\", hispanic_male_first_names, hispanic_female_first_names, hispanic_surnames),\n",
    "    )\n",
    "}\n",
    "\n",
    "def sample_names(race, gender, n=10):\n",
    "    \"\"\"\n",
    "    Draw `n` random full names for one race/gender combination.\n",
    "\n",
    "    A first name and a surname are picked independently with `random.choice`\n",
    "    from the pools registered in `name_mapping`, so the result may contain\n",
    "    repeated names.\n",
    "\n",
    "    Args:\n",
    "    - race (str): Key of `name_mapping` ('white', 'asian', 'black' or 'hispanic').\n",
    "    - gender (str): 'Male' or 'Female'.\n",
    "    - n (int): How many names to draw (default is 10).\n",
    "\n",
    "    Returns:\n",
    "    - List of `n` strings of the form 'First Last'.\n",
    "\n",
    "    Raises:\n",
    "    - ValueError: if `race` or `gender` has no entry in `name_mapping`.\n",
    "    \"\"\"\n",
    "    is_known = race in name_mapping and gender in name_mapping[race]\n",
    "    if not is_known:\n",
    "        raise ValueError(\"Race or gender not recognized. Valid options are: race - 'white', 'asian', 'black', 'hispanic'; gender - 'Male', 'Female'.\")\n",
    "\n",
    "    first_pool, surname_pool = name_mapping[race][gender]\n",
    "\n",
    "    # The f-string evaluates the first-name draw before the surname draw, so\n",
    "    # the sequence of calls into `random` is: first, last, first, last, ...\n",
    "    return [\n",
    "        f\"{random.choice(first_pool)} {random.choice(surname_pool)}\"\n",
    "        for _ in range(n)\n",
    "    ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generated personas saved to 'generated_personas.csv'\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Education</th>\n",
       "      <th>Class of Worker</th>\n",
       "      <th>Marital Status</th>\n",
       "      <th>Place of Birth</th>\n",
       "      <th>Big Five Scores 1</th>\n",
       "      <th>Big Five Scores 2</th>\n",
       "      <th>Defining Quirks</th>\n",
       "      <th>Personal Time</th>\n",
       "      <th>...</th>\n",
       "      <th>Fertility</th>\n",
       "      <th>Income Bracket</th>\n",
       "      <th>Housing Situation</th>\n",
       "      <th>Relationship with Technology</th>\n",
       "      <th>Hobbies</th>\n",
       "      <th>Communication Style</th>\n",
       "      <th>Risk Tolerance</th>\n",
       "      <th>Travel Frequency</th>\n",
       "      <th>Pet Ownership</th>\n",
       "      <th>person_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>36</td>\n",
       "      <td>Male</td>\n",
       "      <td>Bachelor's Degree</td>\n",
       "      <td>Public</td>\n",
       "      <td>Single</td>\n",
       "      <td>Connecticut</td>\n",
       "      <td>High openness</td>\n",
       "      <td>High agreeableness</td>\n",
       "      <td>Introverted</td>\n",
       "      <td>Gaming</td>\n",
       "      <td>...</td>\n",
       "      <td>Has children</td>\n",
       "      <td>High income</td>\n",
       "      <td>Rents</td>\n",
       "      <td>Tech-savvy</td>\n",
       "      <td>Photography</td>\n",
       "      <td>Open</td>\n",
       "      <td>Risk-averse</td>\n",
       "      <td>Rare traveler</td>\n",
       "      <td>Owns a dog</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>27</td>\n",
       "      <td>Female</td>\n",
       "      <td>Associate's Degree</td>\n",
       "      <td>Self-Employed</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>High neuroticism</td>\n",
       "      <td>High openness</td>\n",
       "      <td>Very social</td>\n",
       "      <td>Cooking</td>\n",
       "      <td>...</td>\n",
       "      <td>Has children</td>\n",
       "      <td>Upper-middle income</td>\n",
       "      <td>Owns home</td>\n",
       "      <td>Tech-averse</td>\n",
       "      <td>Gardening</td>\n",
       "      <td>Direct</td>\n",
       "      <td>Moderate risk-taker</td>\n",
       "      <td>Rare traveler</td>\n",
       "      <td>Owns a cat</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>43</td>\n",
       "      <td>Female</td>\n",
       "      <td>Master's Degree</td>\n",
       "      <td>Self-Employed</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Canada</td>\n",
       "      <td>High conscientiousness</td>\n",
       "      <td>High agreeableness</td>\n",
       "      <td>Loves puzzles</td>\n",
       "      <td>Gaming</td>\n",
       "      <td>...</td>\n",
       "      <td>Undecided</td>\n",
       "      <td>Low income</td>\n",
       "      <td>Owns home</td>\n",
       "      <td>Familiar</td>\n",
       "      <td>Crafting</td>\n",
       "      <td>Humorous</td>\n",
       "      <td>Moderate risk-taker</td>\n",
       "      <td>Frequent traveler</td>\n",
       "      <td>Owns other pets</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>30</td>\n",
       "      <td>Female</td>\n",
       "      <td>Bachelor's Degree</td>\n",
       "      <td>Self-Employed</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Florida</td>\n",
       "      <td>High openness</td>\n",
       "      <td>High openness</td>\n",
       "      <td>Extremely organized</td>\n",
       "      <td>Reading</td>\n",
       "      <td>...</td>\n",
       "      <td>Undecided</td>\n",
       "      <td>Low income</td>\n",
       "      <td>Rents</td>\n",
       "      <td>Tech-averse</td>\n",
       "      <td>Photography</td>\n",
       "      <td>Direct</td>\n",
       "      <td>Moderate risk-taker</td>\n",
       "      <td>Rare traveler</td>\n",
       "      <td>Owns other pets</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>32</td>\n",
       "      <td>Female</td>\n",
       "      <td>Associate's Degree</td>\n",
       "      <td>Public</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Canada</td>\n",
       "      <td>High conscientiousness</td>\n",
       "      <td>High conscientiousness</td>\n",
       "      <td>Always punctual</td>\n",
       "      <td>Cooking</td>\n",
       "      <td>...</td>\n",
       "      <td>Planning to have children</td>\n",
       "      <td>Low income</td>\n",
       "      <td>Owns home</td>\n",
       "      <td>Tech-averse</td>\n",
       "      <td>Hiking</td>\n",
       "      <td>Open</td>\n",
       "      <td>High risk-taker</td>\n",
       "      <td>Occasional traveler</td>\n",
       "      <td>Owns a cat</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Age     Sex           Education Class of Worker Marital Status  \\\n",
       "0   36    Male   Bachelor's Degree          Public         Single   \n",
       "1   27  Female  Associate's Degree   Self-Employed       Divorced   \n",
       "2   43  Female     Master's Degree   Self-Employed       Divorced   \n",
       "3   30  Female   Bachelor's Degree   Self-Employed       Divorced   \n",
       "4   32  Female  Associate's Degree          Public       Divorced   \n",
       "\n",
       "  Place of Birth       Big Five Scores 1       Big Five Scores 2  \\\n",
       "0    Connecticut           High openness      High agreeableness   \n",
       "1     New Jersey        High neuroticism           High openness   \n",
       "2         Canada  High conscientiousness      High agreeableness   \n",
       "3        Florida           High openness           High openness   \n",
       "4         Canada  High conscientiousness  High conscientiousness   \n",
       "\n",
       "       Defining Quirks Personal Time  ...                  Fertility  \\\n",
       "0          Introverted        Gaming  ...               Has children   \n",
       "1          Very social       Cooking  ...               Has children   \n",
       "2        Loves puzzles        Gaming  ...                  Undecided   \n",
       "3  Extremely organized       Reading  ...                  Undecided   \n",
       "4      Always punctual       Cooking  ...  Planning to have children   \n",
       "\n",
       "        Income Bracket Housing Situation Relationship with Technology  \\\n",
       "0          High income             Rents                   Tech-savvy   \n",
       "1  Upper-middle income         Owns home                  Tech-averse   \n",
       "2           Low income         Owns home                     Familiar   \n",
       "3           Low income             Rents                  Tech-averse   \n",
       "4           Low income         Owns home                  Tech-averse   \n",
       "\n",
       "       Hobbies Communication Style       Risk Tolerance     Travel Frequency  \\\n",
       "0  Photography                Open          Risk-averse        Rare traveler   \n",
       "1    Gardening              Direct  Moderate risk-taker        Rare traveler   \n",
       "2     Crafting            Humorous  Moderate risk-taker    Frequent traveler   \n",
       "3  Photography              Direct  Moderate risk-taker        Rare traveler   \n",
       "4       Hiking                Open      High risk-taker  Occasional traveler   \n",
       "\n",
       "     Pet Ownership person_id  \n",
       "0       Owns a dog         0  \n",
       "1       Owns a cat         1  \n",
       "2  Owns other pets         2  \n",
       "3  Owns other pets         3  \n",
       "4       Owns a cat         4  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Persona attribute space: attribute name -> list of allowed values.\n",
    "# One value per attribute is drawn at random for every persona.\n",
    "attribute_dict = {\n",
    "    # Ages 25..44 as plain Python ints. np.arange would store NumPy scalars,\n",
    "    # which show up as `np.int64(25)` whenever the values are printed.\n",
    "    \"Age\": list(range(25, 45)),\n",
    "    \"Sex\": [\"Male\", \"Female\"],\n",
    "    \"Education\": [\"Associate's Degree\", \"Bachelor's Degree\", \"Master's Degree\"],\n",
    "    \"Class of Worker\": [\"Private\", \"Public\", \"Self-Employed\"],\n",
    "    \"Marital Status\": [\"Single\", \"Married\", \"Divorced\"],\n",
    "    \"Place of Birth\": [\"New York\", \"New Jersey\", \"Connecticut\", \"Canada\", \"Pennsylvania\", \"California\", \"Florida\"],\n",
    "    # The two Big Five entries offer the same five values.\n",
    "    \"Big Five Scores 1\": [\"High openness\", \"High conscientiousness\", \"High extraversion\", \"High agreeableness\", \"High neuroticism\"],\n",
    "    \"Big Five Scores 2\": [\"High openness\", \"High conscientiousness\", \"High extraversion\", \"High agreeableness\", \"High neuroticism\"],\n",
    "    \"Defining Quirks\": [\"Always punctual\", \"Loves puzzles\", \"Extremely organized\", \"Very social\", \"Introverted\"],\n",
    "    \"Personal Time\": [\"Reading\", \"Playing sports\", \"Gaming\", \"Cooking\", \"Traveling\"],\n",
    "    \"Lifestyle\": [\"Active\", \"Sedentary\", \"Balanced\", \"Workaholic\", \"Laid-back\"],\n",
    "    \"Political Views\": [\"Democrat\", \"Republican\", \"Independent\", \"Green\", \"Libertarian\"],\n",
    "    \"Fertility\": [\"Has children\", \"Does not have children\", \"Planning to have children\", \"Undecided\"],\n",
    "    \"Income Bracket\": [\"Low income\", \"Middle income\", \"Upper-middle income\", \"High income\"],\n",
    "    \"Housing Situation\": [\"Owns home\", \"Rents\"],\n",
    "    \"Relationship with Technology\": [\"Tech-savvy\", \"Familiar\", \"Tech-averse\"],\n",
    "    \"Hobbies\": [\"Gardening\", \"Photography\", \"Crafting\", \"Hiking\", \"Playing musical instruments\"],\n",
    "    \"Communication Style\": [\"Direct\", \"Diplomatic\", \"Reserved\", \"Open\", \"Humorous\"],\n",
    "    \"Risk Tolerance\": [\"Risk-averse\", \"Moderate risk-taker\", \"High risk-taker\"],\n",
    "    \"Travel Frequency\": [\"Frequent traveler\", \"Occasional traveler\", \"Rare traveler\", \"Never travels\"],\n",
    "    \"Pet Ownership\": [\"Owns a dog\", \"Owns a cat\", \"Owns other pets\", \"No pets\"],\n",
    "}\n",
    "\n",
    "\n",
    "# Function to generate random personas\n",
    "def generate_personas(num_personas, attribute_dict, rng=None):\n",
    "    \"\"\"\n",
    "    Build a table of random personas, one row per persona.\n",
    "\n",
    "    For every persona, one value is drawn uniformly and independently for\n",
    "    each attribute in `attribute_dict`.\n",
    "\n",
    "    Args:\n",
    "    - num_personas (int): Number of rows to generate.\n",
    "    - attribute_dict (dict): Maps attribute name -> non-empty sequence of allowed values.\n",
    "    - rng (random.Random, optional): Source of randomness. Defaults to the\n",
    "      module-level `random` generator; pass `random.Random(seed)` for a\n",
    "      reproducible table that does not touch global random state.\n",
    "\n",
    "    Returns:\n",
    "    - pandas.DataFrame with one column per attribute, in the key order of\n",
    "      `attribute_dict`. The columns are present even when `num_personas` is 0.\n",
    "    \"\"\"\n",
    "    chooser = random if rng is None else rng\n",
    "    keys = list(attribute_dict)\n",
    "\n",
    "    personas = [\n",
    "        {key: chooser.choice(attribute_dict[key]) for key in keys}\n",
    "        for _ in range(num_personas)\n",
    "    ]\n",
    "\n",
    "    # Passing the columns explicitly keeps the schema for an empty result.\n",
    "    return pd.DataFrame(personas, columns=keys)\n",
    "\n",
    "\n",
    "# Seed the generator so that Restart & Run All reproduces the same personas.\n",
    "PERSONA_SEED = 42\n",
    "PERSONAS_CSV = \"./data/generated_personas.csv\"\n",
    "\n",
    "random.seed(PERSONA_SEED)\n",
    "personas_df = generate_personas(250, attribute_dict)\n",
    "# person_id is the row position; it is the key shared with the names table.\n",
    "personas_df[\"person_id\"] = list(range(len(personas_df)))\n",
    "\n",
    "# to_csv does not create missing folders, so make sure ./data exists first.\n",
    "os.makedirs(os.path.dirname(PERSONAS_CSV), exist_ok=True)\n",
    "personas_df.to_csv(PERSONAS_CSV, index=False)\n",
    "print(f\"Generated personas saved to '{PERSONAS_CSV}'\")\n",
    "personas_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Generated names saved to 'generated_names.csv'\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>person_id</th>\n",
       "      <th>Race</th>\n",
       "      <th>Name</th>\n",
       "      <th>Email</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>white</td>\n",
       "      <td>Richard Brown</td>\n",
       "      <td>richardbrown@gmail.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>black</td>\n",
       "      <td>DeShawn Thomas</td>\n",
       "      <td>deshawnthomas@gmail.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>asian</td>\n",
       "      <td>Sun Zhang</td>\n",
       "      <td>sunzhang@gmail.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>hispanic</td>\n",
       "      <td>Enrique Reyes</td>\n",
       "      <td>enriquereyes@gmail.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>white</td>\n",
       "      <td>Margaret Brown</td>\n",
       "      <td>margaretbrown@gmail.com</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   person_id      Race            Name                    Email\n",
       "0          0     white   Richard Brown   richardbrown@gmail.com\n",
       "1          0     black  DeShawn Thomas  deshawnthomas@gmail.com\n",
       "2          0     asian       Sun Zhang       sunzhang@gmail.com\n",
       "3          0  hispanic   Enrique Reyes   enriquereyes@gmail.com\n",
       "4          1     white  Margaret Brown  margaretbrown@gmail.com"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Seeded here as well, so re-running only this cell yields the same names.\n",
    "NAME_SEED = 43\n",
    "NAMES_CSV = \"./data/generated_names.csv\"\n",
    "RACES = [\"white\", \"black\", \"asian\", \"hispanic\"]\n",
    "\n",
    "random.seed(NAME_SEED)\n",
    "\n",
    "# For every persona, draw one name (and a matching e-mail address) per race.\n",
    "# The first-name pool follows the persona's Sex. Names are drawn independently,\n",
    "# so the same name/e-mail can occur for more than one persona.\n",
    "results = []\n",
    "for person_id, sex in zip(personas_df[\"person_id\"], personas_df[\"Sex\"]):\n",
    "    for race in RACES:\n",
    "        name = sample_names(race, sex, 1)[0]\n",
    "        email = name.lower().replace(\" \", \"\") + \"@gmail.com\"\n",
    "        results.append([person_id, race, name, email])\n",
    "\n",
    "name_df = pd.DataFrame(results, columns=[\"person_id\", \"Race\", \"Name\", \"Email\"])\n",
    "\n",
    "# to_csv does not create missing folders, so make sure ./data exists first.\n",
    "os.makedirs(os.path.dirname(NAMES_CSV), exist_ok=True)\n",
    "name_df.to_csv(NAMES_CSV, index=False)\n",
    "print(f\"Generated names saved to '{NAMES_CSV}'\")\n",
    "name_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{ll}\n",
      "\\toprule\n",
      "Category & Values \\\\\n",
      "\\midrule\n",
      "Age & [np.int64(25), np.int64(26), np.int64(27), np.int64(28), np.int64(29), np.int64(30), np.int64(31), np.int64(32), np.int64(33), np.int64(34), np.int64(35), np.int64(36), np.int64(37), np.int64(38), np.int64(39), np.int64(40), np.int64(41), np.int64(42), np.int64(43), np.int64(44)] \\\\\n",
      "Sex & ['Male', 'Female'] \\\\\n",
      "Education & [\"Associate's Degree\", \"Bachelor's Degree\", \"Master's Degree\"] \\\\\n",
      "Class of Worker & ['Private', 'Public', 'Self-Employed'] \\\\\n",
      "Marital Status & ['Single', 'Married', 'Divorced'] \\\\\n",
      "Place of Birth & ['New York', 'New Jersey', 'Connecticut', 'Canada', 'Pennsylvania', 'California', 'Florida'] \\\\\n",
      "Big Five Scores 1 & ['High openness', 'High conscientiousness', 'High extraversion', 'High agreeableness', 'High neuroticism'] \\\\\n",
      "Big Five Scores 2 & ['High openness', 'High conscientiousness', 'High extraversion', 'High agreeableness', 'High neuroticism'] \\\\\n",
      "Defining Quirks & ['Always punctual', 'Loves puzzles', 'Extremely organized', 'Very social', 'Introverted'] \\\\\n",
      "Personal Time & ['Reading', 'Playing sports', 'Gaming', 'Cooking', 'Traveling'] \\\\\n",
      "Lifestyle & ['Active', 'Sedentary', 'Balanced', 'Workaholic', 'Laid-back'] \\\\\n",
      "Political Views & ['Democrat', 'Republican', 'Independent', 'Green', 'Libertarian'] \\\\\n",
      "Fertility & ['Has children', 'Does not have children', 'Planning to have children', 'Undecided'] \\\\\n",
      "Income Bracket & ['Low income', 'Middle income', 'Upper-middle income', 'High income'] \\\\\n",
      "Housing Situation & ['Owns home', 'Rents'] \\\\\n",
      "Relationship with Technology & ['Tech-savvy', 'Familiar', 'Tech-averse'] \\\\\n",
      "Hobbies & ['Gardening', 'Photography', 'Crafting', 'Hiking', 'Playing musical instruments'] \\\\\n",
      "Communication Style & ['Direct', 'Diplomatic', 'Reserved', 'Open', 'Humorous'] \\\\\n",
      "Risk Tolerance & ['Risk-averse', 'Moderate risk-taker', 'High risk-taker'] \\\\\n",
      "Travel Frequency & ['Frequent traveler', 'Occasional traveler', 'Rare traveler', 'Never travels'] \\\\\n",
      "Pet Ownership & ['Owns a dog', 'Owns a cat', 'Owns other pets', 'No pets'] \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# LaTeX table of the persona attribute space (attribute name -> allowed values).\n",
    "attribute_dict = {\n",
    "    # Plain ints: np.arange values were rendered as `np.int64(25)` in the table.\n",
    "    \"Age\": list(range(25, 45)),\n",
    "    \"Sex\": [\"Male\", \"Female\"],\n",
    "    \"Education\": [\"Associate's Degree\", \"Bachelor's Degree\", \"Master's Degree\"],\n",
    "    \"Class of Worker\": [\"Private\", \"Public\", \"Self-Employed\"],\n",
    "    \"Marital Status\": [\"Single\", \"Married\", \"Divorced\"],\n",
    "    \"Place of Birth\": [\"New York\", \"New Jersey\", \"Connecticut\", \"Canada\", \"Pennsylvania\", \"California\", \"Florida\"],\n",
    "    \"Big Five Scores 1\": [\"High openness\", \"High conscientiousness\", \"High extraversion\", \"High agreeableness\", \"High neuroticism\"],\n",
    "    \"Big Five Scores 2\": [\"High openness\", \"High conscientiousness\", \"High extraversion\", \"High agreeableness\", \"High neuroticism\"],\n",
    "    \"Defining Quirks\": [\"Always punctual\", \"Loves puzzles\", \"Extremely organized\", \"Very social\", \"Introverted\"],\n",
    "    \"Personal Time\": [\"Reading\", \"Playing sports\", \"Gaming\", \"Cooking\", \"Traveling\"],\n",
    "    \"Lifestyle\": [\"Active\", \"Sedentary\", \"Balanced\", \"Workaholic\", \"Laid-back\"],\n",
    "    \"Political Views\": [\"Democrat\", \"Republican\", \"Independent\", \"Green\", \"Libertarian\"],\n",
    "    \"Fertility\": [\"Has children\", \"Does not have children\", \"Planning to have children\", \"Undecided\"],\n",
    "    \"Income Bracket\": [\"Low income\", \"Middle income\", \"Upper-middle income\", \"High income\"],\n",
    "    \"Housing Situation\": [\"Owns home\", \"Rents\"],\n",
    "    \"Relationship with Technology\": [\"Tech-savvy\", \"Familiar\", \"Tech-averse\"],\n",
    "    \"Hobbies\": [\"Gardening\", \"Photography\", \"Crafting\", \"Hiking\", \"Playing musical instruments\"],\n",
    "    \"Communication Style\": [\"Direct\", \"Diplomatic\", \"Reserved\", \"Open\", \"Humorous\"],\n",
    "    \"Risk Tolerance\": [\"Risk-averse\", \"Moderate risk-taker\", \"High risk-taker\"],\n",
    "    \"Travel Frequency\": [\"Frequent traveler\", \"Occasional traveler\", \"Rare traveler\", \"Never travels\"],\n",
    "    \"Pet Ownership\": [\"Owns a dog\", \"Owns a cat\", \"Owns other pets\", \"No pets\"],\n",
    "}\n",
    "\n",
    "# Join each value list into comma-separated text so the table cell reads\n",
    "# `Male, Female` instead of the Python list repr `['Male', 'Female']`.\n",
    "df = pd.DataFrame(\n",
    "    [[category, \", \".join(str(value) for value in values)] for category, values in attribute_dict.items()],\n",
    "    columns=[\"Category\", \"Values\"],\n",
    ")\n",
    "# escape=True backslash-escapes LaTeX special characters in the cell text.\n",
    "print(df.to_latex(index=False, escape=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{ll}\n",
      "\\toprule\n",
      "Attribute & Keywords \\\\\n",
      "\\midrule\n",
      "emotional_intelligence & ['empathetic', 'supportive', 'compassionate', 'understanding', 'caring', 'patient', 'nurturing'] \\\\\n",
      "competitiveness & ['competitive', 'driven', 'goal-oriented', 'focused', 'ambitious', 'outperformed', 'won', 'achieved'] \\\\\n",
      "stability_reliability & ['reliable', 'consistent', 'punctual', 'dependable', 'steady', 'committed', 'loyal'] \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# LaTeX table of the keyword lists: attribute name -> list of keywords.\n",
    "attribute_dict = {\n",
    "    \"emotional_intelligence\": [\n",
    "        \"empathetic\", \"supportive\", \"compassionate\", \"understanding\", \"caring\",\n",
    "        \"patient\", \"nurturing\"\n",
    "    ],\n",
    "    \"competitiveness\": [\n",
    "        \"competitive\", \"driven\", \"goal-oriented\", \"focused\", \"ambitious\",\n",
    "        \"outperformed\", \"won\", \"achieved\"\n",
    "    ],\n",
    "    \"stability_reliability\": [\n",
    "        \"reliable\", \"consistent\", \"punctual\", \"dependable\", \"steady\", \"committed\", \"loyal\"\n",
    "    ],\n",
    "}\n",
    "\n",
    "# Join each keyword list into comma-separated text so the table cell does not\n",
    "# show a Python list repr with brackets and quotes.\n",
    "df = pd.DataFrame(\n",
    "    [[attribute, \", \".join(keywords)] for attribute, keywords in attribute_dict.items()],\n",
    "    columns=[\"Attribute\", \"Keywords\"],\n",
    ")\n",
    "# escape=True backslash-escapes LaTeX special characters; without it the\n",
    "# underscores in the attribute names are emitted raw, which is not valid in\n",
    "# LaTeX text mode.\n",
    "print(df.to_latex(index=False, escape=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.11.6 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  },
  "vscode": {
   "interpreter": {
    "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
