{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import openai\n",
    "import pandas as pd\n",
    "from openai import OpenAI\n",
    "import random\n",
    "from tqdm import tqdm\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to generate a resume\n",
    "def generate_resume(persona, job, client):\n",
    "\n",
    "    p_str = \"\\n\"\n",
    "    for k, v in persona.to_dict().items():\n",
    "        if k == \"person_id\":\n",
    "            continue\n",
    "        p_str += \"      {}: {}\\n\".format(k, v)\n",
    "\n",
    "    prompt = f\"\"\"\n",
    "    Generate a realistic 1/2 page resume for the following person for the job of {job}, in the New York metro area. \n",
    "    Make sure they are qualified.\n",
    "    As much as possible, include names of real companies and universities. \n",
    "    Do not include a name or email, instead give placeholders [NAME] and [EMAIL].  \n",
    "    DO NOT INCLUDE ANY OTHER PLACEHOLDERS, PLEASE FILL IN THE REST OF THE INFORMATION.\n",
    "    Only return the resume, do not give any explanation or further words.\n",
    "    \\n\n",
    "    Person:{p_str}\n",
    "    \"\"\"\n",
    "    \n",
    "    completion = client.chat.completions.create(\n",
    "        # model='gpt-4o-mini',\n",
    "        model='gpt-4o-2024-08-06',\n",
    "        temperature=1.2,\n",
    "        messages=[\n",
    "            {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
    "            {\"role\": \"user\", \"content\": prompt}\n",
    "        ]\n",
    "    )\n",
    "    \n",
    "    resume = completion.choices[0].message.content\n",
    "    return resume"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "No. personas: 250\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Sex</th>\n",
       "      <th>Education</th>\n",
       "      <th>Class of Worker</th>\n",
       "      <th>Marital Status</th>\n",
       "      <th>Place of Birth</th>\n",
       "      <th>Big Five Scores 1</th>\n",
       "      <th>Big Five Scores 2</th>\n",
       "      <th>Defining Quirks</th>\n",
       "      <th>Personal Time</th>\n",
       "      <th>...</th>\n",
       "      <th>Fertility</th>\n",
       "      <th>Income Bracket</th>\n",
       "      <th>Housing Situation</th>\n",
       "      <th>Relationship with Technology</th>\n",
       "      <th>Hobbies</th>\n",
       "      <th>Communication Style</th>\n",
       "      <th>Risk Tolerance</th>\n",
       "      <th>Travel Frequency</th>\n",
       "      <th>Pet Ownership</th>\n",
       "      <th>person_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>36</td>\n",
       "      <td>Male</td>\n",
       "      <td>Bachelor's Degree</td>\n",
       "      <td>Public</td>\n",
       "      <td>Single</td>\n",
       "      <td>Connecticut</td>\n",
       "      <td>High openness</td>\n",
       "      <td>High agreeableness</td>\n",
       "      <td>Introverted</td>\n",
       "      <td>Gaming</td>\n",
       "      <td>...</td>\n",
       "      <td>Has children</td>\n",
       "      <td>High income</td>\n",
       "      <td>Rents</td>\n",
       "      <td>Tech-savvy</td>\n",
       "      <td>Photography</td>\n",
       "      <td>Open</td>\n",
       "      <td>Risk-averse</td>\n",
       "      <td>Rare traveler</td>\n",
       "      <td>Owns a dog</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>27</td>\n",
       "      <td>Female</td>\n",
       "      <td>Associate's Degree</td>\n",
       "      <td>Self-Employed</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>New Jersey</td>\n",
       "      <td>High neuroticism</td>\n",
       "      <td>High openness</td>\n",
       "      <td>Very social</td>\n",
       "      <td>Cooking</td>\n",
       "      <td>...</td>\n",
       "      <td>Has children</td>\n",
       "      <td>Upper-middle income</td>\n",
       "      <td>Owns home</td>\n",
       "      <td>Tech-averse</td>\n",
       "      <td>Gardening</td>\n",
       "      <td>Direct</td>\n",
       "      <td>Moderate risk-taker</td>\n",
       "      <td>Rare traveler</td>\n",
       "      <td>Owns a cat</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>43</td>\n",
       "      <td>Female</td>\n",
       "      <td>Master's Degree</td>\n",
       "      <td>Self-Employed</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Canada</td>\n",
       "      <td>High conscientiousness</td>\n",
       "      <td>High agreeableness</td>\n",
       "      <td>Loves puzzles</td>\n",
       "      <td>Gaming</td>\n",
       "      <td>...</td>\n",
       "      <td>Undecided</td>\n",
       "      <td>Low income</td>\n",
       "      <td>Owns home</td>\n",
       "      <td>Familiar</td>\n",
       "      <td>Crafting</td>\n",
       "      <td>Humorous</td>\n",
       "      <td>Moderate risk-taker</td>\n",
       "      <td>Frequent traveler</td>\n",
       "      <td>Owns other pets</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>30</td>\n",
       "      <td>Female</td>\n",
       "      <td>Bachelor's Degree</td>\n",
       "      <td>Self-Employed</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Florida</td>\n",
       "      <td>High openness</td>\n",
       "      <td>High openness</td>\n",
       "      <td>Extremely organized</td>\n",
       "      <td>Reading</td>\n",
       "      <td>...</td>\n",
       "      <td>Undecided</td>\n",
       "      <td>Low income</td>\n",
       "      <td>Rents</td>\n",
       "      <td>Tech-averse</td>\n",
       "      <td>Photography</td>\n",
       "      <td>Direct</td>\n",
       "      <td>Moderate risk-taker</td>\n",
       "      <td>Rare traveler</td>\n",
       "      <td>Owns other pets</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>32</td>\n",
       "      <td>Female</td>\n",
       "      <td>Associate's Degree</td>\n",
       "      <td>Public</td>\n",
       "      <td>Divorced</td>\n",
       "      <td>Canada</td>\n",
       "      <td>High conscientiousness</td>\n",
       "      <td>High conscientiousness</td>\n",
       "      <td>Always punctual</td>\n",
       "      <td>Cooking</td>\n",
       "      <td>...</td>\n",
       "      <td>Planning to have children</td>\n",
       "      <td>Low income</td>\n",
       "      <td>Owns home</td>\n",
       "      <td>Tech-averse</td>\n",
       "      <td>Hiking</td>\n",
       "      <td>Open</td>\n",
       "      <td>High risk-taker</td>\n",
       "      <td>Occasional traveler</td>\n",
       "      <td>Owns a cat</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Age     Sex           Education Class of Worker Marital Status  \\\n",
       "0   36    Male   Bachelor's Degree          Public         Single   \n",
       "1   27  Female  Associate's Degree   Self-Employed       Divorced   \n",
       "2   43  Female     Master's Degree   Self-Employed       Divorced   \n",
       "3   30  Female   Bachelor's Degree   Self-Employed       Divorced   \n",
       "4   32  Female  Associate's Degree          Public       Divorced   \n",
       "\n",
       "  Place of Birth       Big Five Scores 1       Big Five Scores 2  \\\n",
       "0    Connecticut           High openness      High agreeableness   \n",
       "1     New Jersey        High neuroticism           High openness   \n",
       "2         Canada  High conscientiousness      High agreeableness   \n",
       "3        Florida           High openness           High openness   \n",
       "4         Canada  High conscientiousness  High conscientiousness   \n",
       "\n",
       "       Defining Quirks Personal Time  ...                  Fertility  \\\n",
       "0          Introverted        Gaming  ...               Has children   \n",
       "1          Very social       Cooking  ...               Has children   \n",
       "2        Loves puzzles        Gaming  ...                  Undecided   \n",
       "3  Extremely organized       Reading  ...                  Undecided   \n",
       "4      Always punctual       Cooking  ...  Planning to have children   \n",
       "\n",
       "        Income Bracket Housing Situation Relationship with Technology  \\\n",
       "0          High income             Rents                   Tech-savvy   \n",
       "1  Upper-middle income         Owns home                  Tech-averse   \n",
       "2           Low income         Owns home                     Familiar   \n",
       "3           Low income             Rents                  Tech-averse   \n",
       "4           Low income         Owns home                  Tech-averse   \n",
       "\n",
       "       Hobbies Communication Style       Risk Tolerance     Travel Frequency  \\\n",
       "0  Photography                Open          Risk-averse        Rare traveler   \n",
       "1    Gardening              Direct  Moderate risk-taker        Rare traveler   \n",
       "2     Crafting            Humorous  Moderate risk-taker    Frequent traveler   \n",
       "3  Photography              Direct  Moderate risk-taker        Rare traveler   \n",
       "4       Hiking                Open      High risk-taker  Occasional traveler   \n",
       "\n",
       "     Pet Ownership person_id  \n",
       "0       Owns a dog         0  \n",
       "1       Owns a cat         1  \n",
       "2  Owns other pets         2  \n",
       "3  Owns other pets         3  \n",
       "4       Owns a cat         4  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Initialize the OpenAI API\n",
    "api_key = ''\n",
    "\n",
    "client = OpenAI(\n",
    "  api_key=api_key,  # this is also the default, it can be omitted\n",
    ")\n",
    "\n",
    "# Load generated personas\n",
    "n_resumes_to_generate = 250\n",
    "personas_df = pd.read_csv(\"./data/generated_personas.csv\")[:n_resumes_to_generate]\n",
    "print(\"No. personas:\", len(personas_df))\n",
    "personas_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "250it [49:20, 11.84s/it]\n"
     ]
    }
   ],
   "source": [
    "# Generate resumes\n",
    "resumes = []\n",
    "resumes_df = None\n",
    "\n",
    "jobs = [\n",
    "    \"Police Officer\",  \n",
    "    \"Social Worker\",\n",
    "]\n",
    "\n",
    "for p_i, persona in tqdm(personas_df.iterrows()):\n",
    "    for job in jobs:\n",
    "        resume = generate_resume(persona, job, client)\n",
    "        resumes.append({**persona, \"job\": job, \"resume\": resume})\n",
    "\n",
    "        # Convert to DataFrame and save to CSV\n",
    "        resumes_df = pd.DataFrame(resumes)\n",
    "        resumes_df.to_csv(\"./data/generated_resumes_with_personas_no_race.csv\", index=False)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8.9 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.9"
  },
  "vscode": {
   "interpreter": {
    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
