{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.cluster import KMeans\n",
    "from src.lfgp import LFGP\n",
    "from collections import defaultdict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Jobs Database:\n",
      "    Ability_1  Ability_2  Ability_3  Ability_4  Ability_5  Ability_6  \\\n",
      "0           1          1          0          1          0          1   \n",
      "1           1          0          1          1          0          1   \n",
      "2           1          0          0          1          0          0   \n",
      "3           0          0          0          1          0          0   \n",
      "4           1          1          0          0          1          0   \n",
      "5           0          1          0          0          0          1   \n",
      "6           1          0          1          1          1          1   \n",
      "7           1          0          1          0          0          0   \n",
      "8           0          1          0          1          1          1   \n",
      "9           0          1          0          1          1          0   \n",
      "10          1          0          0          1          0          0   \n",
      "11          0          1          1          0          0          0   \n",
      "12          1          0          1          1          0          1   \n",
      "13          1          0          1          0          0          0   \n",
      "14          0          0          1          0          1          0   \n",
      "15          0          0          0          0          0          0   \n",
      "16          1          1          0          1          1          0   \n",
      "17          0          1          0          0          1          0   \n",
      "18          0          0          0          0          0          1   \n",
      "19          1          1          0          0          0          1   \n",
      "20          1          1          1          0          1          0   \n",
      "21          1          1          1          0          0          1   \n",
      "22          1          1          0          0          0          0   \n",
      "23          0          1          1          1          1          0   \n",
      "24          1          1          0          0          0          1   \n",
      "25          0          1          1          0          1          0   \n",
      "26          0          0          0          1          1          0   \n",
      "27          0          0          1          1          1          0   \n",
      "28          0          0          0          0          0          0   \n",
      "29          0          1          0          1          1          1   \n",
      "30          0          0          0          1          0          1   \n",
      "31          0          1          1          1          1          1   \n",
      "32          1          0          1          1          1          1   \n",
      "33          1          1          0          0          1          1   \n",
      "34          0          1          1          1          0          1   \n",
      "35          0          1          0          0          1          1   \n",
      "36          0          1          1          1          0          1   \n",
      "37          1          0          1          1          1          0   \n",
      "38          1          0          0          1          1          1   \n",
      "39          1          1          0          0          1          1   \n",
      "40          0          0          0          0          0          0   \n",
      "41          0          0          0          0          1          0   \n",
      "42          1          0          1          0          1          0   \n",
      "43          1          1          0          1          1          1   \n",
      "44          0          0          0          0          0          0   \n",
      "45          0          0          1          0          0          0   \n",
      "46          1          0          1          0          0          0   \n",
      "47          0          1          1          1          1          0   \n",
      "48          1          1          1          1          0          1   \n",
      "49          1          1          0          0          1          0   \n",
      "\n",
      "       Job Type  \n",
      "0    Job Type_1  \n",
      "1    Job Type_2  \n",
      "2    Job Type_3  \n",
      "3    Job Type_4  \n",
      "4    Job Type_5  \n",
      "5    Job Type_6  \n",
      "6    Job Type_7  \n",
      "7    Job Type_8  \n",
      "8    Job Type_9  \n",
      "9   Job Type_10  \n",
      "10  Job Type_11  \n",
      "11  Job Type_12  \n",
      "12  Job Type_13  \n",
      "13  Job Type_14  \n",
      "14  Job Type_15  \n",
      "15  Job Type_16  \n",
      "16  Job Type_17  \n",
      "17  Job Type_18  \n",
      "18  Job Type_19  \n",
      "19  Job Type_20  \n",
      "20  Job Type_21  \n",
      "21  Job Type_22  \n",
      "22  Job Type_23  \n",
      "23  Job Type_24  \n",
      "24  Job Type_25  \n",
      "25  Job Type_26  \n",
      "26  Job Type_27  \n",
      "27  Job Type_28  \n",
      "28  Job Type_29  \n",
      "29  Job Type_30  \n",
      "30  Job Type_31  \n",
      "31  Job Type_32  \n",
      "32  Job Type_33  \n",
      "33  Job Type_34  \n",
      "34  Job Type_35  \n",
      "35  Job Type_36  \n",
      "36  Job Type_37  \n",
      "37  Job Type_38  \n",
      "38  Job Type_39  \n",
      "39  Job Type_40  \n",
      "40  Job Type_41  \n",
      "41  Job Type_42  \n",
      "42  Job Type_43  \n",
      "43  Job Type_44  \n",
      "44  Job Type_45  \n",
      "45  Job Type_46  \n",
      "46  Job Type_47  \n",
      "47  Job Type_48  \n",
      "48  Job Type_49  \n",
      "49  Job Type_50  \n",
      "\n",
      "Workers Database:\n",
      "    Skill_1  Skill_2  Skill_3  Skill_4  Skill_5  Skill_6     Worker Type\n",
      "0         0        0        0        0        1        1   Worker Type_1\n",
      "1         0        0        0        1        0        0   Worker Type_2\n",
      "2         1        0        0        0        0        1   Worker Type_3\n",
      "3         0        0        1        1        0        1   Worker Type_4\n",
      "4         1        1        1        1        1        0   Worker Type_5\n",
      "5         0        1        1        1        0        0   Worker Type_6\n",
      "6         0        1        0        1        1        0   Worker Type_7\n",
      "7         1        1        1        1        0        1   Worker Type_8\n",
      "8         0        1        1        0        0        1   Worker Type_9\n",
      "9         0        0        0        0        0        0  Worker Type_10\n",
      "10        1        1        1        1        0        1  Worker Type_11\n",
      "11        0        1        1        0        0        1  Worker Type_12\n",
      "12        1        1        1        0        0        0  Worker Type_13\n",
      "13        1        0        0        1        0        0  Worker Type_14\n",
      "14        0        1        0        1        1        0  Worker Type_15\n",
      "15        0        1        0        0        0        1  Worker Type_16\n",
      "16        1        1        1        0        1        1  Worker Type_17\n",
      "17        0        0        0        1        1        0  Worker Type_18\n",
      "18        1        0        0        1        1        0  Worker Type_19\n",
      "19        1        0        1        0        1        0  Worker Type_20\n",
      "20        1        1        0        1        1        1  Worker Type_21\n",
      "21        0        1        1        0        0        0  Worker Type_22\n",
      "22        0        0        0        0        1        0  Worker Type_23\n",
      "23        1        0        1        0        1        1  Worker Type_24\n",
      "24        1        1        1        0        1        0  Worker Type_25\n",
      "25        1        1        0        0        1        0  Worker Type_26\n",
      "26        0        0        1        1        1        0  Worker Type_27\n",
      "27        1        0        1        1        0        0  Worker Type_28\n",
      "28        1        0        1        0        1        1  Worker Type_29\n",
      "29        1        0        0        1        0        1  Worker Type_30\n",
      "30        1        0        1        0        1        1  Worker Type_31\n",
      "31        0        1        0        1        0        0  Worker Type_32\n",
      "32        1        0        0        0        1        1  Worker Type_33\n",
      "33        0        1        1        0        0        0  Worker Type_34\n",
      "34        0        0        1        0        1        1  Worker Type_35\n",
      "35        0        1        1        1        0        1  Worker Type_36\n",
      "36        1        0        0        1        1        0  Worker Type_37\n",
      "37        0        1        1        1        0        0  Worker Type_38\n",
      "38        0        1        1        0        1        0  Worker Type_39\n",
      "39        1        1        1        0        1        0  Worker Type_40\n",
      "40        0        0        1        0        0        0  Worker Type_41\n",
      "41        0        0        0        1        1        0  Worker Type_42\n",
      "42        0        0        1        0        1        1  Worker Type_43\n",
      "43        1        0        1        0        1        1  Worker Type_44\n",
      "44        0        0        0        1        0        0  Worker Type_45\n",
      "45        1        0        0        0        0        1  Worker Type_46\n",
      "46        0        0        1        1        1        0  Worker Type_47\n",
      "47        0        0        1        1        1        1  Worker Type_48\n",
      "48        0        1        0        1        1        0  Worker Type_49\n",
      "49        0        0        0        1        1        0  Worker Type_50\n"
     ]
    }
   ],
   "source": [
    "np.random.seed(10)\n",
    "\n",
    "n_jobs = 50  # Number of job types\n",
    "n_workers = 50  # Number of worker types\n",
    "k = 6  # Dimension of the latent factor vector\n",
    "\n",
    "# Jobs: one random binary (0/1) latent factor row per job type.\n",
    "# The job matrix is drawn before the worker matrix, so the seeded stream is consumed in a fixed order.\n",
    "jobs_latent_factors = np.random.randint(2, size=(n_jobs, k))\n",
    "ability_columns = [f'Ability_{i}' for i in range(1, k+1)]\n",
    "job_type_labels = [f'Job Type_{i}' for i in range(1, n_jobs+1)]\n",
    "jobs_df = pd.DataFrame(jobs_latent_factors, columns=ability_columns)\n",
    "jobs_df['Job Type'] = job_type_labels\n",
    "\n",
    "# Workers: same construction, with skill columns and worker-type labels.\n",
    "workers_latent_factors = np.random.randint(2, size=(n_workers, k))\n",
    "skill_columns = [f'Skill_{i}' for i in range(1, k+1)]\n",
    "worker_type_labels = [f'Worker Type_{i}' for i in range(1, n_workers+1)]\n",
    "workers_df = pd.DataFrame(workers_latent_factors, columns=skill_columns)\n",
    "workers_df['Worker Type'] = worker_type_labels\n",
    "\n",
    "# Print both generated tables in full, jobs first\n",
    "for heading, table in ((\"Jobs Database:\", jobs_df), (\"\\nWorkers Database:\", workers_df)):\n",
    "    print(heading)\n",
    "    print(table)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Job group information: [3 4 2 4 4 1 2 2 2 4 3 2 4 1 3 1 3 4 0 3 1 4 3 0 0 2 2 1 3 3 2 3 3 0 2 4 2\n",
      " 4 0 1 3 0 3 1 1 0 1 4 1 3]\n",
      "Worker group information: [3 3 3 4 2 0 3 1 3 1 1 3 4 1 1 3 1 1 3 3 0 4 4 1 4 1 0 3 3 3 4 0 4 4 0 0 0\n",
      " 0 3 2 2 0 2 2 0 2 4 1 1 0]\n"
     ]
    }
   ],
   "source": [
    "# Number of groups used for the model's worker groups and for the random job/worker assignments below\n",
    "n_groups = 5\n",
    "\n",
    "# Create the LFGP model; the latent factor dimension follows k from the data-generation cell\n",
    "model = LFGP(lf_dim=k, n_worker_group=n_groups, lambda1=1, lambda2=1)\n",
    "\n",
    "# Extract only the numeric latent factor columns. Calling .values on the whole frame would also pull in the\n",
    "# string label column ('Worker Type' / 'Job Type') and produce an object array with k+1 columns.\n",
    "workers_lf = workers_df.drop(columns=['Worker Type']).to_numpy()\n",
    "jobs_lf = jobs_df.drop(columns=['Job Type']).to_numpy()\n",
    "assert workers_lf.shape == (n_workers, k), workers_lf.shape\n",
    "assert jobs_lf.shape == (n_jobs, k), jobs_lf.shape\n",
    "\n",
    "# Set the model's latent factor matrices directly instead of fitting them\n",
    "model.A = jobs_lf  # Assume A represents the latent factors of jobs\n",
    "model.B = workers_lf  # Assume B represents the latent factors of workers\n",
    "\n",
    "# Assign every job and every worker to a random group (re-seeded so the assignment is reproducible)\n",
    "np.random.seed(42)\n",
    "model.U = np.random.randint(0, n_groups, size=(n_jobs,))  # Job grouping\n",
    "model.V = np.random.randint(0, n_groups, size=(n_workers,))  # Worker grouping\n",
    "\n",
    "print(\"Job group information:\", model.U)\n",
    "print(\"Worker group information:\", model.V)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "High-quality worker group for each job group: {3: 3, 4: 4, 2: 3, 1: 0, 0: 3}\n"
     ]
    }
   ],
   "source": [
    "# Hypothetical group information: snapshot the model's job (U) and worker (V) group assignments as arrays\n",
    "jobs_group_info, workers_group_info = (np.array(groups) for groups in (model.U, model.V))\n",
    "\n",
    "# Pick, for every job group, the worker group it is paired with most often\n",
    "def identify_high_quality_worker_groups(jobs_group_info, workers_group_info):\n",
    "    \"\"\"Map each job group to its most frequently co-occurring worker group.\n",
    "\n",
    "    The two sequences are paired position by position (element i of one with\n",
    "    element i of the other); zip stops at the end of the shorter sequence.\n",
    "\n",
    "    Args:\n",
    "        jobs_group_info: iterable of hashable job-group labels.\n",
    "        workers_group_info: iterable of hashable worker-group labels.\n",
    "\n",
    "    Returns:\n",
    "        dict mapping each job group, in order of first appearance, to the worker\n",
    "        group with the highest pair count; ties go to the worker group seen first.\n",
    "    \"\"\"\n",
    "    # pair_counts[job group][worker group] -> number of positions where the two coincide\n",
    "    pair_counts = defaultdict(lambda: defaultdict(int))\n",
    "    for pair in zip(jobs_group_info, workers_group_info):\n",
    "        j_group, w_group = pair\n",
    "        pair_counts[j_group][w_group] = pair_counts[j_group][w_group] + 1\n",
    "\n",
    "    return {\n",
    "        j_group: max(w_counts, key=w_counts.get)\n",
    "        for j_group, w_counts in pair_counts.items()\n",
    "    }\n",
    "\n",
    "# Run the mapping on the current group assignments and report the result\n",
    "high_quality_groups = identify_high_quality_worker_groups(\n",
    "    jobs_group_info=jobs_group_info,\n",
    "    workers_group_info=workers_group_info,\n",
    ")\n",
    "\n",
    "print(\"High-quality worker group for each job group:\", high_quality_groups)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "worker 0 is matched with job 41\n",
      "worker 1 is matched with job 23\n",
      "worker 2 is matched with job 18\n",
      "worker 6 is matched with job 33\n",
      "worker 8 is matched with job 38\n",
      "worker 11 is matched with job 24\n",
      "worker 15 is matched with job 45\n",
      "worker 18 is matched with job 7\n",
      "worker 19 is matched with job 36\n",
      "worker 27 is matched with job 26\n",
      "worker 28 is matched with job 31\n",
      "worker 29 is matched with job 28\n",
      "worker 38 is matched with job 42\n",
      "worker 3 is matched with job 21\n",
      "worker 12 is matched with job 3\n",
      "worker 21 is matched with job 9\n",
      "worker 22 is matched with job 37\n",
      "worker 24 is matched with job 35\n",
      "worker 30 is matched with job 17\n",
      "worker 32 is matched with job 12\n",
      "worker 33 is matched with job 47\n",
      "worker 46 is matched with job 1\n",
      "worker 5 is matched with job 13\n",
      "worker 20 is matched with job 46\n",
      "worker 26 is matched with job 39\n",
      "worker 31 is matched with job 15\n",
      "worker 34 is matched with job 48\n",
      "worker 35 is matched with job 44\n",
      "worker 36 is matched with job 27\n",
      "worker 37 is matched with job 20\n",
      "worker 41 is matched with job 5\n",
      "worker 44 is matched with job 43\n"
     ]
    }
   ],
   "source": [
    "# Match workers of each job group's high-quality worker group to randomly ordered jobs of that job group.\n",
    "# NOTE(review): matched_jobs is keyed by worker, so when one worker group is the high-quality group of\n",
    "# several job groups, a later job group overwrites the worker's earlier match - confirm this is intended.\n",
    "matched_jobs = {}\n",
    "for job_group, worker_group in high_quality_groups.items():\n",
    "    # Indices of the jobs in this job group, visited in random order\n",
    "    candidate_jobs = np.flatnonzero(jobs_group_info == job_group).tolist()\n",
    "    np.random.shuffle(candidate_jobs)\n",
    "\n",
    "    # Indices of the workers in the matching worker group, in ascending order\n",
    "    candidate_workers = np.flatnonzero(workers_group_info == worker_group).tolist()\n",
    "\n",
    "    # zip stops at the shorter list: workers are paired with jobs until either side runs out\n",
    "    matched_jobs.update(zip(candidate_workers, candidate_jobs))\n",
    "\n",
    "for worker_index, job_index in matched_jobs.items():\n",
    "    print(f\"worker {worker_index} is matched with job {job_index}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
