{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%reload_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "from logging import getLogger\n",
    "from pathlib import Path\n",
    "import os\n",
    "import sys\n",
    "sys.path.append(os.pardir)\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd \n",
    "from tqdm import tqdm\n",
    "from sklearn.utils import check_random_state\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "from utils import fix_seed, empty_metrics\n",
    "from run import run_dynamic_match\n",
    "from visualization_seed import plot_match_per, plot_number_user_retain, plot_user_retain\n",
    "\n",
    "from synthetic_data import generate_data, generate_reward_data, train_model\n",
    "import conf\n",
    "import os\n",
    "import pickle\n",
    "logger = getLogger(__name__)\n",
    "logger.info(f\"The current working directory is {Path().cwd()}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a standalone legend image: one empty proxy line per method on hidden axes.\n",
    "# `color_dict` maps each method label to its line colour; the next legend cell reads it too.\n",
    "color_dict = {\n",
    "    \"MRet\": \"tab:red\",\n",
    "    \"Optimal\": \"tab:blue\",\n",
    "    \"MRet (best)\": \"tab:blue\",\n",
    "    \"Uniform\": \"tab:green\",\n",
    "    \"Max Match\": \"tab:cyan\",\n",
    "    \"FairCo\": \"tab:pink\",\n",
    "    \"FairCo (equal exposure)\": \"blueviolet\",\n",
    "}\n",
    "# Other colour candidates noted by the author: brown, cyan, grey, pink, purple, olive\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "plt.style.use(\"ggplot\")\n",
    "fig, ax = plt.subplots(figsize=(10, 2))\n",
    "\n",
    "# Empty x/y data: the lines are never visible, they only feed the legend.\n",
    "legend_labels = (\"Max Match\", \"FairCo\", \"FairCo (equal exposure)\", \"Uniform\", \"MRet\", \"MRet (best)\")\n",
    "for method_label in legend_labels:\n",
    "    ax.plot(\n",
    "        [],\n",
    "        [],\n",
    "        color=color_dict[method_label],\n",
    "        marker='o',\n",
    "        label=method_label,\n",
    "        markersize=13,\n",
    "        linewidth=5,\n",
    "    )\n",
    "\n",
    "# The column count follows conf.show_method_list, not the label tuple above.\n",
    "ax.legend(loc=\"center\", ncol=len(conf.show_method_list), fontsize=18)\n",
    "ax.axis('off')\n",
    "\n",
    "fig.subplots_adjust(left=0.8, right=0.9, top=0.2, bottom=0.1)\n",
    "\n",
    "# The target path is a fixed literal, so the figure is always written.\n",
    "save_path = Path(\"../fig/legend.png\")\n",
    "save_path.parent.mkdir(parents=True, exist_ok=True)\n",
    "plt.savefig(save_path, dpi=300, bbox_inches=\"tight\")\n",
    "print(f\"Figure saved to {save_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Second legend image (legend_hist.png): two methods only, drawn as semi-transparent lines.\n",
    "# Colours come from `color_dict`, which is defined in the previous legend cell.\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "plt.style.use(\"ggplot\")\n",
    "fig, ax = plt.subplots(figsize=(10, 2))\n",
    "\n",
    "# Proxy artists with no data; they exist only so the legend has entries.\n",
    "hist_labels = ('FairCo', 'MRet (best)')\n",
    "for method_label in hist_labels:\n",
    "    ax.plot(\n",
    "        [],\n",
    "        [],\n",
    "        color=color_dict[method_label],\n",
    "        label=method_label,\n",
    "        markersize=13,\n",
    "        linewidth=5,\n",
    "        alpha=0.5,\n",
    "    )\n",
    "\n",
    "# The column count follows conf.show_method_list, not the two labels above.\n",
    "ax.legend(loc=\"center\", ncol=len(conf.show_method_list), fontsize=18)\n",
    "ax.axis('off')\n",
    "\n",
    "fig.subplots_adjust(left=0.8, right=0.9, top=0.2, bottom=0.1)\n",
    "\n",
    "# The target path is a fixed literal, so the figure is always written.\n",
    "save_path = Path(\"../fig/legend_hist.png\")\n",
    "save_path.parent.mkdir(parents=True, exist_ok=True)\n",
    "plt.savefig(save_path, dpi=300, bbox_inches=\"tight\")\n",
    "print(f\"Figure saved to {save_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# How does MRet perform as the timestep $t$ increases?\n",
    "\n",
    "log_path = Path(\"../result/T\")\n",
    "df_path = log_path.joinpath(\"df\")\n",
    "all_data = pd.read_csv(df_path.joinpath(\"all_data_results.csv\"))\n",
    "from visualization_seed import plot_match_and_user_retain\n",
    "\n",
    "# NOTE(review): the side sizes are hard-coded to 1000 here while other cells take them\n",
    "# from conf - confirm they match.\n",
    "plot_options = dict(\n",
    "    side=\"both\",          # one of \"x\", \"y\", \"both\"\n",
    "    n_x=1000,             # number of users on the x side\n",
    "    n_y=1000,             # number of users on the y side\n",
    "    figsize=(18, 6),      # size of the whole figure\n",
    "    x_log_scale=False,    # whether the x-axis uses a log scale\n",
    "    y_sig_digits=None,\n",
    "    legend_ncol=8,        # number of legend columns\n",
    "    save_path=None,       # None: display only, nothing is saved\n",
    ")\n",
    "plot_match_and_user_retain(all_data, **plot_options)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# How does FairCo perform under the equal-exposure fairness criterion?\n",
    "# NOTE(review): this cell reuses the `all_data` frame loaded by the previous cell (../result/T);\n",
    "# confirm that is the intended input for the figure saved as synthetic_T_exp.png.\n",
    "save_path = Path(f\"../fig/synthetic_T_exp.png\")\n",
    "plot_match_and_user_retain(\n",
    "    all_data,\n",
    "    side=\"both\",\n",
    "    # Read the side sizes from conf directly: the bare names n_x / n_y are only\n",
    "    # assigned in a later cell, so using them here raises NameError on a fresh\n",
    "    # kernel (Restart & Run All).\n",
    "    n_x=conf.n_x,\n",
    "    n_y=conf.n_y,\n",
    "    figsize=(18, 6),\n",
    "    x_log_scale=False,\n",
    "    y_sig_digits=None,\n",
    "    legend_ncol=8,\n",
    "    save_path=save_path,  # the figure is written to this path\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Why does Fairco underperform in user retention?\n",
    "log_path = Path(\"../result/T\")\n",
    "df_path = log_path.joinpath(\"df\")\n",
    "all_data = pd.read_csv(df_path.joinpath(\"all_data_results.csv\"))\n",
    "# pickle.load can execute arbitrary code; only open result files produced by this project.\n",
    "with open(df_path.joinpath(\"results.pkl\"), \"rb\") as f:\n",
    "    results = pickle.load(f)\n",
    "\n",
    "from visualization_seed import plot_histogram\n",
    "# Notebook-level settings copied from conf; later plotting cells read n_x and n_y.\n",
    "method_list, T = conf.method_list, conf.T\n",
    "n_x, n_y = conf.n_x, conf.n_y\n",
    "\n",
    "# Two histograms with identical settings; only the metric and the output file differ.\n",
    "for metric_name, figure_file in (\n",
    "    (\"active_match\", \"../fig/hist.png\"),\n",
    "    (\"effective_active_match\", \"../fig/hist_optimal.png\"),\n",
    "):\n",
    "    save_path = Path(figure_file)\n",
    "    plot_histogram(\n",
    "        results,\n",
    "        method_list=['MRet (best)', 'FairCo (lam=100)'],\n",
    "        metric=metric_name,\n",
    "        T=T - 1,\n",
    "        xlabel=\"number of matches\",\n",
    "        save_path=save_path,\n",
    "        figsize=(8, 6),\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from visualization_seed import plot_match_and_user_retain\n",
    "from visualization_seed_variable import plot_match_and_user_retain_variable\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# How does MRet perform when user popularity varies?\n",
    "# Loads ../result/kappa/df/all_data_results.csv and saves the figure to ../fig/synthetic_kappa.png.\n",
    "variable = \"kappa\"\n",
    "log_path = Path(f\"../result/{variable}\")\n",
    "df_path = log_path / \"df\"\n",
    "all_data = pd.read_csv(df_path / \"all_data_results.csv\")\n",
    "save_path = Path(f\"../fig/synthetic_{variable}.png\")\n",
    "\n",
    "plot_match_and_user_retain_variable(\n",
    "    all_data=all_data,\n",
    "    variable=variable,\n",
    "    n_x=conf.n_x,\n",
    "    n_y=conf.n_y,  # y-side size comes from conf.n_y (was mistakenly conf.n_x)\n",
    "    T=conf.T,\n",
    "    x_log_scale=False,\n",
    "    figsize=(18, 6),\n",
    "    save_path=save_path  # Specify save location\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# How does the proposed method perform when varying the number of users?\n",
    "# Loads ../result/n_xy/df/all_data_results.csv and saves the figure to ../fig/synthetic_n_xy.png.\n",
    "variable = \"n_xy\"\n",
    "log_path = Path(f\"../result/{variable}\")\n",
    "df_path = log_path / \"df\"\n",
    "all_data = pd.read_csv(df_path / \"all_data_results.csv\")\n",
    "save_path = Path(f\"../fig/synthetic_{variable}.png\")\n",
    "\n",
    "plot_match_and_user_retain_variable(\n",
    "    all_data=all_data,\n",
    "    variable=variable,\n",
    "    n_x=conf.n_x,\n",
    "    n_y=conf.n_y,  # y-side size comes from conf.n_y (was mistakenly conf.n_x)\n",
    "    T=conf.T,\n",
    "    x_log_scale=True,\n",
    "    figsize=(18, 6),\n",
    "    save_path=save_path  # Specify save location\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# How does the hyperparameter of FairCo affect its performance?\n",
    "# Loads ../result/lambda_/df/all_data_results.csv and saves the figure to ../fig/synthetic_lambda_.png.\n",
    "variable = \"lambda_\"\n",
    "log_path = Path(f\"../result/{variable}\")\n",
    "df_path = log_path / \"df\"\n",
    "all_data = pd.read_csv(df_path / \"all_data_results.csv\")\n",
    "save_path = Path(f\"../fig/synthetic_{variable}.png\")\n",
    "\n",
    "plot_match_and_user_retain_variable(\n",
    "    all_data=all_data,\n",
    "    variable=variable,\n",
    "    n_x=conf.n_x,\n",
    "    n_y=conf.n_y,  # y-side size comes from conf.n_y (was mistakenly conf.n_x)\n",
    "    T=conf.T,\n",
    "    x_log_scale=True,\n",
    "    figsize=(18, 6),\n",
    "    save_path=save_path  # Specify save location\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# How does MRet perform under varying noise levels in the match probabilities?\n",
    "# Loads ../result/rel_noise/df/all_data_results.csv and saves the figure to ../fig/synthetic_rel_noise.png.\n",
    "variable = \"rel_noise\"\n",
    "log_path = Path(f\"../result/{variable}\")\n",
    "df_path = log_path / \"df\"\n",
    "all_data = pd.read_csv(df_path / \"all_data_results.csv\")\n",
    "save_path = Path(f\"../fig/synthetic_{variable}.png\")\n",
    "\n",
    "plot_match_and_user_retain_variable(\n",
    "    all_data=all_data,\n",
    "    variable=variable,\n",
    "    n_x=conf.n_x,\n",
    "    n_y=conf.n_y,  # y-side size comes from conf.n_y (was mistakenly conf.n_x)\n",
    "    T=conf.T,\n",
    "    x_log_scale=False,\n",
    "    figsize=(18, 6),\n",
    "    save_path=save_path  # Specify save location\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# How does MRet perform when popularity drifts over time?\n",
    "# n_x and n_y are the notebook-level values assigned from conf in the histogram cell.\n",
    "log_path = Path(\"../result/time_popularity\")\n",
    "df_path = log_path.joinpath(\"df\")\n",
    "all_data = pd.read_csv(df_path.joinpath(\"all_data_results.csv\"))\n",
    "save_path = Path(\"../fig/synthetic_time_popularity.png\")\n",
    "\n",
    "plot_options = dict(\n",
    "    side=\"both\",\n",
    "    n_x=n_x,\n",
    "    n_y=n_y,\n",
    "    figsize=(18, 6),\n",
    "    x_log_scale=False,\n",
    "    y_sig_digits=None,\n",
    "    legend_ncol=8,\n",
    "    save_path=save_path,  # the figure is written to this path\n",
    ")\n",
    "plot_match_and_user_retain(all_data, **plot_options)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# How accurate is MRet as an approximation to the Optimal method?\n",
    "# n_x and n_y are the notebook-level values assigned from conf in the histogram cell.\n",
    "log_path = Path(\"../result/T_optimal\")\n",
    "df_path = log_path.joinpath(\"df\")\n",
    "all_data = pd.read_csv(df_path.joinpath(\"all_data_results.csv\"))\n",
    "\n",
    "save_path = Path(\"../fig/synthetic_T_optimal.png\")\n",
    "plot_options = dict(\n",
    "    side=\"both\",\n",
    "    n_x=n_x,\n",
    "    n_y=n_y,\n",
    "    figsize=(18, 6),\n",
    "    x_log_scale=False,\n",
    "    y_sig_digits=None,\n",
    "    legend_ncol=8,\n",
    "    save_path=save_path,  # the figure is written to this path\n",
    ")\n",
    "plot_match_and_user_retain(all_data, **plot_options)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
