{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Paraphrase results\n",
    "This notebook reproduces the paraphrasing results. By default, it uses the existing experimental data. If you have your own experimental data, set `RESULT_DIR` in the next cell to the path of those results to compute the metrics on them instead."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the experimental results; passed as `result_dir` to the metric computation below.\n",
    "# If None, uses results based on our experimental data. Point it to the path of your own results if available.\n",
    "RESULT_DIR = None # e.g. \"results_review\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from moralsim.analysis.metrics import compute_metrics_per_scenario, compute_metrics_per_model, get_groups\n",
    "\n",
    "# Display name -> model identifier(s) handed to compute_metrics_per_scenario.\n",
    "# NOTE(review): the first GPT-4o identifier ends in \"-08-0\" while the second ends in \"-08-06\" - confirm it is not truncated.\n",
    "models = {\n",
    "    \"GPT-4o\": [\"z-gpt-4o-2024-08-0\", \"openai/gpt-4o-2024-08-06\"],\n",
    "    \"Deepseek-R1\": \"deepseek/deepseek-r1\",\n",
    "}\n",
    "\n",
    "# Every base scenario is followed by its three paraphrased variants (\"_p0\" to \"_p2\").\n",
    "# Each scenario name maps to a group of the same name.\n",
    "base_scenarios = [\"pg_venture_dummy_cooperate_survival_cot\", \"pd_privacy_dummy_defect_cot\"]\n",
    "variant_suffixes = [\"\", \"_p0\", \"_p1\", \"_p2\"]\n",
    "scenarios = {\n",
    "    base + suffix: {\"group\": base + suffix}\n",
    "    for base in base_scenarios\n",
    "    for suffix in variant_suffixes\n",
    "}\n",
    "\n",
    "scenario_metrics, scenario_runs, scenario_metrics_per_run = compute_metrics_per_scenario(\n",
    "    models=models,\n",
    "    scenarios=scenarios,\n",
    "    paraphrase=True,\n",
    "    result_dir=RESULT_DIR,\n",
    "    save_dir=None,\n",
    ")\n",
    "\n",
    "# Split the per-scenario frames into originals and paraphrases. Each frame is tagged\n",
    "# in place with the base scenario it belongs to (\"group\") and its paraphrase id.\n",
    "paraphrases = []\n",
    "originals = []\n",
    "for scenario_name, metrics in scenario_metrics.items():\n",
    "    is_original = scenario_name.endswith(\"cot\")\n",
    "    # Paraphrase names are the base name plus a three-character suffix such as \"_p0\":\n",
    "    # dropping it gives the group, and its last two characters give the paraphrase id.\n",
    "    group_name = scenario_name if is_original else scenario_name[:-3]\n",
    "    paraphrase_id = None if is_original else scenario_name[-2:]\n",
    "    metrics.insert(0, \"group\", group_name)\n",
    "    metrics.insert(1, \"paraphrase\", paraphrase_id)\n",
    "    (originals if is_original else paraphrases).append(metrics)\n",
    "\n",
    "paraphrase_df = pd.concat(paraphrases).reset_index(drop=True)\n",
    "original_df = pd.concat(originals).reset_index(drop=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "# Metrics for which the deviation between a paraphrase and its original is computed.\n",
    "DIFF_METRICS = [\"morality\", \"payoff\", \"opponent\"]\n",
    "\n",
    "# Pair every paraphrase row with the original row of the same group and model.\n",
    "# One keyed merge replaces a boolean-mask scan of original_df per row and per metric.\n",
    "# `validate` raises if an original (group, model) pair is duplicated, and `indicator`\n",
    "# exposes paraphrase rows that have no original so they can be reported explicitly.\n",
    "paired_df = paraphrase_df.merge(\n",
    "    original_df[[\"group\", \"model\"] + DIFF_METRICS],\n",
    "    on=[\"group\", \"model\"],\n",
    "    how=\"left\",\n",
    "    suffixes=(\"\", \"_original\"),\n",
    "    validate=\"many_to_one\",\n",
    "    indicator=True,\n",
    ")\n",
    "unmatched_df = paired_df.loc[paired_df[\"_merge\"] != \"both\", [\"group\", \"model\"]]\n",
    "if not unmatched_df.empty:\n",
    "    raise ValueError(f\"No original result for paraphrase rows: {unmatched_df.drop_duplicates().to_dict('records')}\")\n",
    "\n",
    "# Absolute difference per paraphrase row, one array per metric.\n",
    "# The left merge keeps the rows in paraphrase_df order.\n",
    "diffs = {\n",
    "    metric: (paired_df[metric] - paired_df[f\"{metric}_original\"]).abs().to_numpy()\n",
    "    for metric in DIFF_METRICS\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "morality: \tmean: 0.018\tstd: 0.024\n",
      "payoff: \tmean: 0.021\tstd: 0.032\n",
      "opponent: \tmean: 0.018\tstd: 0.024\n"
     ]
    }
   ],
   "source": [
    "# Report the mean and standard deviation of the differences collected in `diffs`, one line per metric.\n",
    "# NOTE(review): the saved output shows identical mean/std for \"morality\" and \"opponent\" - confirm this is expected.\n",
    "for k in diffs:\n",
    "    v = diffs[k]\n",
    "    print(f\"{k}: \\tmean: {v.mean():.3f}\\tstd: {v.std():.3f}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "govsim",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
