{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e0d68ea8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.chdir(\"../\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c1cb6616",
   "metadata": {},
   "outputs": [],
   "source": [
    "import collections\n",
    "import json\n",
    "from pathlib import Path\n",
    "from types import SimpleNamespace\n",
    "\n",
    "import click\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "9f1c56b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "def main(input_dir, output_file):\n",
    "    input_dir = Path(input_dir)\n",
    "\n",
    "    # recursively find all `acc-eval_results.json`\n",
    "    result_files = list(input_dir.rglob(\"acc-eval_results.json\"))\n",
    "    if not result_files:\n",
    "        click.echo(\"No acc-eval_results.json files found.\")\n",
    "        return\n",
    "\n",
    "    click.echo(f\"Found {len(result_files)} acc-eval_results.json files.\")\n",
    "    all_results = collections.defaultdict(list)\n",
    "    for file_path in result_files:\n",
    "        click.echo(f\"Parsing {file_path}\")\n",
    "        results = parse_acc_eval_results(file_path)\n",
    "\n",
    "        for acc_type, acc_data in results.items():\n",
    "            all_results[acc_type].append(acc_data)\n",
    "\n",
    "    for acc_type in all_results:\n",
    "        # Convert lists of dicts to DataFrame\n",
    "        df = pd.DataFrame(all_results[acc_type])\n",
    "        # Set the index to file_path\n",
    "        df = df.set_index(\"file_path\")\n",
    "        # Sort by index\n",
    "        df = df.sort_index()\n",
    "        # add an column for average accuracy\n",
    "        df[\"avg\"] = df.mean(axis=1)\n",
    "        # Rename columns to include the accuracy type\n",
    "        all_results[acc_type] = df\n",
    "    \n",
    "\n",
    "    output_file = Path(output_file)\n",
    "    output_file.parent.mkdir(parents=True, exist_ok=True)\n",
    "    is_first = True\n",
    "    for acc_type, output_df in all_results.items():\n",
    "        mode = \"w\" if is_first else \"a\"\n",
    "        is_first = False\n",
    "\n",
    "        output_df.to_csv(output_file, sep=\"\\t\", mode=mode)\n",
    "        click.echo(f\"Results saved to {output_file}\")\n",
    "\n",
    "    return all_results\n",
    "\n",
    "def parse_acc_eval_results(file_path):\n",
    "    \"\"\"Parse a single acc-eval_results.json file.\n",
    "    {\n",
    "    \"accuracy_total\": {\n",
    "        \"GBaker/MedQA-USMLE-4-options\": 0.10385916359163591,\n",
    "        \"openlifescienceai/headqa\": 0.08821770334928229,\n",
    "        \"openlifescienceai/medmcqa\": 0.11620646593589566\n",
    "    },\n",
    "    \"accuracy_pass@total\": {\n",
    "        \"GBaker/MedQA-USMLE-4-options\": 0.6783517835178352,\n",
    "        \"openlifescienceai/headqa\": 0.5980861244019139,\n",
    "        \"openlifescienceai/medmcqa\": 0.7236020535590398\n",
    "    },\n",
    "    \"total_num_rollouts\": 375296,\n",
    "    \"total_num_correct\": 43197,\n",
    "    \"num_samples\": 23456\n",
    "    }\n",
    "    \"\"\"\n",
    "    with open(file_path, \"r\") as f:\n",
    "        data = json.load(f)\n",
    "\n",
    "    parsed_data_dict = {}\n",
    "    for acc_type, acc_dict in data.items():\n",
    "        parsed_data = {}\n",
    "        if not acc_type.startswith(\"accuracy_\"):\n",
    "            continue\n",
    "        for dataset_name, acc_value in acc_dict.items():\n",
    "            key = dataset_name\n",
    "            parsed_data[key] = acc_value\n",
    "        parsed_data[\"file_path\"] = str(file_path)\n",
    "        parsed_data_dict[acc_type] = parsed_data\n",
    "\n",
    "    return parsed_data_dict\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "971653f4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 31 acc-eval_results.json files.\n",
      "Parsing outputs/greedy/v0/7b-m23k-checkpoint-4401/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_7b-pmc_vqa-m23k_sft_epoch_5-step_800/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_300/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/7b-m23k-checkpoint-7335/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/3b-m23k-checkpoint-4401/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_7b-pmc_vqa-m23k_rl-step_900/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_200/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_3b-pmc_vqa-m23k_rl-step_1805/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/Qwen2.5-VL-7B-Instruct/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_200/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_3b-pmc_vqa-m23k_sft_epoch_3-step_1150/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_3b-m23k-step_320/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_300/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_400/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_500/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_3b-pmc_vqa-step_451/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_3b-pmc_vqa-m23k_sft_epoch_5-step_1400/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_3b-m23k-easy_to_hard-step_320/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_7b-pmc_vqa-step_451/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/Qwen2.5-VL-3B-Instruct/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_7b-m23k-easy_to_hard-1_epoch-step_64/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_100/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_400/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_7b-pmc_vqa-m23k_rl-step_1805/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_451/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_100/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_3b-m23k-easy_to_hard-1_epoch-step_64/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_7b-m23k-easy_to_hard-step_320/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/3b-m23k-checkpoint-7335/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_7b-pmc_vqa-m23k_sft_epoch_3-step_1805/acc-eval_results.json\n",
      "Parsing outputs/greedy/v0/train-qwen2_5_vl_7b-m23k-step_320/acc-eval_results.json\n",
      "Results saved to outputs/results-greedy-v0.tsv\n",
      "Results saved to outputs/results-greedy-v0.tsv\n",
      "Found 31 acc-eval_results.json files.\n",
      "Parsing outputs/greedy/v1/7b-m23k-checkpoint-4401/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_7b-pmc_vqa-m23k_sft_epoch_5-step_800/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_300/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/7b-m23k-checkpoint-7335/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/3b-m23k-checkpoint-4401/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_7b-pmc_vqa-m23k_rl-step_900/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_200/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_3b-pmc_vqa-m23k_rl-step_1805/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/Qwen2.5-VL-7B-Instruct/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_200/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_3b-pmc_vqa-m23k_sft_epoch_3-step_1150/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_3b-m23k-step_320/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_300/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_400/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_500/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_3b-pmc_vqa-step_451/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_3b-pmc_vqa-m23k_sft_epoch_5-step_1400/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_3b-m23k-easy_to_hard-step_320/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_7b-pmc_vqa-step_451/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/Qwen2.5-VL-3B-Instruct/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_7b-m23k-easy_to_hard-1_epoch-step_64/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_100/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_400/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_7b-pmc_vqa-m23k_rl-step_1805/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_451/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_100/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_3b-m23k-easy_to_hard-1_epoch-step_64/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_7b-m23k-easy_to_hard-step_320/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/3b-m23k-checkpoint-7335/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_7b-pmc_vqa-m23k_sft_epoch_3-step_1805/acc-eval_results.json\n",
      "Parsing outputs/greedy/v1/train-qwen2_5_vl_7b-m23k-step_320/acc-eval_results.json\n",
      "Results saved to outputs/results-greedy-v1.tsv\n",
      "Results saved to outputs/results-greedy-v1.tsv\n",
      "Found 31 acc-eval_results.json files.\n",
      "Parsing outputs/greedy/v2/7b-m23k-checkpoint-4401/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_7b-pmc_vqa-m23k_sft_epoch_5-step_800/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_300/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/7b-m23k-checkpoint-7335/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/3b-m23k-checkpoint-4401/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_7b-pmc_vqa-m23k_rl-step_900/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_200/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_3b-pmc_vqa-m23k_rl-step_1805/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/Qwen2.5-VL-7B-Instruct/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_200/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_3b-pmc_vqa-m23k_sft_epoch_3-step_1150/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_3b-m23k-step_320/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_300/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_400/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_500/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_3b-pmc_vqa-step_451/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_3b-pmc_vqa-m23k_sft_epoch_5-step_1400/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_3b-m23k-easy_to_hard-step_320/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_7b-pmc_vqa-step_451/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/Qwen2.5-VL-3B-Instruct/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_7b-m23k-easy_to_hard-1_epoch-step_64/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_100/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_400/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_7b-pmc_vqa-m23k_rl-step_1805/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_451/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_100/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_3b-m23k-easy_to_hard-1_epoch-step_64/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_7b-m23k-easy_to_hard-step_320/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/3b-m23k-checkpoint-7335/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_7b-pmc_vqa-m23k_sft_epoch_3-step_1805/acc-eval_results.json\n",
      "Parsing outputs/greedy/v2/train-qwen2_5_vl_7b-m23k-step_320/acc-eval_results.json\n",
      "Results saved to outputs/results-greedy-v2.tsv\n",
      "Results saved to outputs/results-greedy-v2.tsv\n"
     ]
    }
   ],
   "source": [
    "all_results_list = []\n",
    "all_results_list.append(main(\"outputs/greedy/v0\", \"outputs/results-greedy-v0.tsv\"))\n",
    "all_results_list.append(main(\"outputs/greedy/v1\", \"outputs/results-greedy-v1.tsv\"))\n",
    "all_results_list.append(main(\"outputs/greedy/v2\", \"outputs/results-greedy-v2.tsv\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "08025b0d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MMMU-medical      True\n",
       "MedXpertQA-MM     True\n",
       "pathvqa_closed    True\n",
       "pmc_vqa           True\n",
       "slake_closed      True\n",
       "vqa_rad_closed    True\n",
       "avg               True\n",
       "dtype: bool"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_results_list[0].keys()\n",
    "(all_results_list[0][\"accuracy_total\"] == all_results_list[0][\"accuracy_pass@total\"]).all()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "dfeb28be",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_results_list = [i[\"accuracy_total\"] for i in all_results_list]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "f98b62ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "# remove \"outputs/greedy/v*/\" prefix and \"/acc-eval_results.json\" suffix in index\n",
    "for i in all_results_list:\n",
    "    i.index = i.index.str.replace(r\"outputs/greedy/v\\d+/\", \"\", regex=True)\n",
    "    i.index = i.index.str.replace(r\"/acc-eval_results.json\", \"\", regex=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "667aaf18",
   "metadata": {},
   "outputs": [],
   "source": [
    "stacked = pd.concat(all_results_list, keys=range(len(all_results_list)))\n",
    "mean_df = stacked.groupby(level=1).mean()\n",
    "std_df = stacked.groupby(level=1).std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "f12d6c43",
   "metadata": {},
   "outputs": [],
   "source": [
    "# display(stacked)\n",
    "# display(mean_df)\n",
    "# display(std_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "f917c5ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "# reorder the index\n",
    "def reorder_index(df):\n",
    "    index_order = [\n",
    "    \"Qwen2.5-VL-3B-Instruct\",\n",
    "    \"3b-m23k-checkpoint-4401\",\n",
    "    \"3b-m23k-checkpoint-7335\",\n",
    "    \"train-qwen2_5_vl_3b-m23k-step_320\",\n",
    "    \"train-qwen2_5_vl_3b-m23k-easy_to_hard-step_320\",\n",
    "    \"train-qwen2_5_vl_3b-m23k-easy_to_hard-1_epoch-step_64\",\n",
    "    \"train-qwen2_5_vl_3b-pmc_vqa-step_451\",\n",
    "    \"train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_100\",\n",
    "    \"train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_200\",\n",
    "    \"train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_300\",\n",
    "    \"train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_400\",\n",
    "    \"train-qwen2_5_vl_3b-pmc_vqa-easy_to_hard-step_451\",\n",
    "    \"train-qwen2_5_vl_3b-pmc_vqa-m23k_sft_epoch_3-step_1150\",\n",
    "    \"train-qwen2_5_vl_3b-pmc_vqa-m23k_sft_epoch_5-step_1400\",\n",
    "    \"train-qwen2_5_vl_3b-pmc_vqa-m23k_rl-step_1805\",\n",
    "    \"Qwen2.5-VL-7B-Instruct\",\n",
    "    \"7b-m23k-checkpoint-4401\",\n",
    "    \"7b-m23k-checkpoint-7335\",\n",
    "    \"train-qwen2_5_vl_7b-m23k-step_320\",\n",
    "    \"train-qwen2_5_vl_7b-m23k-easy_to_hard-step_320\",\n",
    "    \"train-qwen2_5_vl_7b-m23k-easy_to_hard-1_epoch-step_64\",\n",
    "    \"train-qwen2_5_vl_7b-pmc_vqa-step_451\",\n",
    "    \"train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_100\",\n",
    "    \"train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_200\",\n",
    "    \"train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_300\",\n",
    "    \"train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_400\",\n",
    "    \"train-qwen2_5_vl_7b-pmc_vqa-easy_to_hard-step_500\",\n",
    "    \"train-qwen2_5_vl_7b-pmc_vqa-m23k_sft_epoch_3-step_1805\",\n",
    "    \"train-qwen2_5_vl_7b-pmc_vqa-m23k_sft_epoch_5-step_800\",\n",
    "    \"train-qwen2_5_vl_7b-pmc_vqa-m23k_rl-step_900\",\n",
    "    ]\n",
    "    # get unused indices and append them to the end\n",
    "    unused_indices = df.index.difference(index_order)\n",
    "    index_order.extend(unused_indices.tolist())\n",
    "    df = df.reindex(index_order)\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "0f174eda",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean results saved to outputs/results-greedy-mean.tsv\n",
      "Results saved to outputs/results-greedy-combined.tsv\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_2789375/375717886.py:11: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n",
      "  mean_str = mean_pct.applymap(lambda x: f\"{x:.2f}\")\n",
      "/tmp/ipykernel_2789375/375717886.py:12: FutureWarning: DataFrame.applymap has been deprecated. Use DataFrame.map instead.\n",
      "  std_str = std_pct.applymap(lambda x: f\"{x:.2f}\")\n"
     ]
    }
   ],
   "source": [
    "mean_pct = mean_df * 100\n",
    "std_pct = std_df * 100\n",
    "\n",
    "# save mean\n",
    "output_file = Path(\"outputs/results-greedy-mean.tsv\")\n",
    "reorder_index(mean_pct).to_csv(output_file, sep=\"\\t\")\n",
    "print(f\"Mean results saved to {output_file}\")\n",
    "\n",
    "\n",
    "# Format mean and std as strings with 2 decimal places\n",
    "mean_str = mean_pct.applymap(lambda x: f\"{x:.2f}\")\n",
    "std_str = std_pct.applymap(lambda x: f\"{x:.2f}\")\n",
    "\n",
    "# Combine as \"mean±std\"\n",
    "combined = mean_str + \" ± \" + std_str\n",
    "combined = reorder_index(combined)\n",
    "\n",
    "# save\n",
    "output_file = Path(\"outputs/results-greedy-combined.tsv\")\n",
    "combined.to_csv(output_file, sep=\"\\t\")\n",
    "print(f\"Results saved to {output_file}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0731a4e",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "jupyter",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
