{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "b39fb233",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy.stats import spearmanr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "177e97c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load data from Excel\n",
    "\n",
    "table_file_name = \"data.xlsx\"\n",
    "sheet_name = \"Rankings\"\n",
    "\n",
    "benchmark_column_chars = \"CDEFGHIJKLMNOPQRSTUVWXYZ\"\n",
    "benchmark_names = [\n",
    "    \"SimpleQA\",\n",
    "    \"Facts Grounding\",\n",
    "    \"GPQA\",\n",
    "    \"MMLU-Pro\",\n",
    "    \"Humanity's Last Exam\",\n",
    "    \"SuperGPQA\",\n",
    "    \"ARC-AGI-2\",\n",
    "    \"MGSM\",\n",
    "    \"MATH-500\",\n",
    "    \"FrontierMath (Tier 1-3)\",\n",
    "    \"AIME\",\n",
    "    \"HMMT\",\n",
    "    \"HumanEval\",\n",
    "    \"LiveCodeBench\",\n",
    "    \"SWE-bench Verified\",\n",
    "    \"Aider polyglot\",\n",
    "    \"Terminal-Bench\",\n",
    "    \"SciCode\",\n",
    "    \"IOI\",\n",
    "    \"IFEval\",\n",
    "    \"IFBench\",\n",
    "    \"ArenaHard\",\n",
    "    \"WritingBench\",\n",
    "    \"Creative Writing v3\",\n",
    "]\n",
    "benchmark_columns = {char: name for char, name in zip(benchmark_column_chars, benchmark_names)}\n",
    "\n",
    "lmarena_column_chars = [\"AA\", \"AB\", \"AC\", \"AD\"]\n",
    "lmarena_names = [\n",
    "    \"Overall\",\n",
    "    \"Coding\",\n",
    "    \"Math\",\n",
    "    \"Instruction Following\"\n",
    "]\n",
    "lmarena_columns = {char: name for char, name in zip(lmarena_column_chars, lmarena_names)}\n",
    "\n",
    "# Read Excel file\n",
    "df = pd.read_excel(table_file_name, sheet_name=sheet_name)\n",
    "\n",
    "benchmark_df = {}\n",
    "for col, name in benchmark_columns.items():\n",
    "    benchmark_df[name] = pd.read_excel(table_file_name, sheet_name=sheet_name, usecols=col, skiprows=12, nrows=114)\n",
    "\n",
    "lmarena_df = {}\n",
    "for col, name in lmarena_columns.items():\n",
    "    lmarena_df[name] = pd.read_excel(table_file_name, sheet_name=sheet_name, usecols=col, skiprows=12, nrows=114)\n",
    "\n",
    "benchmark_np = {name: arr.to_numpy().flatten() for name, arr in benchmark_df.items()}\n",
    "lmarena_np = {name: arr.to_numpy().flatten() for name, arr in lmarena_df.items()}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "af0e6c5f",
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_spearman(a1, a2):\n",
    "    if len(a1) != len(a2):\n",
    "        print(\"Length mismatch between 2 arrays.\")\n",
    "        return\n",
    "    correlation, p_value = spearmanr(a1, a2)\n",
    "    print(f\"Spearman correlation: {correlation}, p-value: {p_value}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "f17d9fce",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Spearman correlation between SimpleQA and Overall: 0.7921917711573891, p-value: 6.640706636709734e-06, data length: 23\n",
      "Spearman correlation between Facts Grounding and Overall: 0.32094943240454077, p-value: 0.19408223512614617, data length: 18\n",
      "Spearman correlation between GPQA and Overall: 0.7918949708864879, p-value: 5.170570599305235e-07, data length: 28\n",
      "Spearman correlation between MMLU-Pro and Overall: 0.8562745717465936, p-value: 4.846309527635993e-13, data length: 42\n",
      "Spearman correlation between Humanity's Last Exam and Overall: 0.5104895104895105, p-value: 0.08991373630263913, data length: 12\n",
      "Spearman correlation between SuperGPQA and Overall: 0.918192918192918, p-value: 1.509337078778889e-11, data length: 27\n",
      "Spearman correlation between ARC-AGI-2 and Overall: 0.7059575517972256, p-value: 0.0010605585106084716, data length: 18\n",
      "Spearman correlation between MGSM and Overall: 0.6363359075268771, p-value: 0.00035965946242398823, data length: 27\n",
      "Spearman correlation between MATH-500 and Overall: 0.7155918182382405, p-value: 0.0001236494676266748, data length: 23\n",
      "Spearman correlation between FrontierMath (Tier 1-3) and Overall: 0.775996439264913, p-value: 7.566047201282057e-07, data length: 29\n",
      "Spearman correlation between AIME and Overall: 0.5827276485227999, p-value: 0.0009096129413154796, data length: 29\n",
      "Spearman correlation between HMMT and Overall: 0.6656346749226005, p-value: 0.0025683279602937087, data length: 18\n",
      "Spearman correlation between HumanEval and Overall: 0.743332323760148, p-value: 1.554364413319942e-11, data length: 59\n",
      "Spearman correlation between LiveCodeBench and Overall: 0.7802197802197802, p-value: 0.0009944938002309496, data length: 14\n",
      "Spearman correlation between SWE-bench Verified and Overall: 0.6758241758241759, p-value: 0.011225245577668611, data length: 13\n",
      "Spearman correlation between Aider polyglot and Overall: 0.7964602359462749, p-value: 1.9095432310014483e-06, data length: 25\n",
      "Spearman correlation between Terminal-Bench and Overall: 0.7988263874157493, p-value: 6.05899529853723e-12, data length: 49\n",
      "Spearman correlation between SciCode and Overall: 0.6756956824297794, p-value: 1.8957587543597555e-10, data length: 69\n",
      "Spearman correlation between IOI and Overall: 0.7575757575757575, p-value: 0.011143446799694208, data length: 10\n",
      "Spearman correlation between IFEval and Overall: 0.33860054732072103, p-value: 0.14420487868684864, data length: 20\n",
      "Spearman correlation between IFBench and Overall: 0.5786399946176711, p-value: 2.0407477192400676e-05, data length: 47\n",
      "Spearman correlation between ArenaHard and Overall: 0.7316821465428276, p-value: 0.0005580324697729897, data length: 18\n",
      "Spearman correlation between WritingBench and Overall: 0.7085978203784404, p-value: 7.363783518849986e-05, data length: 25\n",
      "Spearman correlation between Creative Writing v3 and Overall: 0.8047196093043626, p-value: 7.2077186248458e-07, data length: 26\n",
      "Spearman correlation between SimpleQA and Coding: 0.7328885818006288, p-value: 6.964905360825711e-05, data length: 23\n",
      "Spearman correlation between Facts Grounding and Coding: 0.22612290055207224, p-value: 0.36692371720297023, data length: 18\n",
      "Spearman correlation between GPQA and Coding: 0.7264267209124936, p-value: 1.2050110948330904e-05, data length: 28\n",
      "Spearman correlation between MMLU-Pro and Coding: 0.8667990614366556, p-value: 1.1722336197257598e-13, data length: 42\n",
      "Spearman correlation between Humanity's Last Exam and Coding: 0.4265734265734266, p-value: 0.16669998762494495, data length: 12\n",
      "Spearman correlation between SuperGPQA and Coding: 0.915725233528361, p-value: 2.1587389111223007e-11, data length: 27\n",
      "Spearman correlation between ARC-AGI-2 and Coding: 0.6619126314880031, p-value: 0.0027688374575282077, data length: 18\n",
      "Spearman correlation between MGSM and Coding: 0.6472294381620036, p-value: 0.00026334247968617133, data length: 27\n",
      "Spearman correlation between MATH-500 and Coding: 0.7147801169863623, p-value: 0.00012689716210941324, data length: 23\n",
      "Spearman correlation between FrontierMath (Tier 1-3) and Coding: 0.7510209635590738, p-value: 2.680910447434238e-06, data length: 29\n",
      "Spearman correlation between AIME and Coding: 0.6159152687534669, p-value: 0.00037517522142555167, data length: 29\n",
      "Spearman correlation between HMMT and Coding: 0.6311986839178357, p-value: 0.004966466387694151, data length: 18\n",
      "Spearman correlation between HumanEval and Coding: 0.7769138849999649, p-value: 4.720338761629843e-13, data length: 59\n",
      "Spearman correlation between LiveCodeBench and Coding: 0.767032967032967, p-value: 0.0013676823882456158, data length: 14\n",
      "Spearman correlation between SWE-bench Verified and Coding: 0.7977999294270577, p-value: 0.0010834604307582453, data length: 13\n",
      "Spearman correlation between Aider polyglot and Coding: 0.8661024111666977, p-value: 2.2223500593748805e-08, data length: 25\n",
      "Spearman correlation between Terminal-Bench and Coding: 0.7955293890667978, p-value: 8.53633347636763e-12, data length: 49\n",
      "Spearman correlation between SciCode and Coding: 0.6847712012887669, p-value: 8.668811905668512e-11, data length: 69\n",
      "Spearman correlation between IOI and Coding: 0.6363636363636362, p-value: 0.04791172612997547, data length: 10\n",
      "Spearman correlation between IFEval and Coding: 0.3106431208775953, p-value: 0.1825155840840984, data length: 20\n",
      "Spearman correlation between IFBench and Coding: 0.5895686404638528, p-value: 1.2976184974086205e-05, data length: 47\n",
      "Spearman correlation between ArenaHard and Coding: 0.8039215686274509, p-value: 5.816363864327564e-05, data length: 18\n",
      "Spearman correlation between WritingBench and Coding: 0.7905366562853936, p-value: 2.5738221851196783e-06, data length: 25\n",
      "Spearman correlation between Creative Writing v3 and Coding: 0.7891227308583849, p-value: 1.6617976313724301e-06, data length: 26\n",
      "Spearman correlation between SimpleQA and Math: 0.7154150197628458, p-value: 0.0001243506200612869, data length: 23\n",
      "Spearman correlation between Facts Grounding and Math: 0.44685242518059853, p-value: 0.06301029106386723, data length: 18\n",
      "Spearman correlation between GPQA and Math: 0.9320714503342439, p-value: 1.5948639375560897e-12, data length: 27\n",
      "Spearman correlation between MMLU-Pro and Math: 0.8640784823433811, p-value: 1.7109870228572624e-13, data length: 42\n",
      "Spearman correlation between Humanity's Last Exam and Math: 0.8251748251748254, p-value: 0.0009513628555555001, data length: 12\n",
      "Spearman correlation between SuperGPQA and Math: 0.9413919413919414, p-value: 2.65275232271181e-13, data length: 27\n",
      "Spearman correlation between ARC-AGI-2 and Math: 0.9019352630339723, p-value: 3.149255576777987e-07, data length: 18\n",
      "Spearman correlation between MGSM and Math: 0.7077862925370926, p-value: 3.635459096937446e-05, data length: 27\n",
      "Spearman correlation between MATH-500 and Math: 0.8850607526047107, p-value: 4.483832374556989e-08, data length: 22\n",
      "Spearman correlation between FrontierMath (Tier 1-3) and Math: 0.867113663073221, p-value: 1.1671682761874538e-09, data length: 29\n",
      "Spearman correlation between AIME and Math: 0.7847269814830894, p-value: 7.684843041124072e-07, data length: 28\n",
      "Spearman correlation between HMMT and Math: 0.8534571723426212, p-value: 6.681541734969331e-06, data length: 18\n",
      "Spearman correlation between HumanEval and Math: 0.8378461336017661, p-value: 1.2930045868541923e-16, data length: 59\n",
      "Spearman correlation between LiveCodeBench and Math: 0.9218927470757684, p-value: 2.765768950784494e-06, data length: 14\n",
      "Spearman correlation between SWE-bench Verified and Math: 0.5439560439560439, p-value: 0.05464653454086549, data length: 13\n",
      "Spearman correlation between Aider polyglot and Math: 0.8439485723998008, p-value: 1.1533349468715056e-07, data length: 25\n",
      "Spearman correlation between Terminal-Bench and Math: 0.7694427594469473, p-value: 1.0494201377798266e-10, data length: 49\n",
      "Spearman correlation between SciCode and Math: 0.7314332713420184, p-value: 9.498087965116878e-13, data length: 69\n",
      "Spearman correlation between IOI and Math: 0.8303030303030302, p-value: 0.0029402270232795065, data length: 10\n",
      "Spearman correlation between IFEval and Math: 0.38796992481203, p-value: 0.0909728657975423, data length: 20\n",
      "Spearman correlation between IFBench and Math: 0.7035712136428173, p-value: 4.9367858827951e-08, data length: 46\n",
      "Spearman correlation between ArenaHard and Math: 0.9364998666699977, p-value: 1.087930445187335e-08, data length: 18\n",
      "Spearman correlation between WritingBench and Math: 0.7826087535819918, p-value: 3.7828901933676273e-06, data length: 25\n",
      "Spearman correlation between Creative Writing v3 and Math: 0.6956224757012637, p-value: 7.970006052704018e-05, data length: 26\n",
      "Spearman correlation between SimpleQA and Instruction Following: 0.7973307943976504, p-value: 5.231474780443541e-06, data length: 23\n",
      "Spearman correlation between Facts Grounding and Instruction Following: 0.27347781217750256, p-value: 0.2721726853194866, data length: 18\n",
      "Spearman correlation between GPQA and Instruction Following: 0.7438741215932508, p-value: 5.709440858044898e-06, data length: 28\n",
      "Spearman correlation between MMLU-Pro and Instruction Following: 0.8796142534296354, p-value: 1.7530889546772718e-14, data length: 42\n",
      "Spearman correlation between Humanity's Last Exam and Instruction Following: 0.34676060182748136, p-value: 0.2694793884630332, data length: 12\n",
      "Spearman correlation between SuperGPQA and Instruction Following: 0.919859574142508, p-value: 1.1777136828479082e-11, data length: 27\n",
      "Spearman correlation between ARC-AGI-2 and Instruction Following: 0.7082740963888797, p-value: 0.0010036992187313731, data length: 18\n",
      "Spearman correlation between MGSM and Instruction Following: 0.6327684279726377, p-value: 0.00039729728053105424, data length: 27\n",
      "Spearman correlation between MATH-500 and Instruction Following: 0.6885813990055344, p-value: 0.00028040453504578615, data length: 23\n",
      "Spearman correlation between FrontierMath (Tier 1-3) and Instruction Following: 0.7774782911540248, p-value: 6.983967404374053e-07, data length: 29\n",
      "Spearman correlation between AIME and Instruction Following: 0.47702356344192, p-value: 0.008883884515045695, data length: 29\n",
      "Spearman correlation between HMMT and Instruction Following: 0.6566856837950591, p-value: 0.0030719841015262707, data length: 18\n",
      "Spearman correlation between HumanEval and Instruction Following: 0.7911728359634007, p-value: 8.869070548113454e-14, data length: 59\n",
      "Spearman correlation between LiveCodeBench and Instruction Following: 0.655665963314031, p-value: 0.010900702611398465, data length: 14\n",
      "Spearman correlation between SWE-bench Verified and Instruction Following: 0.7207709707237556, p-value: 0.005440787014879612, data length: 13\n",
      "Spearman correlation between Aider polyglot and Instruction Following: 0.8173947869480972, p-value: 6.119641227588378e-07, data length: 25\n",
      "Spearman correlation between Terminal-Bench and Instruction Following: 0.8192672764367401, p-value: 6.220787997517891e-13, data length: 49\n",
      "Spearman correlation between SciCode and Instruction Following: 0.7208554556412237, p-value: 2.860925464655999e-12, data length: 69\n",
      "Spearman correlation between IOI and Instruction Following: 0.7781190959104827, p-value: 0.00803313849599635, data length: 10\n",
      "Spearman correlation between IFEval and Instruction Following: 0.4543061622519797, p-value: 0.04418930442435163, data length: 20\n",
      "Spearman correlation between IFBench and Instruction Following: 0.6127558731851167, p-value: 4.691810397367869e-06, data length: 47\n",
      "Spearman correlation between ArenaHard and Instruction Following: 0.7275541795665634, p-value: 0.0006215020932519374, data length: 18\n",
      "Spearman correlation between WritingBench and Instruction Following: 0.7238792816616624, p-value: 4.310406765008875e-05, data length: 25\n",
      "Spearman correlation between Creative Writing v3 and Instruction Following: 0.8418533205617331, p-value: 7.034359274876429e-08, data length: 26\n"
     ]
    }
   ],
   "source": [
    "# Calculate Spearman correlations for each benchmark against each LMArena dimension\n",
    "\n",
    "results = {\n",
    "    \"spearman_rho\": {},\n",
    "    \"spearman_p\": {},\n",
    "    \"data_length\": {}\n",
    "}\n",
    "headers = list(benchmark_columns.values())\n",
    "\n",
    "for lmarena_column in lmarena_columns.keys():\n",
    "    for benchmark_column in benchmark_columns.keys():\n",
    "        benchmark_name = benchmark_columns[benchmark_column]\n",
    "        lmarena_name = lmarena_columns[lmarena_column]\n",
    "        benchmark_data = benchmark_np[benchmark_name]\n",
    "        lmarena_data = lmarena_np[lmarena_name]\n",
    "        non_nan_mask = ~np.isnan(benchmark_data) & ~np.isnan(lmarena_data)\n",
    "        benchmark_data = benchmark_data[non_nan_mask]\n",
    "        lmarena_data = lmarena_data[non_nan_mask]\n",
    "        lmarena_data = -lmarena_data  # Invert LMArena scores to match benchmark direction\n",
    "        if len(benchmark_data) < 3:\n",
    "            print(f\"Not enough data to compute correlation for {benchmark_name} and {lmarena_name}.\")\n",
    "            continue\n",
    "        correlation, p_value = spearmanr(benchmark_data, lmarena_data)\n",
    "        print(f\"Spearman correlation between {benchmark_name} and {lmarena_name}: {correlation}, p-value: {p_value}, data length: {len(benchmark_data)}\")\n",
    "        if lmarena_name not in results[\"spearman_rho\"]:\n",
    "            results[\"spearman_rho\"][lmarena_name] = []\n",
    "            results[\"spearman_p\"][lmarena_name] = []\n",
    "            results[\"data_length\"][lmarena_name] = []\n",
    "        results[\"spearman_rho\"][lmarena_name].append(correlation)\n",
    "        results[\"spearman_p\"][lmarena_name].append(p_value)\n",
    "        results[\"data_length\"][lmarena_name].append(len(benchmark_data))\n",
    "        \n",
    "result_dfs = {\n",
    "    \"spearman_rho\": pd.DataFrame(results[\"spearman_rho\"], index=headers),\n",
    "    \"spearman_p\": pd.DataFrame(results[\"spearman_p\"], index=headers),\n",
    "    \"data_length\": pd.DataFrame(results[\"data_length\"], index=headers)\n",
    "}\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "9ca538be",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Overall': [0.7921917711573891,\n",
       "  0.32094943240454077,\n",
       "  0.7918949708864879,\n",
       "  0.8562745717465936,\n",
       "  0.5104895104895105,\n",
       "  0.918192918192918,\n",
       "  0.7059575517972256,\n",
       "  0.6363359075268771,\n",
       "  0.7155918182382405,\n",
       "  0.775996439264913,\n",
       "  0.5827276485227999,\n",
       "  0.6656346749226005,\n",
       "  0.743332323760148,\n",
       "  0.7802197802197802,\n",
       "  0.6758241758241759,\n",
       "  0.7964602359462749,\n",
       "  0.7988263874157493,\n",
       "  0.6756956824297794,\n",
       "  0.7575757575757575,\n",
       "  0.33860054732072103,\n",
       "  0.5786399946176711,\n",
       "  0.7316821465428276,\n",
       "  0.7085978203784404,\n",
       "  0.8047196093043626],\n",
       " 'Coding': [0.7328885818006288,\n",
       "  0.22612290055207224,\n",
       "  0.7264267209124936,\n",
       "  0.8667990614366556,\n",
       "  0.4265734265734266,\n",
       "  0.915725233528361,\n",
       "  0.6619126314880031,\n",
       "  0.6472294381620036,\n",
       "  0.7147801169863623,\n",
       "  0.7510209635590738,\n",
       "  0.6159152687534669,\n",
       "  0.6311986839178357,\n",
       "  0.7769138849999649,\n",
       "  0.767032967032967,\n",
       "  0.7977999294270577,\n",
       "  0.8661024111666977,\n",
       "  0.7955293890667978,\n",
       "  0.6847712012887669,\n",
       "  0.6363636363636362,\n",
       "  0.3106431208775953,\n",
       "  0.5895686404638528,\n",
       "  0.8039215686274509,\n",
       "  0.7905366562853936,\n",
       "  0.7891227308583849],\n",
       " 'Math': [0.7154150197628458,\n",
       "  0.44685242518059853,\n",
       "  0.9320714503342439,\n",
       "  0.8640784823433811,\n",
       "  0.8251748251748254,\n",
       "  0.9413919413919414,\n",
       "  0.9019352630339723,\n",
       "  0.7077862925370926,\n",
       "  0.8850607526047107,\n",
       "  0.867113663073221,\n",
       "  0.7847269814830894,\n",
       "  0.8534571723426212,\n",
       "  0.8378461336017661,\n",
       "  0.9218927470757684,\n",
       "  0.5439560439560439,\n",
       "  0.8439485723998008,\n",
       "  0.7694427594469473,\n",
       "  0.7314332713420184,\n",
       "  0.8303030303030302,\n",
       "  0.38796992481203,\n",
       "  0.7035712136428173,\n",
       "  0.9364998666699977,\n",
       "  0.7826087535819918,\n",
       "  0.6956224757012637],\n",
       " 'Instruction Following': [0.7973307943976504,\n",
       "  0.27347781217750256,\n",
       "  0.7438741215932508,\n",
       "  0.8796142534296354,\n",
       "  0.34676060182748136,\n",
       "  0.919859574142508,\n",
       "  0.7082740963888797,\n",
       "  0.6327684279726377,\n",
       "  0.6885813990055344,\n",
       "  0.7774782911540248,\n",
       "  0.47702356344192,\n",
       "  0.6566856837950591,\n",
       "  0.7911728359634007,\n",
       "  0.655665963314031,\n",
       "  0.7207709707237556,\n",
       "  0.8173947869480972,\n",
       "  0.8192672764367401,\n",
       "  0.7208554556412237,\n",
       "  0.7781190959104827,\n",
       "  0.4543061622519797,\n",
       "  0.6127558731851167,\n",
       "  0.7275541795665634,\n",
       "  0.7238792816616624,\n",
       "  0.8418533205617331]}"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results[\"spearman_rho\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "3b242139",
   "metadata": {},
   "outputs": [
    {
     "ename": "PermissionError",
     "evalue": "[Errno 13] Permission denied: 'spearman_rho.xlsx'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mPermissionError\u001b[0m                           Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[35], line 3\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[38;5;66;03m# Save results to Excel files\u001b[39;00m\n\u001b[1;32m----> 3\u001b[0m result_dfs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mspearman_rho\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mto_excel(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mspearman_rho.xlsx\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m      4\u001b[0m result_dfs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mspearman_p\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mto_excel(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mspearman_p.xlsx\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m      5\u001b[0m result_dfs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata_length\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mto_excel(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata_length.xlsx\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
      "File \u001b[1;32md:\\anaconda3\\Lib\\site-packages\\pandas\\util\\_decorators.py:333\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    327\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m>\u001b[39m num_allow_args:\n\u001b[0;32m    328\u001b[0m     warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[0;32m    329\u001b[0m         msg\u001b[38;5;241m.\u001b[39mformat(arguments\u001b[38;5;241m=\u001b[39m_format_argument_list(allow_args)),\n\u001b[0;32m    330\u001b[0m         \u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[0;32m    331\u001b[0m         stacklevel\u001b[38;5;241m=\u001b[39mfind_stack_level(),\n\u001b[0;32m    332\u001b[0m     )\n\u001b[1;32m--> 333\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
      "File \u001b[1;32md:\\anaconda3\\Lib\\site-packages\\pandas\\core\\generic.py:2417\u001b[0m, in \u001b[0;36mNDFrame.to_excel\u001b[1;34m(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, inf_rep, freeze_panes, storage_options, engine_kwargs)\u001b[0m\n\u001b[0;32m   2404\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mio\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexcel\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ExcelFormatter\n\u001b[0;32m   2406\u001b[0m formatter \u001b[38;5;241m=\u001b[39m ExcelFormatter(\n\u001b[0;32m   2407\u001b[0m     df,\n\u001b[0;32m   2408\u001b[0m     na_rep\u001b[38;5;241m=\u001b[39mna_rep,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   2415\u001b[0m     inf_rep\u001b[38;5;241m=\u001b[39minf_rep,\n\u001b[0;32m   2416\u001b[0m )\n\u001b[1;32m-> 2417\u001b[0m formatter\u001b[38;5;241m.\u001b[39mwrite(\n\u001b[0;32m   2418\u001b[0m     excel_writer,\n\u001b[0;32m   2419\u001b[0m     sheet_name\u001b[38;5;241m=\u001b[39msheet_name,\n\u001b[0;32m   2420\u001b[0m     startrow\u001b[38;5;241m=\u001b[39mstartrow,\n\u001b[0;32m   2421\u001b[0m     startcol\u001b[38;5;241m=\u001b[39mstartcol,\n\u001b[0;32m   2422\u001b[0m     freeze_panes\u001b[38;5;241m=\u001b[39mfreeze_panes,\n\u001b[0;32m   2423\u001b[0m     engine\u001b[38;5;241m=\u001b[39mengine,\n\u001b[0;32m   2424\u001b[0m     storage_options\u001b[38;5;241m=\u001b[39mstorage_options,\n\u001b[0;32m   2425\u001b[0m     engine_kwargs\u001b[38;5;241m=\u001b[39mengine_kwargs,\n\u001b[0;32m   2426\u001b[0m )\n",
      "File \u001b[1;32md:\\anaconda3\\Lib\\site-packages\\pandas\\io\\formats\\excel.py:943\u001b[0m, in \u001b[0;36mExcelFormatter.write\u001b[1;34m(self, writer, sheet_name, startrow, startcol, freeze_panes, engine, storage_options, engine_kwargs)\u001b[0m\n\u001b[0;32m    941\u001b[0m     need_save \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m    942\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 943\u001b[0m     writer \u001b[38;5;241m=\u001b[39m ExcelWriter(\n\u001b[0;32m    944\u001b[0m         writer,\n\u001b[0;32m    945\u001b[0m         engine\u001b[38;5;241m=\u001b[39mengine,\n\u001b[0;32m    946\u001b[0m         storage_options\u001b[38;5;241m=\u001b[39mstorage_options,\n\u001b[0;32m    947\u001b[0m         engine_kwargs\u001b[38;5;241m=\u001b[39mengine_kwargs,\n\u001b[0;32m    948\u001b[0m     )\n\u001b[0;32m    949\u001b[0m     need_save \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m    951\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
      "File \u001b[1;32md:\\anaconda3\\Lib\\site-packages\\pandas\\io\\excel\\_openpyxl.py:61\u001b[0m, in \u001b[0;36mOpenpyxlWriter.__init__\u001b[1;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs, **kwargs)\u001b[0m\n\u001b[0;32m     57\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopenpyxl\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mworkbook\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Workbook\n\u001b[0;32m     59\u001b[0m engine_kwargs \u001b[38;5;241m=\u001b[39m combine_kwargs(engine_kwargs, kwargs)\n\u001b[1;32m---> 61\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\n\u001b[0;32m     62\u001b[0m     path,\n\u001b[0;32m     63\u001b[0m     mode\u001b[38;5;241m=\u001b[39mmode,\n\u001b[0;32m     64\u001b[0m     storage_options\u001b[38;5;241m=\u001b[39mstorage_options,\n\u001b[0;32m     65\u001b[0m     if_sheet_exists\u001b[38;5;241m=\u001b[39mif_sheet_exists,\n\u001b[0;32m     66\u001b[0m     engine_kwargs\u001b[38;5;241m=\u001b[39mengine_kwargs,\n\u001b[0;32m     67\u001b[0m )\n\u001b[0;32m     69\u001b[0m \u001b[38;5;66;03m# ExcelWriter replaced \"a\" by \"r+\" to allow us to first read the excel file from\u001b[39;00m\n\u001b[0;32m     70\u001b[0m \u001b[38;5;66;03m# the file and later write to it\u001b[39;00m\n\u001b[0;32m     71\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mr+\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mode:  \u001b[38;5;66;03m# Load from existing workbook\u001b[39;00m\n",
      "File \u001b[1;32md:\\anaconda3\\Lib\\site-packages\\pandas\\io\\excel\\_base.py:1246\u001b[0m, in \u001b[0;36mExcelWriter.__init__\u001b[1;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs)\u001b[0m\n\u001b[0;32m   1242\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_handles \u001b[38;5;241m=\u001b[39m IOHandles(\n\u001b[0;32m   1243\u001b[0m     cast(IO[\u001b[38;5;28mbytes\u001b[39m], path), compression\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcompression\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28;01mNone\u001b[39;00m}\n\u001b[0;32m   1244\u001b[0m )\n\u001b[0;32m   1245\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(path, ExcelWriter):\n\u001b[1;32m-> 1246\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_handles \u001b[38;5;241m=\u001b[39m get_handle(\n\u001b[0;32m   1247\u001b[0m         path, mode, storage_options\u001b[38;5;241m=\u001b[39mstorage_options, is_text\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m   1248\u001b[0m     )\n\u001b[0;32m   1249\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cur_sheet \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m   1251\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m date_format \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
      "File \u001b[1;32md:\\anaconda3\\Lib\\site-packages\\pandas\\io\\common.py:882\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m    873\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(\n\u001b[0;32m    874\u001b[0m             handle,\n\u001b[0;32m    875\u001b[0m             ioargs\u001b[38;5;241m.\u001b[39mmode,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    878\u001b[0m             newline\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m    879\u001b[0m         )\n\u001b[0;32m    880\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    881\u001b[0m         \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[1;32m--> 882\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n\u001b[0;32m    883\u001b[0m     handles\u001b[38;5;241m.\u001b[39mappend(handle)\n\u001b[0;32m    885\u001b[0m \u001b[38;5;66;03m# Convert BytesIO or file objects passed with an encoding\u001b[39;00m\n",
      "\u001b[1;31mPermissionError\u001b[0m: [Errno 13] Permission denied: 'spearman_rho.xlsx'"
     ]
    }
   ],
   "source": [
    "# Save results to Excel files\n",
    "\n",
    "result_dfs[\"spearman_rho\"].to_excel(\"spearman_rho.xlsx\")\n",
    "result_dfs[\"spearman_p\"].to_excel(\"spearman_p.xlsx\")\n",
    "result_dfs[\"data_length\"].to_excel(\"data_length.xlsx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8c9c08d1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group benchmarks into categories and compute average correlations per category\n",
    "\n",
    "benchmark_categories = {\n",
    "    \"Question Answering\": [\"SimpleQA\", \"Facts Grounding\", \"GPQA\", \"MMLU-Pro\", \"Humanity's Last Exam\", \"SuperGPQA\", \"ARC-AGI-2\"],\n",
    "    \"Mathematics\": [\"MGSM\", \"MATH-500\", \"FrontierMath (Tier 1-3)\", \"AIME\", \"HMMT\"],\n",
    "    \"Code\": [\"HumanEval\", \"LiveCodeBench\", \"SWE-bench Verified\", \"Aider polyglot\", \"Terminal-Bench\", \"SciCode\", \"IOI\"],\n",
    "    \"Alignment\": [\"IFEval\", \"IFBench\", \"ArenaHard\", \"WritingBench\", \"Creative Writing v3\"],\n",
    "}\n",
    "\n",
    "grouped_results = {\n",
    "    \"spearman_rho\": {},\n",
    "    \"spearman_p\": {},\n",
    "    \"data_length\": {}\n",
    "}\n",
    "\n",
    "grouped_headers = list(benchmark_categories.keys())\n",
    "\n",
    "for i, lmarena_column in enumerate(lmarena_columns.values()):\n",
    "    for category, benchmarks in benchmark_categories.items():\n",
    "        spearman_rhos = []\n",
    "        spearman_ps = []\n",
    "        data_lengths = []\n",
    "        for benchmark in benchmarks:\n",
    "            if benchmark in result_dfs[\"spearman_rho\"].index:\n",
    "                spearman_rhos.append(result_dfs[\"spearman_rho\"].at[benchmark, lmarena_column])\n",
    "                spearman_ps.append(result_dfs[\"spearman_p\"].at[benchmark, lmarena_column])\n",
    "                data_lengths.append(result_dfs[\"data_length\"].at[benchmark, lmarena_column])\n",
    "            else:\n",
    "                print(f\"Benchmark {benchmark} not found in results.\")\n",
    "        if len(spearman_rhos) > 0:\n",
    "            if lmarena_column not in grouped_results[\"spearman_rho\"]:\n",
    "                grouped_results[\"spearman_rho\"][lmarena_column] = []\n",
    "                grouped_results[\"spearman_p\"][lmarena_column] = []\n",
    "                grouped_results[\"data_length\"][lmarena_column] = []\n",
    "            grouped_results[\"spearman_rho\"][lmarena_column].append(np.mean(spearman_rhos))\n",
    "            grouped_results[\"spearman_p\"][lmarena_column].append(np.mean(spearman_ps))\n",
    "            grouped_results[\"data_length\"][lmarena_column].append(np.mean(data_lengths))\n",
    "\n",
    "grouped_result_dfs = {\n",
    "    \"spearman_rho\": pd.DataFrame(grouped_results[\"spearman_rho\"], index=grouped_headers),\n",
    "    \"spearman_p\": pd.DataFrame(grouped_results[\"spearman_p\"], index=grouped_headers),\n",
    "    \"data_length\": pd.DataFrame(grouped_results[\"data_length\"], index=grouped_headers)\n",
    "}\n",
    "\n",
    "grouped_result_dfs[\"spearman_rho\"].to_excel(\"grouped_spearman_rho.xlsx\")\n",
    "grouped_result_dfs[\"spearman_p\"].to_excel(\"grouped_spearman_p.xlsx\")\n",
    "grouped_result_dfs[\"data_length\"].to_excel(\"grouped_data_length.xlsx\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "abde3d26",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Overall': [0.699421532382095,\n",
       "  0.6752572976950862,\n",
       "  0.7468477633102379,\n",
       "  0.6324480236328045],\n",
       " 'Coding': [0.6509212223273773,\n",
       "  0.6720288942757484,\n",
       "  0.7606447741922696,\n",
       "  0.6567585434225355],\n",
       " 'Math': [0.8038456296031155,\n",
       "  0.819628972408147,\n",
       "  0.7826889368750536,\n",
       "  0.70125444688162],\n",
       " 'Instruction Following': [0.667027321993844,\n",
       "  0.6465074730738352,\n",
       "  0.7576066264196758,\n",
       "  0.672069763445411]}"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grouped_results[\"spearman_rho\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "ff4e1f85",
   "metadata": {},
   "outputs": [
    {
     "ename": "PermissionError",
     "evalue": "[Errno 13] Permission denied: 'formatted_spearman_rho.xlsx'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mPermissionError\u001b[0m                           Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[37], line 25\u001b[0m\n\u001b[0;32m     23\u001b[0m                 new_str \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m N.S.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m     24\u001b[0m             new_result_dfs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mspearman_rho\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mat[idx, col] \u001b[38;5;241m=\u001b[39m new_str\n\u001b[1;32m---> 25\u001b[0m new_result_dfs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mspearman_rho\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39mto_excel(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mformatted_spearman_rho.xlsx\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
      "File \u001b[1;32md:\\anaconda3\\Lib\\site-packages\\pandas\\util\\_decorators.py:333\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    327\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m>\u001b[39m num_allow_args:\n\u001b[0;32m    328\u001b[0m     warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[0;32m    329\u001b[0m         msg\u001b[38;5;241m.\u001b[39mformat(arguments\u001b[38;5;241m=\u001b[39m_format_argument_list(allow_args)),\n\u001b[0;32m    330\u001b[0m         \u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[0;32m    331\u001b[0m         stacklevel\u001b[38;5;241m=\u001b[39mfind_stack_level(),\n\u001b[0;32m    332\u001b[0m     )\n\u001b[1;32m--> 333\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
      "File \u001b[1;32md:\\anaconda3\\Lib\\site-packages\\pandas\\core\\generic.py:2417\u001b[0m, in \u001b[0;36mNDFrame.to_excel\u001b[1;34m(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, inf_rep, freeze_panes, storage_options, engine_kwargs)\u001b[0m\n\u001b[0;32m   2404\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mio\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexcel\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ExcelFormatter\n\u001b[0;32m   2406\u001b[0m formatter \u001b[38;5;241m=\u001b[39m ExcelFormatter(\n\u001b[0;32m   2407\u001b[0m     df,\n\u001b[0;32m   2408\u001b[0m     na_rep\u001b[38;5;241m=\u001b[39mna_rep,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   2415\u001b[0m     inf_rep\u001b[38;5;241m=\u001b[39minf_rep,\n\u001b[0;32m   2416\u001b[0m )\n\u001b[1;32m-> 2417\u001b[0m formatter\u001b[38;5;241m.\u001b[39mwrite(\n\u001b[0;32m   2418\u001b[0m     excel_writer,\n\u001b[0;32m   2419\u001b[0m     sheet_name\u001b[38;5;241m=\u001b[39msheet_name,\n\u001b[0;32m   2420\u001b[0m     startrow\u001b[38;5;241m=\u001b[39mstartrow,\n\u001b[0;32m   2421\u001b[0m     startcol\u001b[38;5;241m=\u001b[39mstartcol,\n\u001b[0;32m   2422\u001b[0m     freeze_panes\u001b[38;5;241m=\u001b[39mfreeze_panes,\n\u001b[0;32m   2423\u001b[0m     engine\u001b[38;5;241m=\u001b[39mengine,\n\u001b[0;32m   2424\u001b[0m     storage_options\u001b[38;5;241m=\u001b[39mstorage_options,\n\u001b[0;32m   2425\u001b[0m     engine_kwargs\u001b[38;5;241m=\u001b[39mengine_kwargs,\n\u001b[0;32m   2426\u001b[0m )\n",
      "File \u001b[1;32md:\\anaconda3\\Lib\\site-packages\\pandas\\io\\formats\\excel.py:943\u001b[0m, in \u001b[0;36mExcelFormatter.write\u001b[1;34m(self, writer, sheet_name, startrow, startcol, freeze_panes, engine, storage_options, engine_kwargs)\u001b[0m\n\u001b[0;32m    941\u001b[0m     need_save \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m    942\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 943\u001b[0m     writer \u001b[38;5;241m=\u001b[39m ExcelWriter(\n\u001b[0;32m    944\u001b[0m         writer,\n\u001b[0;32m    945\u001b[0m         engine\u001b[38;5;241m=\u001b[39mengine,\n\u001b[0;32m    946\u001b[0m         storage_options\u001b[38;5;241m=\u001b[39mstorage_options,\n\u001b[0;32m    947\u001b[0m         engine_kwargs\u001b[38;5;241m=\u001b[39mengine_kwargs,\n\u001b[0;32m    948\u001b[0m     )\n\u001b[0;32m    949\u001b[0m     need_save \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m    951\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
      "File \u001b[1;32md:\\anaconda3\\Lib\\site-packages\\pandas\\io\\excel\\_openpyxl.py:61\u001b[0m, in \u001b[0;36mOpenpyxlWriter.__init__\u001b[1;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs, **kwargs)\u001b[0m\n\u001b[0;32m     57\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopenpyxl\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mworkbook\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Workbook\n\u001b[0;32m     59\u001b[0m engine_kwargs \u001b[38;5;241m=\u001b[39m combine_kwargs(engine_kwargs, kwargs)\n\u001b[1;32m---> 61\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\n\u001b[0;32m     62\u001b[0m     path,\n\u001b[0;32m     63\u001b[0m     mode\u001b[38;5;241m=\u001b[39mmode,\n\u001b[0;32m     64\u001b[0m     storage_options\u001b[38;5;241m=\u001b[39mstorage_options,\n\u001b[0;32m     65\u001b[0m     if_sheet_exists\u001b[38;5;241m=\u001b[39mif_sheet_exists,\n\u001b[0;32m     66\u001b[0m     engine_kwargs\u001b[38;5;241m=\u001b[39mengine_kwargs,\n\u001b[0;32m     67\u001b[0m )\n\u001b[0;32m     69\u001b[0m \u001b[38;5;66;03m# ExcelWriter replaced \"a\" by \"r+\" to allow us to first read the excel file from\u001b[39;00m\n\u001b[0;32m     70\u001b[0m \u001b[38;5;66;03m# the file and later write to it\u001b[39;00m\n\u001b[0;32m     71\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mr+\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mode:  \u001b[38;5;66;03m# Load from existing workbook\u001b[39;00m\n",
      "File \u001b[1;32md:\\anaconda3\\Lib\\site-packages\\pandas\\io\\excel\\_base.py:1246\u001b[0m, in \u001b[0;36mExcelWriter.__init__\u001b[1;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs)\u001b[0m\n\u001b[0;32m   1242\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_handles \u001b[38;5;241m=\u001b[39m IOHandles(\n\u001b[0;32m   1243\u001b[0m     cast(IO[\u001b[38;5;28mbytes\u001b[39m], path), compression\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcompression\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28;01mNone\u001b[39;00m}\n\u001b[0;32m   1244\u001b[0m )\n\u001b[0;32m   1245\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(path, ExcelWriter):\n\u001b[1;32m-> 1246\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_handles \u001b[38;5;241m=\u001b[39m get_handle(\n\u001b[0;32m   1247\u001b[0m         path, mode, storage_options\u001b[38;5;241m=\u001b[39mstorage_options, is_text\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m   1248\u001b[0m     )\n\u001b[0;32m   1249\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cur_sheet \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m   1251\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m date_format \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
      "File \u001b[1;32md:\\anaconda3\\Lib\\site-packages\\pandas\\io\\common.py:882\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m    873\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(\n\u001b[0;32m    874\u001b[0m             handle,\n\u001b[0;32m    875\u001b[0m             ioargs\u001b[38;5;241m.\u001b[39mmode,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    878\u001b[0m             newline\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m    879\u001b[0m         )\n\u001b[0;32m    880\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    881\u001b[0m         \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[1;32m--> 882\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n\u001b[0;32m    883\u001b[0m     handles\u001b[38;5;241m.\u001b[39mappend(handle)\n\u001b[0;32m    885\u001b[0m \u001b[38;5;66;03m# Convert BytesIO or file objects passed with an encoding\u001b[39;00m\n",
      "\u001b[1;31mPermissionError\u001b[0m: [Errno 13] Permission denied: 'formatted_spearman_rho.xlsx'"
     ]
    }
   ],
   "source": [
    "# Format Spearman correlation results with significance stars and save to Excel\n",
    "\n",
    "new_result_dfs = {\n",
    "    \"spearman_rho\": pd.DataFrame(index=result_dfs[\"spearman_rho\"].index, columns=result_dfs[\"spearman_rho\"].columns, dtype=str),\n",
    "    \"spearman_p\": result_dfs[\"spearman_p\"],\n",
    "    \"data_length\": result_dfs[\"data_length\"]\n",
    "}\n",
    "for col in result_dfs[\"spearman_rho\"].columns:\n",
    "    for idx in result_dfs[\"spearman_rho\"].index:\n",
    "        val = result_dfs[\"spearman_rho\"].at[idx, col]\n",
    "        if pd.isna(val):\n",
    "            new_str = \"\"\n",
    "        else:\n",
    "            new_str = f\"{val:.2f}\"\n",
    "            p_val = result_dfs[\"spearman_p\"].at[idx, col]\n",
    "            if p_val < 0.05:\n",
    "                new_str += \"*\"\n",
    "                if p_val < 0.01:\n",
    "                    new_str += \"*\"\n",
    "                    if p_val < 0.001:\n",
    "                        new_str += \"*\"\n",
    "            else:\n",
    "                new_str += \" N.S.\"\n",
    "            new_result_dfs[\"spearman_rho\"].at[idx, col] = new_str\n",
    "new_result_dfs[\"spearman_rho\"].to_excel(\"formatted_spearman_rho.xlsx\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6bdcfc93",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
