{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "82f85abb-daae-4980-8c39-598e650acb5c",
   "metadata": {},
   "source": [
    "# Summarize and visualize results"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e0f76f96-9fde-495d-818e-43933e693534",
   "metadata": {},
   "source": [
    "The file 'autofolio_results_raw_v_1_all_seeds.pkl' contains all the experiment data pre-calculated to avoid having to run the previous notebooks."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "86260b32-2f9c-42fd-8906-85d2cd771909",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from tqdm.auto import tqdm\n",
    "tqdm.pandas()\n",
    "\n",
    "CLASSIFICATION = 'classification'\n",
    "REGRESSION = 'regression'\n",
    "AMLB = 'AMLB'\n",
    "ASLIB = 'ASLIB'\n",
    "\n",
    "ASLIB_FILE = 'aslib_weka.csv'\n",
    "\n",
    "AMLB_CLS_FILE = 'amlb_classification.csv'\n",
    "AMLB_REG_FILE = 'amlb_regression.csv'\n",
    "\n",
    "CV = 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "e420db6f-ace3-4889-be24-4cb9b5a97309",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from summarize import (\n",
    "    calculate_avg_rank_dict,\n",
    "    scale_task_score_values,\n",
    "    scale_score_results,\n",
    "    scale_reg_results,\n",
    "    aggregate_metrics_to_dataframe,\n",
    "    calculate_wilcoxon,\n",
    "    get_difference_data,\n",
    "    get_percentage_improvement_data,\n",
    "    get_formatted_results,\n",
    "    transformar_notacion_cientifica,\n",
    "    calculate_wilcoxon_complete\n",
    ")\n",
    "\n",
    "from visualization import (\n",
    "    highlight_max,\n",
    "    highlight_positive_differences,\n",
    "    highlight_differences,\n",
    "    highlight_pvalues,\n",
    "    highlight_best,\n",
    "    highlight_best_differences,\n",
    "    highlight_best_differences_reg,\n",
    "    mean_and_std_sup,\n",
    "    mean_and_std_sup_not,\n",
    "    mean_and_std,\n",
    "    rename_columns,\n",
    "    get_score_pos_dataframe,\n",
    "    prepare_data,\n",
    "    plot_three_scaled_boxes_horizontal,\n",
    "    rename_metrics,\n",
    "    rename_metrics_aslib\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9885be33-9be7-4620-9156-f3f3ec6ec4e8",
   "metadata": {},
   "source": [
    "#### Cargar resultados"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a102321d-6c3e-4353-b614-2184e31cfa5b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "def load_and_merge_results(file_paths):\n",
    "    \n",
    "    import pickle\n",
    "    \n",
    "    results = {}\n",
    "    \n",
    "    for file_path in file_paths:\n",
    "        print(file_path)\n",
    "        with open(file_path, 'rb') as file:\n",
    "            partial_results = pickle.load(file)\n",
    "            print(partial_results['description'])\n",
    "            for key in partial_results.keys():\n",
    "                if key not in results:\n",
    "                    results[key] = partial_results[key]\n",
    "                elif isinstance(partial_results[key],dict):\n",
    "                    for app in partial_results[key].keys():\n",
    "                        if app not in results[key].keys():\n",
    "                            results[key][app]=partial_results[key][app]    \n",
    "                        else:\n",
    "                            results[key][app].update(partial_results[key][app])\n",
    "    #globals().update(resultados)\n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "d99b41cd-32c8-4fea-a0f1-a93c3b25810e",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "autofolio/autofolio_results_raw_v_1_all_seeds.pkl\n",
      "Test autofolio. All seeds\n"
     ]
    }
   ],
   "source": [
    "results_files = ['autofolio/autofolio_results_raw_v_1_all_seeds.pkl']\n",
    "results = load_and_merge_results(results_files)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "f6003044-03e4-4222-a2d7-92353b050a9d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "globals().update(results)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "78302fcd-22eb-4942-ac48-1c8f496067d1",
   "metadata": {},
   "source": [
    "#### metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "1ddc885d-53ec-4b80-a118-88aae60a6af6",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from scipy.stats import rankdata\n",
    "import copy\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "def calculate_avg_rank_dict(score_dict, fold_tasks=None):\n",
    "    avg_rank_dict = {}\n",
    "    \n",
    "    # Find unique approach names\n",
    "    approaches = set(name.rsplit('_', 1)[0] for name in score_dict.keys() if '_' in name)\n",
    "    \n",
    "    for approach in approaches:\n",
    "        rank_approach = f'{approach}_Rank'\n",
    "        score_approach = f'{approach}_Score'\n",
    "\n",
    "        if rank_approach in score_dict and score_approach in score_dict:\n",
    "            avg_rank_dict[rank_approach] = {}\n",
    "            avg_rank_dict[score_approach] = {}\n",
    "\n",
    "            for task_num in score_dict[rank_approach]:\n",
    "                if fold_tasks is None or task_num in fold_tasks:\n",
    "                    rank_scores = score_dict[rank_approach][task_num].iloc[:, 0].values\n",
    "                    score_scores = score_dict[score_approach][task_num].iloc[:, 0].values\n",
    "\n",
    "                    # Group metrics and compute rankings properly\n",
    "                    metrics_1 = [(rank_scores[0], rank_approach), (score_scores[0], score_approach)]\n",
    "\n",
    "                    # Sort metrics\n",
    "                    metrics_1.sort(reverse=True, key=lambda x: x[0])\n",
    "\n",
    "                    # Compute ranks taking ties into account\n",
    "                    ranks_1 = rankdata([-m[0] for m in metrics_1], method='min')\n",
    "\n",
    "                    rank_1_value = [ranks_1[i] for i in range(len(metrics_1)) if metrics_1[i][1] == rank_approach][0]\n",
    "                    score_1_value = [ranks_1[i] for i in range(len(metrics_1)) if metrics_1[i][1] == score_approach][0]\n",
    "\n",
    "                    if task_num not in avg_rank_dict[rank_approach]:\n",
    "                        avg_rank_dict[rank_approach][task_num] = pd.DataFrame(index=[0])\n",
    "                    if task_num not in avg_rank_dict[score_approach]:\n",
    "                        avg_rank_dict[score_approach][task_num] = pd.DataFrame(index=[0])\n",
    "\n",
    "                    avg_rank_dict[rank_approach][task_num].loc[0, 'rank'] = rank_1_value\n",
    "                    avg_rank_dict[score_approach][task_num].loc[0, 'rank'] = score_1_value\n",
    "\n",
    "    return avg_rank_dict\n",
    "\n",
    "def scale_task_score_values(task_dict, task_test_scores, func, params={}):\n",
    "    scaled_task_dict = {}\n",
    "    for task_num, df in task_dict.items():\n",
    "        if '_' in task_num:\n",
    "            task_id = int(task_num.split('_')[0])\n",
    "        scaler = func(**params).fit(task_test_scores[task_id].reshape(-1, 1))\n",
    "        scaled_df = df.applymap(lambda x: scaler.transform([[x]])[0][0])\n",
    "        scaled_task_dict[task_num] = scaled_df\n",
    "    return scaled_task_dict\n",
    "\n",
    "def scale_score_results(metric_dict, task_test_scores, func, params={}):\n",
    "    copied = copy.deepcopy(metric_dict)\n",
    "    return {approach: scale_task_score_values(tasks, task_test_scores, func, params) for approach, tasks in copied.items()}\n",
    "\n",
    "def scale_reg_results(amlb_reg_scores_results, reg_file):\n",
    "    reg_task_test_scores = {}\n",
    "    reg_results = pd.read_csv(reg_file, index_col=0)\n",
    "    reg_results['test_scores'] = reg_results.test_scores.apply(lambda x: -1 * (-x ** 2))\n",
    "    dic_ranges = (reg_results.groupby(['tid'])['test_scores'].max() - reg_results.groupby(['tid'])['test_scores'].min()).to_dict() \n",
    "    reg_results['test_scores'] = reg_results.apply(lambda row: row.test_scores/dic_ranges[row.tid], axis=1)\n",
    "    reg_task_test_scores.update(reg_results.groupby('tid')['test_scores'].apply(lambda x: np.array(x)).to_dict())\n",
    "    return scale_score_results(amlb_reg_scores_results, reg_task_test_scores, StandardScaler)\n",
    "\n",
    "def aggregate_metrics_to_dataframe(ndcg_dict, mrr_dict, score_dict, ttb_dict, avg_rank_dict, func, fold_tasks=None):\n",
    "    # Initialize a dictionary to store aggregated data\n",
    "    aggregated_data = {\n",
    "        'MRR@1': {},\n",
    "        'SCORE@1': {},\n",
    "        'AVG_RANK@1': {}\n",
    "    }\n",
    "    \n",
    "    # Function to calculate average of a metric across all tasks\n",
    "    def calculate_average_metric(metric_dict, metric_name):\n",
    "        for approach, tasks in metric_dict.items():\n",
    "            # Create a list to store values from all tasks\n",
    "            position_1_values = []\n",
    "            \n",
    "            # Iterate over all tasks and extract values\n",
    "            for task_num, df in tasks.items():\n",
    "                if fold_tasks is None or task_num in fold_tasks:\n",
    "                    position_1_values.append(df.iloc[0, 0])\n",
    "            \n",
    "            # Compute average for position\n",
    "            aggregated_data[f'{metric_name}@1'][approach] = func(position_1_values) \n",
    "\n",
    "    # Compute averages for MRR, SCORE, and AVG_RANK\n",
    "    calculate_average_metric(mrr_dict, 'MRR')\n",
    "    calculate_average_metric(score_dict, 'SCORE')\n",
    "    calculate_average_metric(avg_rank_dict, 'AVG_RANK')\n",
    "\n",
    "    # Create DataFrame from aggregated dictionary\n",
    "    df_aggregated = pd.DataFrame(aggregated_data)\n",
    "    \n",
    "    return df_aggregated\n",
    "\n",
    "from scipy.stats import wilcoxon\n",
    "\n",
    "def calculate_wilcoxon(ndcg_dict, mrr_dict, score_dict, ttb_dict, avg_rank_dict, score_less=False, ttb=True):    \n",
    "    wilcoxon_results = {\n",
    "        'MRR@1': {},\n",
    "        'SCORE@1': {},\n",
    "        'AVG_RANK@1': {}\n",
    "    }\n",
    "    # Function to compute Wilcoxon for a metric\n",
    "    def calculate_wilcoxon_metric(metric_dict, metric_name, score_less=False):\n",
    "        # Find unique approach names\n",
    "        approaches = set(name.rsplit('_', 1)[0] for name in metric_dict.keys() if '_' in name)\n",
    "        \n",
    "        for approach in approaches:\n",
    "            rank_values = []\n",
    "            score_values = []\n",
    "            \n",
    "            # Extract values for _Rank and _Score\n",
    "            if f'{approach}_Rank' in metric_dict and f'{approach}_Score' in metric_dict:\n",
    "                for task_num in metric_dict[f'{approach}_Rank']:\n",
    "                    rank_values.append(metric_dict[f'{approach}_Rank'][task_num].iloc[:, 0].values)\n",
    "                    score_values.append(metric_dict[f'{approach}_Score'][task_num].iloc[:, 0].values)\n",
    "                \n",
    "                # Convert lists to arrays for Wilcoxon computation\n",
    "                rank_values_1 = pd.concat([pd.Series(r[:1]) for r in rank_values])\n",
    "                score_values_1 = pd.concat([pd.Series(r[:1]) for r in score_values])\n",
    "                \n",
    "                # Compute Wilcoxon and store p-value\n",
    "                if score_less:\n",
    "                    _, p_value_1 = wilcoxon(rank_values_1, score_values_1, alternative='less')\n",
    "                else:\n",
    "                    _, p_value_1 = wilcoxon(rank_values_1, score_values_1, alternative='greater')\n",
    "\n",
    "                wilcoxon_results[f'{metric_name}@1'][approach] = p_value_1\n",
    "\n",
    "    # Compute Wilcoxon for MRR, SCORE, and AVG_RANK\n",
    "    calculate_wilcoxon_metric(mrr_dict, 'MRR')\n",
    "    calculate_wilcoxon_metric(score_dict, 'SCORE', score_less=score_less)\n",
    "    calculate_wilcoxon_metric(avg_rank_dict, 'AVG_RANK', score_less=True)  # AVG_RANK: lower is better\n",
    "    \n",
    "    # Create DataFrame from Wilcoxon results\n",
    "    df_wilcoxon = pd.DataFrame(wilcoxon_results)\n",
    "    \n",
    "    return df_wilcoxon\n",
    "\n",
    "def calculate_wilcoxon_complete(ndcg_dict, mrr_dict, score_dict, ttb_dict, avg_rank_dict, score_less=False, ttb=True):    \n",
    "    return calculate_wilcoxon(ndcg_dict, mrr_dict, score_dict, ttb_dict, avg_rank_dict, score_less, ttb)\n",
    "\n",
    "def get_difference_data(data):\n",
    "    # Create a new DataFrame for the differences\n",
    "    difference_data = {}\n",
    "\n",
    "    # Find unique approach names\n",
    "    approaches = set(idx.rsplit('_', 1)[0] for idx in data.index if '_' in idx)\n",
    "\n",
    "    # Calculate differences\n",
    "    for approach in approaches:\n",
    "        rank_row = data.loc[f'{approach}_Rank']\n",
    "        score_row = data.loc[f'{approach}_Score']\n",
    "        difference_data[approach] = rank_row - score_row\n",
    "        \n",
    "    return pd.DataFrame(difference_data).T\n",
    "\n",
    "def get_percentage_improvement_data(data):\n",
    "    # Create a new DataFrame for percentage improvement\n",
    "    improvement_data = {}\n",
    "\n",
    "    # Find unique approach names\n",
    "    approaches = set(idx.rsplit('_', 1)[0] for idx in data.index if '_' in idx)\n",
    "\n",
    "    # Calculate percentage improvement\n",
    "    for approach in approaches:\n",
    "        rank_row = data.loc[f'{approach}_Rank']\n",
    "        score_row = data.loc[f'{approach}_Score']\n",
    "        # Ensure we are not dividing by zero\n",
    "        improvement_data[approach] = ((rank_row - score_row) / score_row)\n",
    "\n",
    "    return pd.DataFrame(improvement_data).T\n",
    "\n",
    "def get_formatted_results(df):\n",
    "    # Separate into two levels: approach and score/rank\n",
    "    approach = [i.rsplit('_', 1)[0] for i in df.index]\n",
    "    metric = [i.rsplit('_', 1)[-1] if '_' in i else '' for i in df.index]\n",
    "\n",
    "    # Create the MultiIndex\n",
    "    multi_index = pd.MultiIndex.from_arrays([approach, metric], names=['approach', 'score_or_rank'])\n",
    "\n",
    "    # Assign MultiIndex to DataFrame\n",
    "    df_copy = df.copy()\n",
    "    df_copy.index = multi_index\n",
    "    return df_copy.sort_index(ascending=[True, False])\n",
    "\n",
    "def transformar_notacion_cientifica(df):\n",
    "    # Convert SCORE columns to scientific notation\n",
    "    columnas_score = [col for col in df.columns if \"SCORE\" in col]\n",
    "    df[columnas_score] = df[columnas_score].applymap(lambda x: f\"{x:.2e}\")\n",
    "    return df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e58a89e3-9e3d-437d-aa6c-0c79ab8d7303",
   "metadata": {},
   "source": [
    "### Visualize results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e3f4627b-0e2c-4ce1-bbfc-cf3d93379011",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "aslib_avg_rank_results = calculate_avg_rank_dict(aslib_scores_results)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d153a1d9-7db6-4648-81c6-e2b5427ded10",
   "metadata": {},
   "source": [
    "#### Resultados ASLib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "52c521d5-0f2f-4160-a5cf-defb2edf0f44",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "aslib_result = aggregate_metrics_to_dataframe(\n",
    "    {}, aslib_mrrs_results, aslib_scores_results,\n",
    "    {}, aslib_avg_rank_results, np.mean)\n",
    "\n",
    "aslib_result_mean_and_std = aggregate_metrics_to_dataframe(aslib_ndcg_results, aslib_mrrs_results, aslib_scores_results,\n",
    "                                                           aslib_ttbs_results, aslib_avg_rank_results, mean_and_std_sup)\n",
    "\n",
    "get_formatted_results(aslib_result_mean_and_std).to_csv('aslib_result_mean_and_std_autofolio.csv', decimal=',')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "7887a8ac-3f2f-4ec3-83e3-fe9a3b3fb27d",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style type=\"text/css\">\n",
       "#T_13dcc_row0_col1, #T_13dcc_row1_col0, #T_13dcc_row1_col2 {\n",
       "  font-weight: bold;\n",
       "}\n",
       "</style>\n",
       "<table id=\"T_13dcc\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th class=\"blank level0\" >&nbsp;</th>\n",
       "      <th id=\"T_13dcc_level0_col0\" class=\"col_heading level0 col0\" >MRR@1</th>\n",
       "      <th id=\"T_13dcc_level0_col1\" class=\"col_heading level0 col1\" >SCORE@1</th>\n",
       "      <th id=\"T_13dcc_level0_col2\" class=\"col_heading level0 col2\" >AVG_RANK@1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th id=\"T_13dcc_level0_row0\" class=\"row_heading level0 row0\" >AutoFolio_Rank</th>\n",
       "      <td id=\"T_13dcc_row0_col0\" class=\"data row0 col0\" >0.379560</td>\n",
       "      <td id=\"T_13dcc_row0_col1\" class=\"data row0 col1\" >0.853121</td>\n",
       "      <td id=\"T_13dcc_row0_col2\" class=\"data row0 col2\" >1.152381</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_13dcc_level0_row1\" class=\"row_heading level0 row1\" >AutoFolio_Score</th>\n",
       "      <td id=\"T_13dcc_row1_col0\" class=\"data row1 col0\" >0.387742</td>\n",
       "      <td id=\"T_13dcc_row1_col1\" class=\"data row1 col1\" >0.852103</td>\n",
       "      <td id=\"T_13dcc_row1_col2\" class=\"data row1 col2\" >1.145714</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "<pandas.io.formats.style.Styler at 0x7f16e19149d0>"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_styled = rename_columns(aslib_result).sort_index().style.apply(highlight_best, axis=None)\n",
    "df_styled"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "51c64394-ae4f-4164-9645-ae52019292ff",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MRR@1</th>\n",
       "      <th>SCORE@1</th>\n",
       "      <th>AVG_RANK@1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AutoFolio_Rank</th>\n",
       "      <td>0.380 $^{(0.32)}$</td>\n",
       "      <td>0.853 $^{(0.18)}$</td>\n",
       "      <td>1.152 $^{(0.36)}$</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AutoFolio_Score</th>\n",
       "      <td>0.388 $^{(0.32)}$</td>\n",
       "      <td>0.852 $^{(0.18)}$</td>\n",
       "      <td>1.146 $^{(0.35)}$</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                             MRR@1            SCORE@1         AVG_RANK@1\n",
       "AutoFolio_Rank   0.380 $^{(0.32)}$  0.853 $^{(0.18)}$  1.152 $^{(0.36)}$\n",
       "AutoFolio_Score  0.388 $^{(0.32)}$  0.852 $^{(0.18)}$  1.146 $^{(0.35)}$"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "aslib_result_mean_and_std"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "59dae566-9d78-487b-a646-7d20be84d608",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style type=\"text/css\">\n",
       "#T_684c1_row0_col1, #T_684c1_row1_col0, #T_684c1_row1_col2 {\n",
       "  font-weight: bold;\n",
       "}\n",
       "</style>\n",
       "<table id=\"T_684c1\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th class=\"blank level0\" >&nbsp;</th>\n",
       "      <th id=\"T_684c1_level0_col0\" class=\"col_heading level0 col0\" >MRR@1</th>\n",
       "      <th id=\"T_684c1_level0_col1\" class=\"col_heading level0 col1\" >SCORE@1</th>\n",
       "      <th id=\"T_684c1_level0_col2\" class=\"col_heading level0 col2\" >AVG_RANK@1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th id=\"T_684c1_level0_row0\" class=\"row_heading level0 row0\" >AutoFolio_Rank</th>\n",
       "      <td id=\"T_684c1_row0_col0\" class=\"data row0 col0\" >0.379560</td>\n",
       "      <td id=\"T_684c1_row0_col1\" class=\"data row0 col1\" >0.853121</td>\n",
       "      <td id=\"T_684c1_row0_col2\" class=\"data row0 col2\" >1.152381</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th id=\"T_684c1_level0_row1\" class=\"row_heading level0 row1\" >AutoFolio_Score</th>\n",
       "      <td id=\"T_684c1_row1_col0\" class=\"data row1 col0\" >0.387742</td>\n",
       "      <td id=\"T_684c1_row1_col1\" class=\"data row1 col1\" >0.852103</td>\n",
       "      <td id=\"T_684c1_row1_col2\" class=\"data row1 col2\" >1.145714</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "<pandas.io.formats.style.Styler at 0x7f16e103cac0>"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_styled = rename_columns(aslib_result).sort_index().style.apply(highlight_best, axis=None)\n",
    "df_styled"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "1b26891f-61e3-48d9-9518-57390951c232",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style type=\"text/css\">\n",
       "#T_c69a3_row0_col0, #T_c69a3_row0_col2 {\n",
       "  color: red;\n",
       "}\n",
       "#T_c69a3_row0_col1 {\n",
       "  color: green;\n",
       "}\n",
       "</style>\n",
       "<table id=\"T_c69a3\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th class=\"blank level0\" >&nbsp;</th>\n",
       "      <th id=\"T_c69a3_level0_col0\" class=\"col_heading level0 col0\" >MRR@1</th>\n",
       "      <th id=\"T_c69a3_level0_col1\" class=\"col_heading level0 col1\" >SCORE@1</th>\n",
       "      <th id=\"T_c69a3_level0_col2\" class=\"col_heading level0 col2\" >AVG_RANK@1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th id=\"T_c69a3_level0_row0\" class=\"row_heading level0 row0\" >AutoFolio</th>\n",
       "      <td id=\"T_c69a3_row0_col0\" class=\"data row0 col0\" >-0.008182</td>\n",
       "      <td id=\"T_c69a3_row0_col1\" class=\"data row0 col1\" >0.001018</td>\n",
       "      <td id=\"T_c69a3_row0_col2\" class=\"data row0 col2\" >0.006667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "<pandas.io.formats.style.Styler at 0x7f16e103ca60>"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Crear el DataFrame de diferencias\n",
    "df_differences = get_difference_data(aslib_result)\n",
    "\n",
    "df_differences.to_csv('aslib_differences_autofolio.csv', decimal=',')\n",
    "\n",
    "df_styled = rename_columns(df_differences).sort_index().style.apply(highlight_best_differences, axis=None)\n",
    "df_styled"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "c87644c8-7cd7-472a-a543-dae1770922b3",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style type=\"text/css\">\n",
       "#T_655ac_row0_col0, #T_655ac_row0_col2 {\n",
       "  color: red;\n",
       "}\n",
       "#T_655ac_row0_col1 {\n",
       "  color: green;\n",
       "}\n",
       "</style>\n",
       "<table id=\"T_655ac\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th class=\"blank level0\" >&nbsp;</th>\n",
       "      <th id=\"T_655ac_level0_col0\" class=\"col_heading level0 col0\" >MRR@1</th>\n",
       "      <th id=\"T_655ac_level0_col1\" class=\"col_heading level0 col1\" >SCORE@1</th>\n",
       "      <th id=\"T_655ac_level0_col2\" class=\"col_heading level0 col2\" >AVG_RANK@1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th id=\"T_655ac_level0_row0\" class=\"row_heading level0 row0\" >AutoFolio</th>\n",
       "      <td id=\"T_655ac_row0_col0\" class=\"data row0 col0\" >-0.021102</td>\n",
       "      <td id=\"T_655ac_row0_col1\" class=\"data row0 col1\" >0.001194</td>\n",
       "      <td id=\"T_655ac_row0_col2\" class=\"data row0 col2\" >0.005819</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "<pandas.io.formats.style.Styler at 0x7f16e1044c70>"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Crear el DataFrame de diferencias\n",
    "df_improvements = get_percentage_improvement_data(aslib_result)\n",
    "\n",
    "df_improvements.to_csv('aslib_improvements_autofolio.csv', decimal=',')\n",
    "\n",
    "df_styled = rename_columns(df_improvements).sort_index().style.apply(highlight_best_differences, axis=None)\n",
    "df_styled"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "768b176d-1146-46de-a587-92c3299a2fec",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style type=\"text/css\">\n",
       "#T_12af4_row0_col0, #T_12af4_row0_col1, #T_12af4_row0_col2 {\n",
       "  color: black;\n",
       "}\n",
       "</style>\n",
       "<table id=\"T_12af4\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th class=\"blank level0\" >&nbsp;</th>\n",
       "      <th id=\"T_12af4_level0_col0\" class=\"col_heading level0 col0\" >MRR@1</th>\n",
       "      <th id=\"T_12af4_level0_col1\" class=\"col_heading level0 col1\" >SCORE@1</th>\n",
       "      <th id=\"T_12af4_level0_col2\" class=\"col_heading level0 col2\" >AVG_RANK@1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th id=\"T_12af4_level0_row0\" class=\"row_heading level0 row0\" >AutoFolio</th>\n",
       "      <td id=\"T_12af4_row0_col0\" class=\"data row0 col0\" >0.879218</td>\n",
       "      <td id=\"T_12af4_row0_col1\" class=\"data row0 col1\" >0.248371</td>\n",
       "      <td id=\"T_12af4_row0_col2\" class=\"data row0 col2\" >0.653823</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
       "<pandas.io.formats.style.Styler at 0x7f16e107d880>"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Ejemplo de uso:\n",
    "df_wilcoxon_results = calculate_wilcoxon(aslib_ndcg_results, aslib_mrrs_results, aslib_scores_results, aslib_ttbs_results, aslib_avg_rank_results, ttb=False)\n",
    "\n",
    "#df_wilcoxon_results.to_csv('aslib_wilcoxon_autofolio.csv', decimal=',')\n",
    "\n",
    "df_styled_wil = rename_columns(df_wilcoxon_results).sort_index().style.apply(highlight_pvalues)\n",
    "df_styled_wil"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
