{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import re\n",
    "import pandas as pd\n",
    "\n",
    "model_names = [\n",
    "    \"iTransformer\", \"SAITS\", \"NonstationaryTransformer\", \"ETSformer\", \"PatchTST\", \"Crossformer\", \"Informer\", \"Autoformer\", \"Pyraformer\", \"Transformer\", \n",
    "    \"BRITS\", \"MRNN\", \"GRUD\", \n",
    "    \"TimesNet\", \"MICN\", \"SCINet\", \n",
    "    \"StemGNN\", \n",
    "    \"FreTS\", \"Koopa\", \"DLinear\", \"FiLM\", \n",
    "    \"CSDI\", \"USGAN\", \"GPVAE\"\n",
    "]\n",
    "\n",
    "metrics_pattern = re.compile(r\"MAE=(\\d+\\.\\d+) ± (\\d+\\.\\d+), MSE=(\\d+\\.\\d+) ± (\\d+\\.\\d+), MRE=(\\d+\\.\\d+) ± (\\d+\\.\\d+), average inference time=(\\d+\\.\\d+)\")\n",
    "params_pattern = re.compile(r\"the number of trainable parameters: ([\\d,]+)\")\n",
    "\n",
    "def extract_and_format_naive_classification(content):\n",
    "    imputation_methods = ['Mean', 'Median', 'LOCF', 'Linear']\n",
    "    data = {\n",
    "        \"methods\": ['PR_AUC w XGB', 'PR_AUC w RNN', 'PR_AUC w Transformer', \n",
    "                    'ROC_AUC w XGB', 'ROC_AUC w RNN', 'ROC_AUC w Transformer']\n",
    "    }\n",
    "    formatted_data = {method: [] for method in imputation_methods}\n",
    "    \n",
    "    current_method_index = 0\n",
    "    for line in content:\n",
    "        if match:= re.match(r\"(\\w+)\\s+with\\s+\\w+\\s+imputation\\s+PR_AUC:\\s+([\\d.]+)±([\\d.]+),\\s+ROC_AUC:\\s+([\\d.]+)±([\\d.]+)\", line):\n",
    "            method, pr_auc_mean, pr_auc_std, roc_auc_mean, roc_auc_std = match.groups()\n",
    "            formatted_data[imputation_methods[current_method_index]].append(f\"{float(pr_auc_mean):.3f} ({float(pr_auc_std):.3f})\")\n",
    "            formatted_data[imputation_methods[current_method_index]].append(f\"{float(roc_auc_mean):.3f} ({float(roc_auc_std):.3f})\")\n",
    "            \n",
    "            # Move to the next imputation method after every three lines\n",
    "            if len(formatted_data[imputation_methods[current_method_index]]) == 6:\n",
    "                current_method_index += 1\n",
    "    \n",
    "    # Convert to DataFrame for better visualization\n",
    "    final_data = {\"name\": [], \"PR_AUC w XGB\": [], \"PR_AUC w RNN\": [], \"PR_AUC w Transformer\": [], \n",
    "                  \"ROC_AUC w XGB\": [], \"ROC_AUC w RNN\": [], \"ROC_AUC w Transformer\": []}\n",
    "    \n",
    "    for method in imputation_methods:\n",
    "        final_data[\"name\"].append(method)\n",
    "        final_data[\"PR_AUC w XGB\"].append(formatted_data[method][0])\n",
    "        final_data[\"PR_AUC w RNN\"].append(formatted_data[method][2])\n",
    "        final_data[\"PR_AUC w Transformer\"].append(formatted_data[method][4])\n",
    "        final_data[\"ROC_AUC w XGB\"].append(formatted_data[method][1])\n",
    "        final_data[\"ROC_AUC w RNN\"].append(formatted_data[method][3])\n",
    "        final_data[\"ROC_AUC w Transformer\"].append(formatted_data[method][5])\n",
    "    \n",
    "    return final_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the point01 imputation results of every model into one CSV per dataset.\n",
    "for dataset in [\"BeijingAir\", \"Electricity\", \"ETT_h1\", \"ItalyAir\", \"Pedestrian\", \"PeMS\", \"PhysioNet2012\", \"PhysioNet2019\"]:\n",
    "    log_dir = f\"./imputation_log/point01_log/{dataset}_log\"\n",
    "    out_dir = \"./results/imputation/point01\"\n",
    "\n",
    "    rows = []\n",
    "    for model in model_names:\n",
    "        # \"0\" stands for every value that cannot be read from the model's log.\n",
    "        row = {\"Model\": model, \"Size\": \"0\", \"MAE\": \"0\", \"MSE\": \"0\", \"MRE\": \"0\", \"Time\": \"0\"}\n",
    "        file_path = os.path.join(log_dir, f\"{model}_{dataset}.log\")\n",
    "        if os.path.exists(file_path):\n",
    "            with open(file_path, 'r') as file:\n",
    "                for line in file:\n",
    "                    # Keep the most recent parameter count seen before the metrics line.\n",
    "                    if params_match := params_pattern.search(line):\n",
    "                        row[\"Size\"] = params_match.group(1)\n",
    "                    if metrics_match := metrics_pattern.search(line):\n",
    "                        mae, mae_std, mse, mse_std, mre, mre_std, inference_time = metrics_match.groups()\n",
    "                        row[\"MAE\"] = f\"{float(mae):.3f} ({float(mae_std):.3f})\"\n",
    "                        row[\"MSE\"] = f\"{float(mse):.3f} ({float(mse_std):.3f})\"\n",
    "                        row[\"MRE\"] = f\"{float(mre):.3f} ({float(mre_std):.3f})\"\n",
    "                        row[\"Time\"] = inference_time\n",
    "                        # Only the first metrics line of a log is reported.\n",
    "                        break\n",
    "        rows.append(row)\n",
    "\n",
    "    # to_csv does not create missing parent directories.\n",
    "    os.makedirs(out_dir, exist_ok=True)\n",
    "    df = pd.DataFrame(rows, columns=[\"Model\", \"Size\", \"MAE\", \"MSE\", \"MRE\", \"Time\"])\n",
    "    df.to_csv(f\"{out_dir}/{dataset}.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the point05 imputation results of every model into one CSV per dataset.\n",
    "for dataset in [\"BeijingAir\", \"Electricity\", \"ETT_h1\", \"ItalyAir\", \"Pedestrian\", \"PeMS\"]:\n",
    "    log_dir = f\"./imputation_log/point05_log/{dataset}_log\"\n",
    "    out_dir = \"./results/imputation/point05\"\n",
    "\n",
    "    rows = []\n",
    "    for model in model_names:\n",
    "        # \"0\" stands for every value that cannot be read from the model's log.\n",
    "        row = {\"Model\": model, \"Size\": \"0\", \"MAE\": \"0\", \"MSE\": \"0\", \"MRE\": \"0\", \"Time\": \"0\"}\n",
    "        file_path = os.path.join(log_dir, f\"{model}_{dataset}.log\")\n",
    "        if os.path.exists(file_path):\n",
    "            with open(file_path, 'r') as file:\n",
    "                for line in file:\n",
    "                    # Keep the most recent parameter count seen before the metrics line.\n",
    "                    if params_match := params_pattern.search(line):\n",
    "                        row[\"Size\"] = params_match.group(1)\n",
    "                    if metrics_match := metrics_pattern.search(line):\n",
    "                        mae, mae_std, mse, mse_std, mre, mre_std, inference_time = metrics_match.groups()\n",
    "                        row[\"MAE\"] = f\"{float(mae):.3f} ({float(mae_std):.3f})\"\n",
    "                        row[\"MSE\"] = f\"{float(mse):.3f} ({float(mse_std):.3f})\"\n",
    "                        row[\"MRE\"] = f\"{float(mre):.3f} ({float(mre_std):.3f})\"\n",
    "                        row[\"Time\"] = inference_time\n",
    "                        # Only the first metrics line of a log is reported.\n",
    "                        break\n",
    "        rows.append(row)\n",
    "\n",
    "    # to_csv does not create missing parent directories.\n",
    "    os.makedirs(out_dir, exist_ok=True)\n",
    "    df = pd.DataFrame(rows, columns=[\"Model\", \"Size\", \"MAE\", \"MSE\", \"MRE\", \"Time\"])\n",
    "    df.to_csv(f\"{out_dir}/{dataset}.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the point09 imputation results of every model into one CSV per dataset.\n",
    "for dataset in [\"BeijingAir\", \"Electricity\", \"ETT_h1\", \"ItalyAir\", \"Pedestrian\", \"PeMS\"]:\n",
    "    log_dir = f\"./imputation_log/point09_log/{dataset}_log\"\n",
    "    out_dir = \"./results/imputation/point09\"\n",
    "\n",
    "    rows = []\n",
    "    for model in model_names:\n",
    "        # \"0\" stands for every value that cannot be read from the model's log.\n",
    "        row = {\"Model\": model, \"Size\": \"0\", \"MAE\": \"0\", \"MSE\": \"0\", \"MRE\": \"0\", \"Time\": \"0\"}\n",
    "        file_path = os.path.join(log_dir, f\"{model}_{dataset}.log\")\n",
    "        if os.path.exists(file_path):\n",
    "            with open(file_path, 'r') as file:\n",
    "                for line in file:\n",
    "                    # Keep the most recent parameter count seen before the metrics line.\n",
    "                    if params_match := params_pattern.search(line):\n",
    "                        row[\"Size\"] = params_match.group(1)\n",
    "                    if metrics_match := metrics_pattern.search(line):\n",
    "                        mae, mae_std, mse, mse_std, mre, mre_std, inference_time = metrics_match.groups()\n",
    "                        row[\"MAE\"] = f\"{float(mae):.3f} ({float(mae_std):.3f})\"\n",
    "                        row[\"MSE\"] = f\"{float(mse):.3f} ({float(mse_std):.3f})\"\n",
    "                        row[\"MRE\"] = f\"{float(mre):.3f} ({float(mre_std):.3f})\"\n",
    "                        row[\"Time\"] = inference_time\n",
    "                        # Only the first metrics line of a log is reported.\n",
    "                        break\n",
    "        rows.append(row)\n",
    "\n",
    "    # to_csv does not create missing parent directories.\n",
    "    os.makedirs(out_dir, exist_ok=True)\n",
    "    df = pd.DataFrame(rows, columns=[\"Model\", \"Size\", \"MAE\", \"MSE\", \"MRE\", \"Time\"])\n",
    "    df.to_csv(f\"{out_dir}/{dataset}.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the block05 imputation results of every model into one CSV per dataset.\n",
    "for dataset in [\"BeijingAir\", \"Electricity\", \"ETT_h1\", \"ItalyAir\", \"PeMS\"]:\n",
    "    log_dir = f\"./imputation_log/block05_log/{dataset}_log\"\n",
    "    out_dir = \"./results/imputation/block05\"\n",
    "\n",
    "    rows = []\n",
    "    for model in model_names:\n",
    "        # \"0\" stands for every value that cannot be read from the model's log.\n",
    "        row = {\"Model\": model, \"Size\": \"0\", \"MAE\": \"0\", \"MSE\": \"0\", \"MRE\": \"0\", \"Time\": \"0\"}\n",
    "        file_path = os.path.join(log_dir, f\"{model}_{dataset}.log\")\n",
    "        if os.path.exists(file_path):\n",
    "            with open(file_path, 'r') as file:\n",
    "                for line in file:\n",
    "                    # Keep the most recent parameter count seen before the metrics line.\n",
    "                    if params_match := params_pattern.search(line):\n",
    "                        row[\"Size\"] = params_match.group(1)\n",
    "                    if metrics_match := metrics_pattern.search(line):\n",
    "                        mae, mae_std, mse, mse_std, mre, mre_std, inference_time = metrics_match.groups()\n",
    "                        row[\"MAE\"] = f\"{float(mae):.3f} ({float(mae_std):.3f})\"\n",
    "                        row[\"MSE\"] = f\"{float(mse):.3f} ({float(mse_std):.3f})\"\n",
    "                        row[\"MRE\"] = f\"{float(mre):.3f} ({float(mre_std):.3f})\"\n",
    "                        row[\"Time\"] = inference_time\n",
    "                        # Only the first metrics line of a log is reported.\n",
    "                        break\n",
    "        rows.append(row)\n",
    "\n",
    "    # to_csv does not create missing parent directories.\n",
    "    os.makedirs(out_dir, exist_ok=True)\n",
    "    df = pd.DataFrame(rows, columns=[\"Model\", \"Size\", \"MAE\", \"MSE\", \"MRE\", \"Time\"])\n",
    "    df.to_csv(f\"{out_dir}/{dataset}.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the subseq05 imputation results of every model into one CSV per dataset.\n",
    "for dataset in [\"BeijingAir\", \"Electricity\", \"ETT_h1\", \"ItalyAir\", \"Pedestrian\", \"PeMS\"]:\n",
    "    log_dir = f\"./imputation_log/subseq05_log/{dataset}_log\"\n",
    "    out_dir = \"./results/imputation/subseq05\"\n",
    "\n",
    "    rows = []\n",
    "    for model in model_names:\n",
    "        # \"0\" stands for every value that cannot be read from the model's log.\n",
    "        row = {\"Model\": model, \"Size\": \"0\", \"MAE\": \"0\", \"MSE\": \"0\", \"MRE\": \"0\", \"Time\": \"0\"}\n",
    "        file_path = os.path.join(log_dir, f\"{model}_{dataset}.log\")\n",
    "        if os.path.exists(file_path):\n",
    "            with open(file_path, 'r') as file:\n",
    "                for line in file:\n",
    "                    # Keep the most recent parameter count seen before the metrics line.\n",
    "                    if params_match := params_pattern.search(line):\n",
    "                        row[\"Size\"] = params_match.group(1)\n",
    "                    if metrics_match := metrics_pattern.search(line):\n",
    "                        mae, mae_std, mse, mse_std, mre, mre_std, inference_time = metrics_match.groups()\n",
    "                        row[\"MAE\"] = f\"{float(mae):.3f} ({float(mae_std):.3f})\"\n",
    "                        row[\"MSE\"] = f\"{float(mse):.3f} ({float(mse_std):.3f})\"\n",
    "                        row[\"MRE\"] = f\"{float(mre):.3f} ({float(mre_std):.3f})\"\n",
    "                        row[\"Time\"] = inference_time\n",
    "                        # Only the first metrics line of a log is reported.\n",
    "                        break\n",
    "        rows.append(row)\n",
    "\n",
    "    # to_csv does not create missing parent directories.\n",
    "    os.makedirs(out_dir, exist_ok=True)\n",
    "    df = pd.DataFrame(rows, columns=[\"Model\", \"Size\", \"MAE\", \"MSE\", \"MRE\", \"Time\"])\n",
    "    df.to_csv(f\"{out_dir}/{dataset}.csv\", index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "file_path = './naive_log/naive_imputation.log'\n",
    "with open(file_path, 'r') as file:\n",
    "    log_content = file.read()\n",
    "\n",
    "# Each save line yields a (type, dataset) pair: the two path components that follow data/.\n",
    "save_pattern = re.compile(r'Successfully saved the given data into data/([^/]+)/([^/]+)')\n",
    "save_matches = save_pattern.findall(log_content)\n",
    "\n",
    "# Each metric line yields (bracketed prefix, method, MAE, MSE, MRE); the line must end with a newline to match.\n",
    "metric_pattern = re.compile(r'\\[(.*?)\\]: (.*?) imputation MAE: (.*?), MSE: (.*?), MRE: (.*?)\\n')\n",
    "metric_matches = metric_pattern.findall(log_content)\n",
    "\n",
    "# Parallel columns of the output table\n",
    "types, datasets, methods = [], [], []\n",
    "maes, mses, mres = [], [], []\n",
    "\n",
    "for position, (log_time, method, mae, mse, mre) in enumerate(metric_matches):\n",
    "    # Metric lines are consumed in groups of four, one group per save line, in file order\n",
    "    log_type, dataset = save_matches[position // 4]\n",
    "    types.append(log_type)\n",
    "    datasets.append(dataset)\n",
    "    methods.append(method)\n",
    "    for column, raw_value in ((maes, mae), (mses, mse), (mres, mre)):\n",
    "        column.append(round(float(raw_value), 3))\n",
    "\n",
    "# Assemble the table and write it out\n",
    "data = dict(type=types, dataset=datasets, method=methods, mae=maes, mse=mses, mre=mres)\n",
    "\n",
    "df = pd.DataFrame(data)\n",
    "df.to_csv(\"results/naive_imputation.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "datasets = [\"PhysioNet2012_point_rate01_log\", \"Pedestrian_subseq_rate05_log\", \"Pedestrian_point_rate09_log\", \"Pedestrian_point_rate05_log\", \"Pedestrian_point_rate01_log\"]\n",
    "\n",
    "# Shared tail of every score line: PR_AUC and ROC_AUC, each as mean±std.\n",
    "# Raw strings keep the regex escapes away from Python's own string-escape processing.\n",
    "score_regex = r\"PR_AUC: (\\d+\\.\\d+)±(\\d+\\.\\d+), ROC_AUC: (\\d+\\.\\d+)±(\\d+\\.\\d+)\"\n",
    "# The baseline line does not depend on the imputation model, so it is compiled once.\n",
    "xgb_without_pattern = re.compile(r\"XGB without imputation \" + score_regex)\n",
    "score_columns = [\n",
    "    \"PR_AUC wt XGB\", \"PR_AUC w XGB\", \"PR_AUC w RNN\", \"PR_AUC w Transformer\",\n",
    "    \"ROC_AUC wt XGB\", \"ROC_AUC w XGB\", \"ROC_AUC w RNN\", \"ROC_AUC w Transformer\",\n",
    "]\n",
    "out_dir = \"./results/classification\"\n",
    "\n",
    "for dataset in datasets:\n",
    "    # Define the path to the log directory\n",
    "    log_dir = f\"classification_log/{dataset}\"\n",
    "\n",
    "    # Log files are named after the bare dataset, without the missing-pattern suffix.\n",
    "    if 'PhysioNet2012' in dataset:\n",
    "        name = 'PhysioNet2012'\n",
    "    elif 'Pedestrian' in dataset:\n",
    "        name = 'Pedestrian'\n",
    "    else:\n",
    "        raise ValueError(f\"cannot derive the log file name for dataset {dataset!r}\")\n",
    "\n",
    "    rows = []\n",
    "    for model in model_names:\n",
    "        # (column suffix, pattern) for every score line that may occur in this model's log.\n",
    "        model_regex = re.escape(model)\n",
    "        score_patterns = [\n",
    "            (\"wt XGB\", xgb_without_pattern),\n",
    "            (\"w XGB\", re.compile(rf\"XGB with {model_regex} imputation \" + score_regex)),\n",
    "            (\"w RNN\", re.compile(rf\"RNN with {model_regex} imputation \" + score_regex)),\n",
    "            (\"w Transformer\", re.compile(rf\"Transformer with {model_regex} imputation \" + score_regex)),\n",
    "        ]\n",
    "\n",
    "        # \"0\" stands for every score that cannot be read from the model's log.\n",
    "        row = {\"Model\": model, **dict.fromkeys(score_columns, \"0\")}\n",
    "        log_file_path = os.path.join(log_dir, f\"{model}_{name}.log\")\n",
    "        if os.path.exists(log_file_path):\n",
    "            with open(log_file_path, 'r') as file:\n",
    "                for line in file:\n",
    "                    for suffix, pattern in score_patterns:\n",
    "                        # No early exit: when a score line occurs more than once, the last one wins.\n",
    "                        if match := pattern.search(line):\n",
    "                            pr_auc, pr_conf_int, roc_auc, roc_conf_int = match.groups()\n",
    "                            row[f\"PR_AUC {suffix}\"] = f\"{float(pr_auc):.3f} ({float(pr_conf_int):.3f})\"\n",
    "                            row[f\"ROC_AUC {suffix}\"] = f\"{float(roc_auc):.3f} ({float(roc_conf_int):.3f})\"\n",
    "        rows.append(row)\n",
    "\n",
    "    # to_csv does not create missing parent directories.\n",
    "    os.makedirs(out_dir, exist_ok=True)\n",
    "    df_all = pd.DataFrame(rows, columns=[\"Model\"] + score_columns)\n",
    "    # dataset[:-4] drops the trailing \"_log\" from the directory name.\n",
    "    df_all.to_csv(f\"{out_dir}/{dataset[:-4]}.csv\", index=False)\n",
    "\n",
    "datasets = [\"PhysioNet2012\", \"Pedestrian_subseq05\",\"Pedestrian_point01\",\"Pedestrian_point05\",\"Pedestrian_point09\"]\n",
    "# Turn every naive-baseline classification log into its own CSV table\n",
    "for dataset in datasets:\n",
    "    file_path = f'./naive_log/downstream_classification_naive_{dataset}.log'\n",
    "    with open(file_path, 'r') as file:\n",
    "        content = list(file)\n",
    "\n",
    "    # Parse the score lines, then write the resulting table\n",
    "    reformatted_data = extract_and_format_naive_classification(content)\n",
    "    df_final = pd.DataFrame(reformatted_data)\n",
    "    df_final.to_csv(f\"./results/classification/downstream_classification_naive_{dataset}.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "datasets = [\"ETT_h1_point_rate01\",\"ETT_h1_block_rate05\", \"ETT_h1_point_rate05\", \"ETT_h1_subseq_rate05\",\n",
    "            \"PeMS_block_rate05\", \"PeMS_point_rate05\",\"PeMS_subseq_rate05\"]\n",
    "\n",
    "# Define regex patterns to extract the required metrics (each as mean±std)\n",
    "xgb_without_pattern = re.compile(r\"XGB \\(without imputation\\) regression MAE: (\\d+\\.\\d+)±(\\d+\\.\\d+), MSE: (\\d+\\.\\d+)±(\\d+\\.\\d+), MRE: (\\d+\\.\\d+)±(\\d+\\.\\d+)\")\n",
    "xgb_with_pattern = re.compile(r\"XGB \\(with (.+) imputation\\) regression MAE: (\\d+\\.\\d+)±(\\d+\\.\\d+), MSE: (\\d+\\.\\d+)±(\\d+\\.\\d+), MRE: (\\d+\\.\\d+)±(\\d+\\.\\d+)\")\n",
    "rnn_with_pattern = re.compile(r\"RNN \\(with (.+) imputation\\) regression MAE: (\\d+\\.\\d+)±(\\d+\\.\\d+), MSE: (\\d+\\.\\d+)±(\\d+\\.\\d+), MRE: (\\d+\\.\\d+)±(\\d+\\.\\d+)\")\n",
    "transformer_with_pattern = re.compile(r\"Transformer \\(with (.+) imputation\\) regression MAE: (\\d+\\.\\d+)±(\\d+\\.\\d+), MSE: (\\d+\\.\\d+)±(\\d+\\.\\d+), MRE: (\\d+\\.\\d+)±(\\d+\\.\\d+)\")\n",
    "\n",
    "# (column suffix, pattern) for every score line that may occur in a log\n",
    "score_patterns = [\n",
    "    (\"wt XGB\", xgb_without_pattern),\n",
    "    (\"w XGB\", xgb_with_pattern),\n",
    "    (\"w RNN\", rnn_with_pattern),\n",
    "    (\"w Transformer\", transformer_with_pattern),\n",
    "]\n",
    "# Output column order: MAE, MRE, MSE for each suffix in turn\n",
    "score_columns = [f\"{metric} {suffix}\" for suffix, _ in score_patterns for metric in (\"MAE\", \"MRE\", \"MSE\")]\n",
    "out_dir = \"./results/regression\"\n",
    "\n",
    "# Loop through each dataset\n",
    "for dataset in datasets:\n",
    "    # Define the path to the log directory\n",
    "    log_dir = f\"./regression_log/{dataset}_log\"\n",
    "\n",
    "    # Log files are named after the bare dataset, without the missing-pattern suffix.\n",
    "    if 'ETT_h1' in dataset:\n",
    "        dataset_name = 'ETT_h1'\n",
    "    elif 'PeMS' in dataset:\n",
    "        dataset_name = 'PeMS'\n",
    "    else:\n",
    "        raise ValueError(f\"cannot derive the log file name for dataset {dataset!r}\")\n",
    "\n",
    "    rows = []\n",
    "    for model in model_names:\n",
    "        log_file_path = os.path.join(log_dir, f\"{model}_{dataset_name}.log\")\n",
    "        # A model without a log file gets no row in the table.\n",
    "        if not os.path.exists(log_file_path):\n",
    "            continue\n",
    "\n",
    "        # An empty string stands for every score that the log does not contain.\n",
    "        row = {\"Model\": model, **dict.fromkeys(score_columns, \"\")}\n",
    "        with open(log_file_path, 'r') as file:\n",
    "            for line in file:\n",
    "                for suffix, pattern in score_patterns:\n",
    "                    # No early exit: when a score line occurs more than once, the last one wins.\n",
    "                    if match := pattern.search(line):\n",
    "                        # The last six groups are MAE, MSE and MRE as (mean, std) pairs;\n",
    "                        # the \"with\" patterns carry one extra leading group that is not used.\n",
    "                        mae, mae_std, mse, mse_std, mre, mre_std = match.groups()[-6:]\n",
    "                        row[f\"MAE {suffix}\"] = f\"{float(mae):.3f} ({float(mae_std):.3f})\"\n",
    "                        row[f\"MRE {suffix}\"] = f\"{float(mre):.3f} ({float(mre_std):.3f})\"\n",
    "                        row[f\"MSE {suffix}\"] = f\"{float(mse):.3f} ({float(mse_std):.3f})\"\n",
    "        rows.append(row)\n",
    "\n",
    "    # to_csv does not create missing parent directories.\n",
    "    os.makedirs(out_dir, exist_ok=True)\n",
    "    df_all = pd.DataFrame(rows, columns=[\"Model\"] + score_columns)\n",
    "    df_all.to_csv(f'./results/regression/{dataset}.csv', index=False)\n",
    "\n",
    "# Summarise the naive-imputation downstream regression log into one CSV row per\n",
    "# (Method, Type, Dataset), with MAE/MRE/MSE columns for each downstream model.\n",
    "log_file_path = './naive_log/downstream_regression_naive.log'\n",
    "with open(log_file_path, 'r') as file:\n",
    "    log_data = file.readlines()\n",
    "\n",
    "# Header line announcing which data/TYPE/DATASET the following result lines belong to\n",
    "pattern_type_dataset = re.compile(r\"Start running downstream regression task on data/(?P<type>[\\w_]+)/(?P<dataset>[\\w_]+)\")\n",
    "# Result line: MODEL (METHOD) regression MAE: mean±ci, MSE: mean±ci, MRE: mean±ci\n",
    "pattern_results = re.compile(r\"(?P<model>\\w+) \\((?P<method>[\\w\\s]+)\\) regression MAE: (?P<MAE>[\\d.]+)±(?P<MAE_CI>[\\d.]+), MSE: (?P<MSE>[\\d.]+)±(?P<MSE_CI>[\\d.]+), MRE: (?P<MRE>[\\d.]+)±(?P<MRE_CI>[\\d.]+)\")\n",
    "\n",
    "# One dict per parsed result line\n",
    "data = []\n",
    "\n",
    "# Context taken from the most recent header line\n",
    "current_type = None\n",
    "current_dataset = None\n",
    "\n",
    "for line in log_data:\n",
    "    match_type_dataset = pattern_type_dataset.search(line)\n",
    "    if match_type_dataset:\n",
    "        current_type = match_type_dataset.group('type')\n",
    "        current_dataset = match_type_dataset.group('dataset')\n",
    "        continue\n",
    "\n",
    "    match_results = pattern_results.search(line)\n",
    "    # Result lines seen before any header cannot be attributed to a dataset and are skipped\n",
    "    if match_results and current_type and current_dataset:\n",
    "        method = match_results.group('method').strip()\n",
    "        model = match_results.group('model').strip()\n",
    "        MAE = f\"{float(match_results.group('MAE')):.3f} ({float(match_results.group('MAE_CI')):.3f})\"\n",
    "        MSE = f\"{float(match_results.group('MSE')):.3f} ({float(match_results.group('MSE_CI')):.3f})\"\n",
    "        MRE = f\"{float(match_results.group('MRE')):.3f} ({float(match_results.group('MRE_CI')):.3f})\"\n",
    "\n",
    "        data.append({\n",
    "            'Method': method,\n",
    "            'Type': current_type,\n",
    "            'Dataset': current_dataset,\n",
    "            f\"MAE w {model}\": MAE,\n",
    "            f\"MSE w {model}\": MSE,\n",
    "            f\"MRE w {model}\": MRE\n",
    "        })\n",
    "\n",
    "index_columns = ['Method', 'Type', 'Dataset']\n",
    "# Final column order of the CSV\n",
    "metric_columns = ['MAE w XGB', 'MRE w XGB', 'MSE w XGB',\n",
    "                  'MAE w RNN', 'MRE w RNN', 'MSE w RNN',\n",
    "                  'MAE w Transformer', 'MRE w Transformer', 'MSE w Transformer']\n",
    "\n",
    "# Convert list of dicts to DataFrame\n",
    "df = pd.DataFrame(data)\n",
    "# Only pivot on metric columns that were actually parsed; asking pivot_table for a\n",
    "# column that does not exist raises a KeyError.\n",
    "available_metrics = [column for column in metric_columns if column in df.columns]\n",
    "if not available_metrics:\n",
    "    raise ValueError(f\"No regression results could be parsed from {log_file_path}\")\n",
    "\n",
    "# Each parsed row only fills the three columns of one model; 'first' keeps the first\n",
    "# non-null value per column, merging those rows into one row per index key.\n",
    "df_pivot = df.pivot_table(index=index_columns,\n",
    "                          values=available_metrics,\n",
    "                          aggfunc='first').reset_index()\n",
    "\n",
    "# Reorder columns to match the example format; reindex (unlike plain selection) also\n",
    "# re-creates, as empty, any expected column that is missing after the pivot.\n",
    "df_pivot = df_pivot.reindex(columns=index_columns + metric_columns)\n",
    "# to_csv does not create missing directories\n",
    "os.makedirs('results/regression', exist_ok=True)\n",
    "df_pivot.to_csv('results/regression/downstream_regression_naive.csv', index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the downstream forecasting metrics of every imputation model, per dataset,\n",
    "# and write one CSV per dataset under ./results/forecasting/.\n",
    "datasets = [\"ETT_h1_point_rate01\",\"ETT_h1_block_rate05\", \"ETT_h1_point_rate05\", \"ETT_h1_subseq_rate05\",\n",
    "            \"PeMS_block_rate05\", \"PeMS_point_rate05\",\"PeMS_subseq_rate05\"]\n",
    "\n",
    "# Regex patterns for the result lines. Each captures three mean±std pairs in the order\n",
    "# MAE, MSE, MRE; the 'with' variants capture the imputer name as an extra first group.\n",
    "xgb_without_pattern = re.compile(r\"XGB \\(without imputation\\) forecasting MAE: (\\d+\\.\\d+)±(\\d+\\.\\d+), MSE: (\\d+\\.\\d+)±(\\d+\\.\\d+), MRE: (\\d+\\.\\d+)±(\\d+\\.\\d+)\")\n",
    "xgb_with_pattern = re.compile(r\"XGB \\(with (.+) imputation\\) forecasting MAE: (\\d+\\.\\d+)±(\\d+\\.\\d+), MSE: (\\d+\\.\\d+)±(\\d+\\.\\d+), MRE: (\\d+\\.\\d+)±(\\d+\\.\\d+)\")\n",
    "rnn_with_pattern = re.compile(r\"RNN \\(with (.+) imputation\\) forecasting MAE: (\\d+\\.\\d+)±(\\d+\\.\\d+), MSE: (\\d+\\.\\d+)±(\\d+\\.\\d+), MRE: (\\d+\\.\\d+)±(\\d+\\.\\d+)\")\n",
    "transformer_with_pattern = re.compile(r\"Transformer \\(with (.+) imputation\\) forecasting MAE: (\\d+\\.\\d+)±(\\d+\\.\\d+), MSE: (\\d+\\.\\d+)±(\\d+\\.\\d+), MRE: (\\d+\\.\\d+)±(\\d+\\.\\d+)\")\n",
    "\n",
    "# Column-name suffix -> pattern that fills the three metric columns carrying that suffix\n",
    "forecasting_patterns = {\n",
    "    \"wt XGB\": xgb_without_pattern,\n",
    "    \"w XGB\": xgb_with_pattern,\n",
    "    \"w RNN\": rnn_with_pattern,\n",
    "    \"w Transformer\": transformer_with_pattern,\n",
    "}\n",
    "# CSV column order: Model first, then MAE/MRE/MSE for each suffix above\n",
    "result_columns = [\"Model\"] + [\n",
    "    f\"{metric} {suffix}\" for suffix in forecasting_patterns for metric in (\"MAE\", \"MRE\", \"MSE\")\n",
    "]\n",
    "\n",
    "# to_csv does not create missing directories\n",
    "os.makedirs('./results/forecasting', exist_ok=True)\n",
    "\n",
    "# Loop through each dataset\n",
    "for dataset in datasets:\n",
    "    # Define the path to the log directory\n",
    "    log_dir = f\"./forecasting_log/{dataset}_log\"\n",
    "    # One list per output column, filled with one entry per model that has a log file\n",
    "    all_results = {column: [] for column in result_columns}\n",
    "\n",
    "    # Log files are named after the base dataset, not the missing-pattern variant\n",
    "    if 'ETT_h1' in dataset:\n",
    "        dataset_name = 'ETT_h1'\n",
    "    elif 'PeMS' in dataset:\n",
    "        dataset_name = 'PeMS'\n",
    "    else:\n",
    "        # Without this branch an unknown dataset would silently reuse the previous\n",
    "        # iteration's name (or raise NameError on the first iteration).\n",
    "        raise ValueError(f\"Cannot infer the base dataset name from '{dataset}'\")\n",
    "\n",
    "    # Loop through each model; models without a log file are left out of the CSV\n",
    "    for model in model_names:\n",
    "        log_file_path = os.path.join(log_dir, f\"{model}_{dataset_name}.log\")\n",
    "        if not os.path.exists(log_file_path):\n",
    "            continue\n",
    "\n",
    "        # Metrics that never appear in the log stay as empty strings\n",
    "        results = {column: \"\" for column in result_columns}\n",
    "        results[\"Model\"] = model\n",
    "\n",
    "        with open(log_file_path, 'r') as file:\n",
    "            for line in file:\n",
    "                # Every pattern is tried on every line; a later matching line overwrites an earlier one\n",
    "                for suffix, pattern in forecasting_patterns.items():\n",
    "                    found = pattern.search(line)\n",
    "                    if found is None:\n",
    "                        continue\n",
    "                    # The last six groups are always the metric pairs; this skips the\n",
    "                    # imputer-name group of the 'with' patterns.\n",
    "                    mae, mae_std, mse, mse_std, mre, mre_std = found.groups()[-6:]\n",
    "                    results[f\"MAE {suffix}\"] = f\"{float(mae):.3f} ({float(mae_std):.3f})\"\n",
    "                    results[f\"MSE {suffix}\"] = f\"{float(mse):.3f} ({float(mse_std):.3f})\"\n",
    "                    results[f\"MRE {suffix}\"] = f\"{float(mre):.3f} ({float(mre_std):.3f})\"\n",
    "\n",
    "        for column in result_columns:\n",
    "            all_results[column].append(results[column])\n",
    "\n",
    "    df_all = pd.DataFrame(all_results)\n",
    "    df_all.to_csv(f'./results/forecasting/{dataset}.csv', index=False)\n",
    "\n",
    "# Summarise the naive-imputation downstream forecasting log into one CSV row per\n",
    "# (Method, Type, Dataset), with MAE/MRE/MSE columns for each downstream model.\n",
    "log_file_path = './naive_log/downstream_forecasting_naive.log'\n",
    "with open(log_file_path, 'r') as file:\n",
    "    log_data = file.readlines()\n",
    "\n",
    "# Header line announcing which data/TYPE/DATASET the following result lines belong to\n",
    "pattern_type_dataset = re.compile(r\"Start running downstream forecasting task on data/(?P<type>[\\w_]+)/(?P<dataset>[\\w_]+)\")\n",
    "# Result line: MODEL (METHOD) forecasting MAE: mean±ci, MSE: mean±ci, MRE: mean±ci\n",
    "pattern_results = re.compile(r\"(?P<model>\\w+) \\((?P<method>[\\w\\s]+)\\) forecasting MAE: (?P<MAE>[\\d.]+)±(?P<MAE_CI>[\\d.]+), MSE: (?P<MSE>[\\d.]+)±(?P<MSE_CI>[\\d.]+), MRE: (?P<MRE>[\\d.]+)±(?P<MRE_CI>[\\d.]+)\")\n",
    "\n",
    "# One dict per parsed result line\n",
    "data = []\n",
    "\n",
    "# Context taken from the most recent header line\n",
    "current_type = None\n",
    "current_dataset = None\n",
    "\n",
    "for line in log_data:\n",
    "    match_type_dataset = pattern_type_dataset.search(line)\n",
    "    if match_type_dataset:\n",
    "        current_type = match_type_dataset.group('type')\n",
    "        current_dataset = match_type_dataset.group('dataset')\n",
    "        continue\n",
    "\n",
    "    match_results = pattern_results.search(line)\n",
    "    # Result lines seen before any header cannot be attributed to a dataset and are skipped\n",
    "    if match_results and current_type and current_dataset:\n",
    "        method = match_results.group('method').strip()\n",
    "        model = match_results.group('model').strip()\n",
    "        MAE = f\"{float(match_results.group('MAE')):.3f} ({float(match_results.group('MAE_CI')):.3f})\"\n",
    "        MSE = f\"{float(match_results.group('MSE')):.3f} ({float(match_results.group('MSE_CI')):.3f})\"\n",
    "        MRE = f\"{float(match_results.group('MRE')):.3f} ({float(match_results.group('MRE_CI')):.3f})\"\n",
    "\n",
    "        data.append({\n",
    "            'Method': method,\n",
    "            'Type': current_type,\n",
    "            'Dataset': current_dataset,\n",
    "            f\"MAE w {model}\": MAE,\n",
    "            f\"MSE w {model}\": MSE,\n",
    "            f\"MRE w {model}\": MRE\n",
    "        })\n",
    "\n",
    "index_columns = ['Method', 'Type', 'Dataset']\n",
    "# Final column order of the CSV\n",
    "metric_columns = ['MAE w XGB', 'MRE w XGB', 'MSE w XGB',\n",
    "                  'MAE w RNN', 'MRE w RNN', 'MSE w RNN',\n",
    "                  'MAE w Transformer', 'MRE w Transformer', 'MSE w Transformer']\n",
    "\n",
    "# Convert list of dicts to DataFrame\n",
    "df = pd.DataFrame(data)\n",
    "# Only pivot on metric columns that were actually parsed; asking pivot_table for a\n",
    "# column that does not exist raises a KeyError.\n",
    "available_metrics = [column for column in metric_columns if column in df.columns]\n",
    "if not available_metrics:\n",
    "    raise ValueError(f\"No forecasting results could be parsed from {log_file_path}\")\n",
    "\n",
    "# Each parsed row only fills the three columns of one model; 'first' keeps the first\n",
    "# non-null value per column, merging those rows into one row per index key.\n",
    "df_pivot = df.pivot_table(index=index_columns,\n",
    "                          values=available_metrics,\n",
    "                          aggfunc='first').reset_index()\n",
    "\n",
    "# Reorder columns to match the example format; reindex (unlike plain selection) also\n",
    "# re-creates, as empty, any expected column that is missing after the pivot.\n",
    "df_pivot = df_pivot.reindex(columns=index_columns + metric_columns)\n",
    "# to_csv does not create missing directories\n",
    "os.makedirs('./results/forecasting', exist_ok=True)\n",
    "df_pivot.to_csv('./results/forecasting/downstream_forecasting_naive.csv', index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pypots",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
