{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "# Limit OpenMP to 4 threads. This is assigned before numpy and the other numeric\n",
    "# libraries are imported below -- presumably so they see the limit when they\n",
    "# initialise their thread pools (TODO confirm this ordering is required).\n",
    "os.environ['OMP_NUM_THREADS'] = '4' # number of OpenMP worker threads\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import openai\n",
    "import zhipuai\n",
    "import plotly.graph_objects as go\n",
    "# NOTE(review): pandas and numpy are already imported above; the next two lines are redundant.\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import sys\n",
    "# An empty sys.path entry stands for the current working directory, which lets the\n",
    "# `data` and `models` packages imported below resolve when the notebook is started\n",
    "# from the directory that contains them (assumed to be the repository root -- verify).\n",
    "sys.path.append('')\n",
    "\n",
    "# Placeholder credentials: replace them before running. These are plain assignments,\n",
    "# so they overwrite any GEMINI_API_KEY / OPENAI_API_KEY already set in the environment.\n",
    "# Do not commit real keys to the notebook.\n",
    "os.environ[\"GEMINI_API_KEY\"] = \"your_gemini_key\"\n",
    "os.environ['OPENAI_API_KEY'] = \"your_openai_key\"\n",
    "\n",
    "# Configure the openai module: the key is the value assigned just above, and the base\n",
    "# URL is OPENAI_API_BASE when that variable is defined, else the placeholder string.\n",
    "openai.api_key = os.environ['OPENAI_API_KEY']\n",
    "openai.api_base = os.environ.get(\"OPENAI_API_BASE\", \"your_openai_api_base\")\n",
    "from data.serialize import SerializerSettings\n",
    "from models.utils import grid_iter\n",
    "from models.promptcast import get_promptcast_predictions_data\n",
    "from models.darts import get_arima_predictions_data\n",
    "from models.darts import get_TCN_predictions_data, get_NHITS_predictions_data, get_NBEATS_predictions_data\n",
    "from models.llmtime import get_llmtime_predictions_data\n",
    "from data.small_context import get_datasets, get_memorization_datasets\n",
    "from models.validation_likelihood_tuning import get_autotuned_predictions_data\n",
    "\n",
    "# IPython autoreload in mode 2: re-import edited modules before each cell execution.\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "LLMs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _basic_hypers(alpha=0.95, temp=1.0, **settings_overrides):\n",
    "    \"\"\"Build a hyperparameter dict for the configuration most models here share.\n",
    "\n",
    "    Each call returns a new dict holding its own SerializerSettings instance,\n",
    "    so the per-provider dicts below stay independent objects.\n",
    "\n",
    "    Args:\n",
    "        alpha: value stored under the 'alpha' key.\n",
    "        temp: value stored under the 'temp' key.\n",
    "        **settings_overrides: extra keyword arguments forwarded to\n",
    "            SerializerSettings on top of the shared serialization options.\n",
    "\n",
    "    Returns:\n",
    "        dict with the keys alpha, basic, temp, top_p and settings.\n",
    "    \"\"\"\n",
    "    return dict(\n",
    "        alpha=alpha,\n",
    "        basic=True,\n",
    "        temp=temp,\n",
    "        top_p=0.8,\n",
    "        settings=SerializerSettings(base=10, prec=3, signed=True, time_sep=', ', bit_sep='',\n",
    "                                    minus_sign='-', **settings_overrides),\n",
    "    )\n",
    "\n",
    "\n",
    "# alpha=0.3; half_bin_correction is left at the SerializerSettings default.\n",
    "gpt4_hypers = _basic_hypers(alpha=0.3)\n",
    "\n",
    "gpt3_hypers = dict(\n",
    "    temp=0.7,\n",
    "    alpha=0.95,\n",
    "    beta=0.3,\n",
    "    basic=False,\n",
    "    settings=SerializerSettings(base=10, prec=3, signed=True, half_bin_correction=True)\n",
    ")\n",
    "\n",
    "promptcast_hypers = dict(\n",
    "    temp=0.7,\n",
    "    settings=SerializerSettings(base=10, prec=0, signed=True, \n",
    "                                time_sep=', ',\n",
    "                                bit_sep='',\n",
    "                                plus_sign='',\n",
    "                                minus_sign='-',\n",
    "                                half_bin_correction=False,\n",
    "                                decimal_point='')\n",
    ")\n",
    "\n",
    "# NOTE: an earlier llama_hypers (temp=1.0, alpha=0.99, beta=0.3, basic=False,\n",
    "# time_sep=',', plus_sign='') used to be defined at this point. It was rebound\n",
    "# further down before anything read it, so it never took effect; only the\n",
    "# effective definition (near the end of this group) is kept.\n",
    "\n",
    "# Same values as gpt4_hypers, but a separate dict and settings object.\n",
    "mistral_api_hypers = _basic_hypers(alpha=0.3)\n",
    "\n",
    "# Same values as gpt3_hypers; currently only referenced by a commented-out\n",
    "# model_hypers entry.\n",
    "llma2_hypers = dict(\n",
    "    temp=0.7,\n",
    "    alpha=0.95,\n",
    "    beta=0.3,\n",
    "    basic=False,\n",
    "    settings=SerializerSettings(base=10, prec=3, signed=True, half_bin_correction=True)\n",
    ")\n",
    "\n",
    "# arima_hypers = dict(p=[12,30], d=[1,2], q=[0])\n",
    "\n",
    "# The remaining providers all use alpha=0.95, temp=1.0 and half_bin_correction=False.\n",
    "gemini_hypers = _basic_hypers(half_bin_correction=False)\n",
    "claude_hypers = _basic_hypers(half_bin_correction=False)\n",
    "# glm keeps its integer temperature (1 rather than 1.0), as originally written.\n",
    "glm_hypers = _basic_hypers(temp=1, half_bin_correction=False)\n",
    "qwen_hypers = _basic_hypers(half_bin_correction=False)\n",
    "moonshot_hypers = _basic_hypers(half_bin_correction=False)\n",
    "deepseek_hypers = _basic_hypers(half_bin_correction=False)\n",
    "doubao_hypers = _basic_hypers(half_bin_correction=False)\n",
    "baidu_hypers = _basic_hypers(half_bin_correction=False)\n",
    "spark_hypers = _basic_hypers(half_bin_correction=False)\n",
    "yi_hypers = _basic_hypers(half_bin_correction=False)\n",
    "llama_hypers = _basic_hypers(half_bin_correction=False)\n",
    "grok_hypers = _basic_hypers(half_bin_correction=False)\n",
    "\n",
    "# One row per selectable model: (label used as the model_hypers key,\n",
    "# value stored under 'model', base hyperparameter dict that gets copied in).\n",
    "_model_specs = [\n",
    "    ('LLMTime GPT-3.5-turbo', 'gpt-3.5-turbo', gpt3_hypers),\n",
    "    ('LLMTime GPT-3', 'gpt-3.5-turbo-instruct', gpt3_hypers),\n",
    "    ('LLMTime GPT-4', 'gpt-4', gpt4_hypers),\n",
    "    ('PromptCast GPT-3', 'gpt-3.5-turbo-instruct', promptcast_hypers),\n",
    "    ('PromptCast gemini', 'gemini-1.5-flash', gemini_hypers),\n",
    "    ('PromptCast claude', 'claude-3-5-sonnet-20240620', claude_hypers),\n",
    "    ('llama-70b', 'llama-70b', llama_hypers),\n",
    "    ('mistral-api-tiny', 'mistral-api-tiny', mistral_api_hypers),\n",
    "    # ('LLMA2', 'LLMA2', llma2_hypers),\n",
    "    ('mistral', 'mistral', mistral_api_hypers),\n",
    "    ('gemini-1.5-flash-latest', 'gemini-1.5-flash-latest', gemini_hypers),\n",
    "    ('gemini-1.5-flash-8b', 'gemini-1.5-flash-8b', gemini_hypers),\n",
    "    ('gemini-2.0-flash', 'gemini-2.0-flash', gemini_hypers),\n",
    "    ('gemini-2.0-flash-lite', 'gemini-2.0-flash-lite', gemini_hypers),\n",
    "    ('ge-2.5-flash', 'ge-2.5-flash', gemini_hypers),\n",
    "    ('grok-2-1212', 'grok-2-1212', grok_hypers),\n",
    "    ('claude-3-5-haiku-20241022', 'claude-3-5-haiku-20241022', claude_hypers),\n",
    "    ('claude-3-5-sonnet-20240620', 'claude-3-5-sonnet-20240620', claude_hypers),\n",
    "    ('glm-4-air', 'glm-4-air', glm_hypers),\n",
    "    ('glm-4-long', 'glm-4-long', glm_hypers),\n",
    "    ('qwen-plus', 'qwen-plus', qwen_hypers),\n",
    "    ('qwen-turbo', 'qwen-turbo', qwen_hypers),\n",
    "    ('Qwen2.5-32B-Instruct', 'Qwen2.5-32B-Instruct', qwen_hypers),\n",
    "    ('qwen3-32b', 'qwen3-32b', qwen_hypers),\n",
    "    ('qwen3-14b', 'qwen3-14b', qwen_hypers),\n",
    "    ('qwen3-8b', 'qwen3-8b', qwen_hypers),\n",
    "    ('moonshot-v1-8k', 'moonshot-v1-8k', moonshot_hypers),\n",
    "    ('moonshot-v1-32k', 'moonshot-v1-32k', moonshot_hypers),\n",
    "    ('deepseek-coder', 'deepseek-coder', deepseek_hypers),\n",
    "    ('DeepSeek-R1-Distill-Qwen-1.5B', 'DeepSeek-R1-Distill-Qwen-1.5B', deepseek_hypers),\n",
    "    ('DeepSeek-R1-Distill-Qwen-7B', 'DeepSeek-R1-Distill-Qwen-7B', deepseek_hypers),\n",
    "    ('DeepSeek-R1-Distill-Qwen-14B', 'DeepSeek-R1-Distill-Qwen-14B', deepseek_hypers),\n",
    "    ('DeepSeek-R1-Distill-Qwen-32B', 'DeepSeek-R1-Distill-Qwen-32B', deepseek_hypers),\n",
    "    ('deepseek-r1', 'deepseek-r1', deepseek_hypers),\n",
    "    ('deepseek-v3', 'deepseek-v3', deepseek_hypers),\n",
    "    ('claude-3-opus-20240229', 'claude-3-opus-20240229', claude_hypers),\n",
    "]\n",
    "\n",
    "# Every entry is its own dict: the 'model' key first, then a copy of the base\n",
    "# hyperparameters, so later per-model updates do not touch the shared bases.\n",
    "model_hypers = {\n",
    "    label: {'model': model_id, **base_hypers}\n",
    "    for label, model_id, base_hypers in _model_specs\n",
    "}\n",
    "\n",
    "\n",
    "# Registry of the models to evaluate: label -> prediction function. Only the\n",
    "# un-commented entries are active; the experiment cells below loop over\n",
    "# model_names and look each label up in both this dict and model_hypers.\n",
    "# NOTE(review): 'ARIMA', 'TCN', 'N-BEATS', 'N-HiTS' and 'LLMA2' have no active\n",
    "# entry in model_hypers in this cell, so enabling them here as-is would fail at\n",
    "# the model_hypers[model] lookup.\n",
    "model_predict_fns = {\n",
    "    #'LLMTime GPT-3.5-turbo': get_llmtime_predictions_data,\n",
    "    #'LLMTime GPT-3': get_llmtime_predictions_data,\n",
    "    #'LLMTime GPT-4': get_llmtime_predictions_data,\n",
    "    #'PromptCast GPT-3': get_promptcast_predictions_data,\n",
    "    #'PromptCast gemini': get_promptcast_predictions_data,\n",
    "    #'PromptCast claude': get_promptcast_predictions_data,\n",
    "    #'ARIMA': get_arima_predictions_data,\n",
    "    #'llama-70b': get_llmtime_predictions_data,\n",
    "    # 'TCN': get_TCN_predictions_data,\n",
    "    # 'N-BEATS': get_NBEATS_predictions_data,\n",
    "    # 'N-HiTS': get_NHITS_predictions_data,\n",
    "    #'mistral-api-tiny': get_llmtime_predictions_data,\n",
    "    #'LLMA2': get_llmtime_predictions_data,\n",
    "    #'mistral': get_llmtime_predictions_data,\n",
    "    #'gemini-1.5-flash-latest': get_llmtime_predictions_data,\n",
    "    #'gemini-1.5-flash-8b': get_llmtime_predictions_data,\n",
    "    #'gemini-2.0-flash':get_llmtime_predictions_data,\n",
    "    'gemini-2.0-flash-lite':get_llmtime_predictions_data,\n",
    "    #'ge-2.5-flash':get_llmtime_predictions_data,\n",
    "    #'grok-2-1212':get_llmtime_predictions_data,\n",
    "    #'claude-3-5-haiku-20241022': get_llmtime_predictions_data,\n",
    "    #'claude-3-5-sonnet-20240620': get_llmtime_predictions_data,#m:12\n",
    "    #'glm-4-air': get_llmtime_predictions_data,\n",
    "    #'glm-4-long': get_llmtime_predictions_data,\n",
    "    #'qwen-plus': get_llmtime_predictions_data,\n",
    "    #'qwen-turbo': get_llmtime_predictions_data,\n",
    "    #\"Qwen2.5-32B-Instruct\":get_llmtime_predictions_data,\n",
    "    #\"qwen3-32b\":get_llmtime_predictions_data,\n",
    "    #\"qwen3-14b\":get_llmtime_predictions_data,\n",
    "    #\"qwen3-8b\":get_llmtime_predictions_data,\n",
    "    #'moonshot-v1-8k': get_llmtime_predictions_data,\n",
    "    #'moonshot-v1-32k': get_llmtime_predictions_data,\n",
    "    #'deepseek-coder': get_llmtime_predictions_data,\n",
    "    #\"DeepSeek-R1-Distill-Qwen-1.5B\": get_llmtime_predictions_data,\n",
    "    #\"DeepSeek-R1-Distill-Qwen-7B\": get_llmtime_predictions_data,\n",
    "    #\"DeepSeek-R1-Distill-Qwen-14B\": get_llmtime_predictions_data,\n",
    "    #\"DeepSeek-R1-Distill-Qwen-32B\":get_llmtime_predictions_data,\n",
    "    #\"deepseek-r1\":get_llmtime_predictions_data,\n",
    "    \"deepseek-v3\":get_llmtime_predictions_data,\n",
    "    #\"claude-3-opus-20240229\": get_llmtime_predictions_data, \n",
    "}\n",
    "# Labels of the active models, in the order they are listed above.\n",
    "model_names = list(model_predict_fns.keys())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Memorization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from docx import Document\n",
    "from docx.shared import Inches\n",
    "import numpy as np\n",
    "from param import random_seed\n",
    "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
    "import plotly.graph_objects as go\n",
    "import mpld3  \n",
    "from collections import defaultdict\n",
    "\n",
    "\n",
    "# Memorization experiment. For every dataset and every active model:\n",
    "#   1. forecast the clean series,\n",
    "#   2. forecast each independently seeded noisy copy of it,\n",
    "#   3. write the metrics to a .docx report and the raw forecasts to an .xlsx workbook.\n",
    "\n",
    "# ---- configuration ----\n",
    "noise_levels = [0.001, 0.005, 0.01, 0.02, 0.05]\n",
    "nl = 0.05                 # the one noise level that is evaluated and reported below\n",
    "num_samples = 3           # passed as num_samples to get_autotuned_predictions_data\n",
    "n_noisy_runs = 10         # number of seeded noisy copies (seeds 1..n_noisy_runs)\n",
    "nmse_limit_original = 10  # a clean-data forecast is accepted only if NMSE < this\n",
    "nmse_limit_noisy = 12     # a noisy-data forecast is accepted only if NMSE < this\n",
    "output_dir = \"Memorization/LLMS/uncertainty/\"\n",
    "\n",
    "\n",
    "def _score_forecast(true_values, pred_values):\n",
    "    \"\"\"Compute error metrics of a forecast against the held-out values.\n",
    "\n",
    "    Positions where either input is NaN are removed from BOTH inputs, so the\n",
    "    two stay aligned element by element.\n",
    "\n",
    "    Args:\n",
    "        true_values: held-out target values (a pandas Series in this notebook).\n",
    "        pred_values: forecast of the same length.\n",
    "\n",
    "    Returns:\n",
    "        dict with 'nmse' (MSE / variance of the targets), 'nmae' (MAE / mean\n",
    "        absolute target), 'mse' and 'mae'.\n",
    "    \"\"\"\n",
    "    keep = ~(pd.isnull(np.asarray(true_values)) | pd.isnull(np.asarray(pred_values)))\n",
    "    if not keep.all():\n",
    "        print(\"Data contains NaN values. Dropping NaN values.\")\n",
    "        true_values = true_values[keep]\n",
    "        pred_values = pred_values[keep]\n",
    "\n",
    "    mse = mean_squared_error(true_values, pred_values)\n",
    "    mae = mean_absolute_error(true_values, pred_values)\n",
    "    nmse = mse / np.var(true_values)\n",
    "    nmae = mae / np.mean(np.abs(true_values))\n",
    "    return {'nmse': nmse, 'nmae': nmae, 'mse': mse, 'mae': mae}\n",
    "\n",
    "\n",
    "def _predict_until_valid(train, test, hypers, n_samples, predict_fn, nmse_limit,\n",
    "                         data_label, pred_label, context, fail_msg):\n",
    "    \"\"\"Request forecasts until one scores an NMSE below nmse_limit.\n",
    "\n",
    "    Any exception raised by the prediction call, a KeyError while scoring, or\n",
    "    an NMSE at or above the limit triggers another attempt. There is no retry\n",
    "    cap: this loops for as long as attempts keep failing.\n",
    "\n",
    "    Args:\n",
    "        train, test: the series handed to get_autotuned_predictions_data;\n",
    "            test is also the ground truth for scoring.\n",
    "        hypers: hyperparameter list from grid_iter.\n",
    "        n_samples: number of samples to request.\n",
    "        predict_fn: prediction function for the model.\n",
    "        nmse_limit: exclusive upper bound on an acceptable NMSE.\n",
    "        data_label, pred_label, context, fail_msg: text fragments used in the\n",
    "            progress messages printed on failed attempts.\n",
    "\n",
    "    Returns:\n",
    "        (pred_dict, metrics): the accepted prediction dict and its metric dict.\n",
    "    \"\"\"\n",
    "    while True:\n",
    "        try:\n",
    "            pred_dict = get_autotuned_predictions_data(train, test, hypers, n_samples, predict_fn, verbose=False, parallel=False)\n",
    "        except Exception as err:\n",
    "            print(f\"Error with {data_label} {context}: {err}\")\n",
    "            continue\n",
    "\n",
    "        try:\n",
    "            metrics = _score_forecast(test, pred_dict['median'])\n",
    "        except KeyError as err:\n",
    "            print(f\"Key error in {pred_label} {context}: {err}\")\n",
    "            continue\n",
    "\n",
    "        if metrics['nmse'] < nmse_limit:\n",
    "            return pred_dict, metrics\n",
    "        print(fail_msg, metrics['nmse'])\n",
    "\n",
    "\n",
    "# Build the noisy copies. All noise levels are generated, in this order, after\n",
    "# each seed: the random draws for level `nl` come after those of the smaller\n",
    "# levels, so skipping the unused levels would change the noisy data.\n",
    "noisy_sequences = {}\n",
    "for i in range(1, n_noisy_runs + 1):\n",
    "    np.random.seed(i)\n",
    "    noisy_sequences[f\"noisy_seq_{i}\"] = {level: get_memorization_datasets(noise=True, noise_level=level, noise_type = 'gaussian') for level in noise_levels}\n",
    "dataset_original = get_memorization_datasets(noise=False) \n",
    "\n",
    "# Result stores, created once so results of every dataset/model are retained.\n",
    "# Clean data:  store[ds_name][model]\n",
    "# Noisy data:  store[i][nl][ds_name][model]\n",
    "out_original = {}\n",
    "metrics_original = {}\n",
    "temp_original = {}\n",
    "out_noisy = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))\n",
    "temp_noisy = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))\n",
    "metrics_noisy = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))\n",
    "\n",
    "os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "for ds_name in dataset_original.keys():\n",
    "    train_original, test_original = dataset_original[ds_name]\n",
    "\n",
    "    for model in model_names:\n",
    "        doc = Document()\n",
    "        doc.add_heading('Evaluate', 0)\n",
    "        doc.add_heading(f'Dataset: {ds_name}', level=1)\n",
    "        doc.add_heading(f'Model: {model}', level=2)\n",
    "\n",
    "        model_hypers[model].update({'dataset_name': ds_name})\n",
    "        hypers = list(grid_iter(model_hypers[model]))\n",
    "        context = f'for dataset {ds_name} and model {model}'\n",
    "\n",
    "        # ---- clean series ----\n",
    "        pred_dict_original, scores_original = _predict_until_valid(\n",
    "            train_original, test_original, hypers, num_samples, model_predict_fns[model],\n",
    "            nmse_limit_original, 'original data', 'original predictions', context, 'NMSE failed')\n",
    "        print('original data:OK!')\n",
    "        # setdefault keeps the entries that other models stored for this dataset.\n",
    "        temp_original.setdefault(ds_name, {})[model] = pred_dict_original\n",
    "        out_original.setdefault(ds_name, {})[model] = pred_dict_original\n",
    "        metrics_original.setdefault(ds_name, {})[model] = scores_original\n",
    "\n",
    "        # ---- noisy copies ----\n",
    "        for i in range(1, n_noisy_runs + 1):\n",
    "            train_noisy, test_noisy = noisy_sequences[f\"noisy_seq_{i}\"][nl][ds_name]\n",
    "            pred_dict_noisy, scores_noisy = _predict_until_valid(\n",
    "                train_noisy, test_noisy, hypers, num_samples, model_predict_fns[model],\n",
    "                nmse_limit_noisy, f'noisy data{i} (noise level {nl})',\n",
    "                f'noisy{i} predictions (noise level {nl})', context, 'nmse failed:')\n",
    "            print(f'noisy data_{i}:OK!')\n",
    "            temp_noisy[i][nl][ds_name][model] = pred_dict_noisy\n",
    "            out_noisy[i][nl][ds_name][model] = pred_dict_noisy\n",
    "            metrics_noisy[i][nl][ds_name][model] = scores_noisy\n",
    "\n",
    "        # ---- .docx report: one paragraph per evaluated series ----\n",
    "        m = metrics_original[ds_name][model]\n",
    "        doc.add_paragraph(f'Original:NMSE = {m[\"nmse\"]}, NMAE = {m[\"nmae\"]}, MSE = {m[\"mse\"]}, MAE = {m[\"mae\"]}')\n",
    "        for i in range(1, n_noisy_runs + 1):\n",
    "            m = metrics_noisy[i][nl][ds_name][model]\n",
    "            doc.add_paragraph(f'Noisy dataset {i} with Noisy (level {nl}):NMSE = {m[\"nmse\"]}, NMAE = {m[\"nmae\"]}, MSE = {m[\"mse\"]}, MAE = {m[\"mae\"]}')\n",
    "\n",
    "        # ---- .xlsx workbook with the raw samples and forecasts ----\n",
    "        excel_path = os.path.join(output_dir, f'predictions_samples_{ds_name}_{model}_{num_samples}.xlsx')\n",
    "        with pd.ExcelWriter(excel_path) as writer:\n",
    "            pd.DataFrame(out_original[ds_name][model]['samples']).to_excel(writer, sheet_name='Original Samples')\n",
    "            pd.DataFrame(test_original).to_excel(writer, sheet_name='Original test dataset')\n",
    "            pd.DataFrame(out_original[ds_name][model]['median']).to_excel(writer, sheet_name='Original pred')\n",
    "\n",
    "            # One row per noisy copy: its metrics, then the median forecast\n",
    "            # spread over columns named '0', '1', ...\n",
    "            results = []\n",
    "            for i in range(1, n_noisy_runs + 1):\n",
    "                m = metrics_noisy[i][nl][ds_name][model]\n",
    "                result_dict = {'dataset': f'noisy_seq_{i}', 'NMSE': m[\"nmse\"], 'NMAE': m[\"nmae\"], 'MSE': m[\"mse\"], 'MAE': m[\"mae\"]}\n",
    "                for j, value in enumerate(out_noisy[i][nl][ds_name][model]['median']):\n",
    "                    result_dict[f'{j}'] = value\n",
    "                results.append(result_dict)\n",
    "            df = pd.DataFrame(results)\n",
    "\n",
    "            # Per-time-step mean and variance across the noisy copies, appended\n",
    "            # as two summary rows (computed before the rows are appended).\n",
    "            numeric_columns = [col for col in df.columns if col.isdigit()]\n",
    "            mean_row = {'dataset': 'mean', **df[numeric_columns].mean().to_dict()}\n",
    "            variance_row = {'dataset': 'var', **df[numeric_columns].var().to_dict()}\n",
    "            df = pd.concat([df, pd.DataFrame([mean_row, variance_row])], ignore_index=True)\n",
    "            df.to_excel(writer, sheet_name=f'predict result(nosiy level{nl})')\n",
    "\n",
    "            for i in range(1, n_noisy_runs + 1):\n",
    "                # NOTE(review): this sheet name is 32-33 characters long, above Excel's\n",
    "                # 31-character sheet-name limit; depending on the writer engine this\n",
    "                # warns or fails. Left unchanged in case other code reads it by name.\n",
    "                pd.DataFrame(out_noisy[i][nl][ds_name][model]['samples']).to_excel(writer, sheet_name=f'Noisy Samples {i}(nosiy level{nl})')\n",
    "                # One sheet per noisy copy; a single shared sheet name would keep\n",
    "                # only the last copy's test series.\n",
    "                pd.DataFrame(noisy_sequences[f\"noisy_seq_{i}\"][nl][ds_name][1]).to_excel(writer, sheet_name=f'Noisy test dataset {i}')\n",
    "\n",
    "        doc_path = os.path.join(output_dir, f'model_predictions_results_{ds_name}_{model}_{num_samples}.docx')\n",
    "        doc.save(doc_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Darts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from docx import Document\n",
    "from docx.shared import Inches\n",
    "import numpy as np\n",
    "from param import random_seed\n",
    "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
    "import plotly.graph_objects as go\n",
    "import mpld3  \n",
    "from collections import defaultdict\n",
    "from data.small_context import get_datasets, get_memorization_datasets\n",
    "\n",
    "noise_levels = [0.001, 0.005, 0.01, 0.02, 0.05]\n",
    "\n",
    "noisy_sequences = {}\n",
    "\n",
    "for i in range(1, 11):\n",
    "    np.random.seed(i)\n",
    "    noisy_sequences[f\"noisy_seq_{i}\"] = {nl: get_datasets(noise=True, noise_level=nl,noise_type='gaussian') for nl in noise_levels}\n",
    "dataset_original = get_datasets() \n",
    "\n",
    "out_noisy = {nl: {} for nl in noise_levels}\n",
    "metrics_noisy = {nl: {} for nl in noise_levels}\n",
    "temp_noisy={nl: {} for nl in noise_levels}\n",
    "out_original = {}\n",
    "metrics_original = {}\n",
    "temp_original={}\n",
    "\n",
    "output_dir = \"Darts/LLMS/uncertainty/\"\n",
    "\n",
    "os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "for ds_name in dataset_original.keys():\n",
    "    for model in model_names:\n",
    "        \n",
    "        doc = Document()\n",
    "        doc.add_heading('Evaluate', 0)\n",
    "        doc.add_heading(f'Dataset: {ds_name}', level=1)\n",
    "        doc.add_heading(f'Model: {model}', level=2)\n",
    "\n",
    "        model_hypers[model].update({'dataset_name': ds_name})\n",
    "        hypers = list(grid_iter(model_hypers[model]))\n",
    "        num_samples = 3\n",
    "\n",
    "        train_original, test_original = dataset_original[ds_name]\n",
    "\n",
    "        out_original[ds_name] = {}\n",
    "        metrics_original[ds_name] = {}\n",
    "        temp_original[ds_name]={}\n",
    "\n",
    "        while True:\n",
    "            try:\n",
    "                pred_dict_original = get_autotuned_predictions_data(train_original, test_original, hypers, num_samples, model_predict_fns[model], verbose=False, parallel=False)\n",
    "                temp_original[ds_name][model] = pred_dict_original  \n",
    "            except Exception as e:\n",
    "                print(f\"Error with original data for dataset {ds_name} and model {model}: {e}\")\n",
    "                continue\n",
    "\n",
    "            try:\n",
    "                true_values_original = dataset_original[ds_name][1]\n",
    "                pred_values_original = temp_original[ds_name][model]['median']\n",
    "\n",
    "                if true_values_original.isnull().any() or pred_values_original.isnull().any():\n",
    "                    print(\"Data contains NaN values. Dropping NaN values.\")\n",
    "                    true_values_original = true_values_original.dropna()\n",
    "                    pred_values_original = pred_values_original.dropna()            \n",
    "\n",
    "                mse_original = mean_squared_error(true_values_original, pred_values_original)\n",
    "                mae_original = mean_absolute_error(true_values_original, pred_values_original)\n",
    "                \n",
    "                # Calculate NMSE and NMAE\n",
    "                var_true = np.var(true_values_original)\n",
    "                nmse_original = mse_original / var_true\n",
    "                nmae_original = mae_original / np.mean(np.abs(true_values_original))\n",
    "                if nmse_original<10:\n",
    "                    metrics_original[ds_name][model] = {'nmse': nmse_original, 'nmae': nmae_original, 'mse': mse_original, 'mae': mae_original}\n",
    "                    out_original[ds_name][model] = pred_dict_original\n",
    "                    print('Original data:OK!')\n",
    "                    break\n",
    "                else:\n",
    "                    print('NMSE failed:',nmse_original)\n",
    "            except KeyError as e:\n",
    "                print(f\"Key error in original predictions for dataset {ds_name} and model {model}: {e}\")\n",
    "                continue\n",
    "\n",
    "        out_noisy = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))\n",
    "        temp_noisy=defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))\n",
    "        metrics_noisy = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))\n",
    "    \n",
    "        nl=0.05\n",
    "\n",
    "        for i in range(1,11):\n",
    "            data_noisy = noisy_sequences[f\"noisy_seq_{i}\"][nl][ds_name]\n",
    "            train_noisy, test_noisy = data_noisy \n",
    "            out_noisy[i][nl][ds_name] = {}\n",
    "            temp_noisy[i][nl][ds_name]={}  \n",
    "            metrics_noisy[i][nl][ds_name] = {}\n",
    "            while True:\n",
    "                try:\n",
    "                    pred_dict_noisy = get_autotuned_predictions_data(train_noisy, test_noisy, hypers, num_samples, model_predict_fns[model], verbose=False, parallel=False)\n",
    "                    temp_noisy[i][nl][ds_name][model] = pred_dict_noisy \n",
    "                \n",
    "                except Exception as e:\n",
    "                    print(f\"Error with noisy data{i} (noise level {nl}) for dataset {ds_name} and model {model}: {e}\")\n",
    "                    continue\n",
    "\n",
    "                try:\n",
    "                    true_values_noisy = test_noisy\n",
    "                    pred_values_noisy = temp_noisy[i][nl][ds_name][model]['median']\n",
    "\n",
    "                    if true_values_original.isnull().any() or pred_values_original.isnull().any():\n",
    "                        print(\"Data contains NaN values. Dropping NaN values.\")\n",
    "                        true_values_original = true_values_noisy.dropna()\n",
    "                        pred_values_original = pred_values_noisy.dropna()\n",
    "\n",
    "                    mse_noisy = mean_squared_error(true_values_noisy, pred_values_noisy)\n",
    "                    mae_noisy = mean_absolute_error(true_values_noisy, pred_values_noisy)\n",
    "                        \n",
    "                        # Calculate NMSE and NMAE\n",
    "                    var_true = np.var(true_values_noisy)\n",
    "                    #confidence=np.var(pred_values_noisy)\n",
    "                    nmse_noisy = mse_noisy / var_true\n",
    "                    nmae_noisy = mae_noisy / np.mean(np.abs(true_values_noisy))\n",
    "                    if nmse_noisy<10:\n",
    "                        metrics_noisy[i][nl][ds_name][model] = {'nmse': nmse_noisy, 'nmae': nmae_noisy, 'mse': mse_noisy, 'mae': mae_noisy}\n",
    "                        out_noisy[i][nl][ds_name][model] = pred_dict_noisy\n",
    "                        print(f'noisy data_{i}:OK!')\n",
    "                        break\n",
    "                    else:\n",
    "                        print('nmse failed',nmse_noisy)\n",
    "                except KeyError as e:\n",
    "                    print(f\"Key error in noisy{i} predictions (noise level {nl}) for dataset {ds_name} and model {model}: {e}\")\n",
    "                    continue\n",
    "\n",
    "        # Add original data results\n",
    "        if ds_name in metrics_original and model in metrics_original[ds_name]:\n",
    "            doc.add_paragraph(f'Original:NMSE = {metrics_original[ds_name][model][\"nmse\"]}, NMAE = {metrics_original[ds_name][model][\"nmae\"]}, MSE = {metrics_original[ds_name][model][\"mse\"]}, MAE = {metrics_original[ds_name][model][\"mae\"]}')\n",
    "           \n",
    "        nl=0.05\n",
    "        for i in range(1,11):\n",
    "            if nl in metrics_noisy[i] and ds_name in metrics_noisy[i][nl] and model in metrics_noisy[i][nl][ds_name]:\n",
    "                doc.add_paragraph(f'Noisy dataset {i} with Noisy (level {nl}):NMSE = {metrics_noisy[i][nl][ds_name][model][\"nmse\"]}, NMAE = {metrics_noisy[i][nl][ds_name][model][\"nmae\"]}, MSE = {metrics_noisy[i][nl][ds_name][model][\"mse\"]}, MAE = {metrics_noisy[i][nl][ds_name][model][\"mae\"]}')\n",
    "                   \n",
    "        excel_path = os.path.join(output_dir, f'predictions_samples_{ds_name}_{model}_{num_samples}.xlsx')\n",
    "        with pd.ExcelWriter(excel_path) as writer:\n",
    "            pd.DataFrame(out_original[ds_name][model]['samples']).to_excel(writer, sheet_name='Original Samples')\n",
    "            pd.DataFrame(dataset_original[ds_name][1]).to_excel(writer, sheet_name='Original test dataset')\n",
    "            pd.DataFrame(out_original[ds_name][model]['median']).to_excel(writer, sheet_name='Original pred')\n",
    "            nl=0.05\n",
    "            results = []\n",
    "            for i in range(1,11):\n",
    "                result_dict = {'dataset': f'noisy_seq_{i}','NMSE':metrics_noisy[i][nl][ds_name][model][\"nmse\"], 'NMAE':metrics_noisy[i][nl][ds_name][model][\"nmae\"],'MSE':metrics_noisy[i][nl][ds_name][model][\"mse\"], 'MAE':metrics_noisy[i][nl][ds_name][model][\"mae\"]}\n",
    "                for j, value in enumerate(out_noisy[i][nl][ds_name][model]['median']):\n",
    "                    result_dict[f'{j}'] = value\n",
    "                \n",
    "                results.append(result_dict)\n",
    "            df = pd.DataFrame(results)\n",
    "            numeric_columns = [col for col in df.columns if col.isdigit()]\n",
    "            variance_values = df[numeric_columns].var().to_dict()\n",
    "            mean_values = df[numeric_columns].mean().to_dict()  \n",
    "\n",
    "            mean_row = {'dataset': 'mean'}  \n",
    "            mean_row.update(mean_values)  \n",
    "            df = pd.concat([df, pd.DataFrame([mean_row])], ignore_index=True)\n",
    "            variance_row = {'dataset': 'var'}\n",
    "            variance_row.update(variance_values)\n",
    "         \n",
    "            df = pd.concat([df, pd.DataFrame([variance_row])], ignore_index=True)\n",
    "            pd.DataFrame(df).to_excel(writer, sheet_name=f'predict result(nosiy level{nl})')\n",
    "            for i in range(1,11):\n",
    "                pd.DataFrame(out_noisy[i][nl][ds_name][model]['samples']).to_excel(writer, sheet_name=f'Noisy Samples {i}(nosiy level{nl})')\n",
    "                pd.DataFrame(noisy_sequences[f\"noisy_seq_{i}\"][nl][ds_name][1]).to_excel(writer, sheet_name='Noisy test dataset')\n",
    "\n",
    "        doc_path = os.path.join(output_dir, f'model_predictions_results_{ds_name}_{model}_{num_samples}.docx')\n",
    "        doc.save(doc_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Informer benchmark (ETTh1): forecasting under Gaussian input noise"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from docx import Document\n",
    "from docx.shared import Inches\n",
    "import numpy as np\n",
    "from param import random_seed\n",
    "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
    "from data.small_context import get_ETTh1_datasets,get_ETTh2_datasets,get_ETTm1_datasets,get_ETTm2_datasets,get_exchange_rate_datasets,get_national_illness_datasets\n",
    "import plotly.graph_objects as go\n",
    "import mpld3 \n",
    "from collections import defaultdict\n",
    "from data.small_context import get_datasets, get_memorization_datasets\n",
    "\n",
    "# Informer / ETTh1 noise-robustness experiment.\n",
    "# For every series returned by get_ETTh1_datasets() and every model in `model_names`\n",
    "# (`model_names`, `model_hypers` and `model_predict_fns` come from earlier cells), forecast the\n",
    "# clean series and ten noisy copies, score each forecast, and write one .xlsx workbook and one\n",
    "# .docx report per (dataset, model) pair into `output_dir`.\n",
    "\n",
    "noise_levels = [0.001, 0.005, 0.01, 0.02, 0.05]\n",
    "\n",
    "noisy_sequences = {}\n",
    "\n",
    "for i in range(1, 11):\n",
    "    # Re-seed NumPy's global RNG for each noisy copy (presumably what the noise generator\n",
    "    # inside get_ETTh1_datasets draws from - TODO confirm).\n",
    "    np.random.seed(i)\n",
    "    noisy_sequences[f\"noisy_seq_{i}\"] = {nl: get_ETTh1_datasets(noise=True, noise_level=nl,noise_type='gaussian') for nl in noise_levels}\n",
    "dataset_original = get_ETTh1_datasets() \n",
    "\n",
    "num_samples = 3   # passed to get_autotuned_predictions_data; also part of the output file names\n",
    "nl = 0.05         # the only entry of noise_levels that is evaluated and reported below\n",
    "nmse_limit = 10   # a forecast is accepted only when its NMSE is below this value\n",
    "\n",
    "\n",
    "def _nested_results():\n",
    "    \"\"\"Return an empty container addressed as results[i][nl][ds_name][model].\"\"\"\n",
    "    return defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))\n",
    "\n",
    "\n",
    "def _score_forecast(true_values, pred_values):\n",
    "    \"\"\"Score one forecast against its test series.\n",
    "\n",
    "    Both arguments must provide isnull() and dropna() (pandas Series in this notebook - TODO confirm).\n",
    "    When either one contains NaN, each is passed through dropna() before scoring.\n",
    "\n",
    "    Returns a dict with keys 'nmse', 'nmae', 'mse' and 'mae', where\n",
    "    nmse = mse / var(true_values) and nmae = mae / mean(abs(true_values)).\n",
    "    \"\"\"\n",
    "    if true_values.isnull().any() or pred_values.isnull().any():\n",
    "        print(\"Data contains NaN values. Dropping NaN values.\")\n",
    "        true_values = true_values.dropna()\n",
    "        pred_values = pred_values.dropna()\n",
    "\n",
    "    mse = mean_squared_error(true_values, pred_values)\n",
    "    mae = mean_absolute_error(true_values, pred_values)\n",
    "    nmse = mse / np.var(true_values)\n",
    "    nmae = mae / np.mean(np.abs(true_values))\n",
    "    return {'nmse': nmse, 'nmae': nmae, 'mse': mse, 'mae': mae}\n",
    "\n",
    "\n",
    "out_original = {}\n",
    "metrics_original = {}\n",
    "temp_original = {}\n",
    "# Same shape as the per-model containers created inside the loop below.\n",
    "out_noisy = _nested_results()\n",
    "metrics_noisy = _nested_results()\n",
    "temp_noisy = _nested_results()\n",
    "\n",
    "output_dir = \"Informer/ETTh1/LLMS/uncertainty/\"\n",
    "\n",
    "os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "for ds_name in dataset_original.keys():\n",
    "    for model in model_names:\n",
    "        doc = Document()\n",
    "        doc.add_heading('Evaluate', 0)\n",
    "        doc.add_heading(f'Dataset: {ds_name}', level=1)\n",
    "        doc.add_heading(f'Model: {model}', level=2)\n",
    "\n",
    "        model_hypers[model].update({'dataset_name': ds_name})\n",
    "        hypers = list(grid_iter(model_hypers[model]))\n",
    "\n",
    "        train_original, test_original = dataset_original[ds_name]\n",
    "\n",
    "        # setdefault instead of assigning {}: assigning here would discard the results already\n",
    "        # stored for earlier models of the same dataset.\n",
    "        out_original.setdefault(ds_name, {})\n",
    "        metrics_original.setdefault(ds_name, {})\n",
    "        temp_original.setdefault(ds_name, {})\n",
    "\n",
    "        # Clean series: re-draw the forecast until one is accepted.\n",
    "        # NOTE: there is no retry limit; a failure that repeats on every attempt loops forever.\n",
    "        while True:\n",
    "            try:\n",
    "                pred_dict_original = get_autotuned_predictions_data(train_original, test_original, hypers, num_samples, model_predict_fns[model], verbose=False, parallel=False)\n",
    "                temp_original[ds_name][model] = pred_dict_original\n",
    "            except Exception as e:\n",
    "                print(f\"Error with original data for dataset {ds_name} and model {model}: {e}\")\n",
    "                continue\n",
    "\n",
    "            try:\n",
    "                scores_original = _score_forecast(test_original, pred_dict_original['median'])\n",
    "            except KeyError as e:\n",
    "                print(f\"Key error in original predictions for dataset {ds_name} and model {model}: {e}\")\n",
    "                continue\n",
    "\n",
    "            if scores_original['nmse'] < nmse_limit:\n",
    "                metrics_original[ds_name][model] = scores_original\n",
    "                out_original[ds_name][model] = pred_dict_original\n",
    "                print('Original dataset:OK!')\n",
    "                break\n",
    "            print('NMSE failed', scores_original['nmse'])\n",
    "\n",
    "        # Noisy results are rebuilt for every (dataset, model) pair; they are written to disk\n",
    "        # before the next pair starts.\n",
    "        out_noisy = _nested_results()\n",
    "        temp_noisy = _nested_results()\n",
    "        metrics_noisy = _nested_results()\n",
    "\n",
    "        for i in range(1, 11):\n",
    "            train_noisy, test_noisy = noisy_sequences[f\"noisy_seq_{i}\"][nl][ds_name]\n",
    "\n",
    "            while True:\n",
    "                try:\n",
    "                    pred_dict_noisy = get_autotuned_predictions_data(train_noisy, test_noisy, hypers, num_samples, model_predict_fns[model], verbose=False, parallel=False)\n",
    "                    temp_noisy[i][nl][ds_name][model] = pred_dict_noisy\n",
    "                except Exception as e:\n",
    "                    print(f\"Error with noisy data{i} (noise level {nl}) for dataset {ds_name} and model {model}: {e}\")\n",
    "                    continue\n",
    "\n",
    "                try:\n",
    "                    # The NaN check and the scoring both act on the noisy test split and the\n",
    "                    # noisy forecast; the clean-series values are not involved here.\n",
    "                    scores_noisy = _score_forecast(test_noisy, pred_dict_noisy['median'])\n",
    "                except KeyError as e:\n",
    "                    print(f\"Key error in noisy{i} predictions (noise level {nl}) for dataset {ds_name} and model {model}: {e}\")\n",
    "                    continue\n",
    "\n",
    "                if scores_noisy['nmse'] < nmse_limit:\n",
    "                    metrics_noisy[i][nl][ds_name][model] = scores_noisy\n",
    "                    out_noisy[i][nl][ds_name][model] = pred_dict_noisy\n",
    "                    print(f'noisy data_{i}:OK!')\n",
    "                    break\n",
    "                print('nmse failed:', scores_noisy['nmse'])\n",
    "\n",
    "        # Word report: one paragraph for the clean series, one per noisy copy.\n",
    "        if ds_name in metrics_original and model in metrics_original[ds_name]:\n",
    "            scores = metrics_original[ds_name][model]\n",
    "            doc.add_paragraph(f'Original:NMSE = {scores[\"nmse\"]}, NMAE = {scores[\"nmae\"]}, MSE = {scores[\"mse\"]}, MAE = {scores[\"mae\"]}')\n",
    "\n",
    "        for i in range(1, 11):\n",
    "            if nl in metrics_noisy[i] and ds_name in metrics_noisy[i][nl] and model in metrics_noisy[i][nl][ds_name]:\n",
    "                scores = metrics_noisy[i][nl][ds_name][model]\n",
    "                doc.add_paragraph(f'Noisy dataset {i} with Noisy (level {nl}):NMSE = {scores[\"nmse\"]}, NMAE = {scores[\"nmae\"]}, MSE = {scores[\"mse\"]}, MAE = {scores[\"mae\"]}')\n",
    "\n",
    "        # Excel workbook. Sheet names are kept exactly as they are (including the 'nosiy'\n",
    "        # spelling) because the metric cells of this notebook open sheets by these names.\n",
    "        excel_path = os.path.join(output_dir, f'predictions_samples_{ds_name}_{model}_{num_samples}.xlsx')\n",
    "        with pd.ExcelWriter(excel_path) as writer:\n",
    "            pd.DataFrame(out_original[ds_name][model]['samples']).to_excel(writer, sheet_name='Original Samples')\n",
    "            pd.DataFrame(dataset_original[ds_name][1]).to_excel(writer, sheet_name='Original test dataset')\n",
    "            pd.DataFrame(out_original[ds_name][model]['median']).to_excel(writer, sheet_name='Original pred')\n",
    "\n",
    "            # One row per noisy copy: four metrics followed by the median forecast, one column per step.\n",
    "            results = []\n",
    "            for i in range(1, 11):\n",
    "                scores = metrics_noisy[i][nl][ds_name][model]\n",
    "                result_dict = {'dataset': f'noisy_seq_{i}', 'NMSE': scores[\"nmse\"], 'NMAE': scores[\"nmae\"], 'MSE': scores[\"mse\"], 'MAE': scores[\"mae\"]}\n",
    "                for j, value in enumerate(out_noisy[i][nl][ds_name][model]['median']):\n",
    "                    result_dict[f'{j}'] = value\n",
    "                results.append(result_dict)\n",
    "            df = pd.DataFrame(results)\n",
    "\n",
    "            # Per-step mean and variance across the ten rows, computed before either summary row\n",
    "            # is appended so that neither statistic includes the other.\n",
    "            numeric_columns = [col for col in df.columns if col.isdigit()]\n",
    "            mean_row = {'dataset': 'mean'}\n",
    "            mean_row.update(df[numeric_columns].mean().to_dict())\n",
    "            variance_row = {'dataset': 'var'}\n",
    "            variance_row.update(df[numeric_columns].var().to_dict())\n",
    "            df = pd.concat([df, pd.DataFrame([mean_row, variance_row])], ignore_index=True)\n",
    "            df.to_excel(writer, sheet_name=f'predict result(nosiy level{nl})')\n",
    "\n",
    "            for i in range(1, 11):\n",
    "                pd.DataFrame(out_noisy[i][nl][ds_name][model]['samples']).to_excel(writer, sheet_name=f'Noisy Samples {i}(nosiy level{nl})')\n",
    "                # NOTE(review): every pass writes to the same sheet name, so the workbook cannot hold\n",
    "                # ten distinct test sheets under it. The name is left unchanged because downstream\n",
    "                # cells read 'Noisy test dataset'.\n",
    "                pd.DataFrame(noisy_sequences[f\"noisy_seq_{i}\"][nl][ds_name][1]).to_excel(writer, sheet_name='Noisy test dataset')\n",
    "\n",
    "        doc_path = os.path.join(output_dir, f'model_predictions_results_{ds_name}_{model}_{num_samples}.docx')\n",
    "        doc.save(doc_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Metrics: NMSE, CRPS and Gaussian NLL for the memorization datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Name lists for the dataset groups and the models.\n",
    "# The trailing numbers are carried over unchanged from the original annotations.\n",
    "\n",
    "# memorization datasets\n",
    "memeorization = [\n",
    "    'IstanbulTraffic',  # 267\n",
    "    'TSMCStock',  # 247\n",
    "    'TurkeyPower',  # 365\n",
    "]\n",
    "\n",
    "# informer datasets: '<prefix>_<k>' with k counting up from 1\n",
    "informer_ETTh1 = [f'ETTh1_{idx}' for idx in range(1, 8)]\n",
    "informer_ETTh2 = [f'ETTh2_{idx}' for idx in range(1, 8)]\n",
    "informer_ETTm1 = [f'ETTm1_{idx}' for idx in range(1, 8)]\n",
    "informer_ETTm2 = [f'ETTm2_{idx}' for idx in range(1, 8)]\n",
    "informer_exchange = [f'exchange_rate_{idx}' for idx in range(1, 9)]\n",
    "informer_national = [f'national_illness_{idx}' for idx in range(1, 8)]\n",
    "\n",
    "# darts datasets\n",
    "darts = [\n",
    "    'AirPassengersDataset',  # 144 29\n",
    "    'AusBeerDataset',  # 168 43\n",
    "    'GasRateCO2Dataset',  # multivariate #500 60\n",
    "    'MonthlyMilkDataset',  # 144 34\n",
    "    'SunspotsDataset',  # very big, need to subsample? #  141\n",
    "    'WineDataset',  # 159 36\n",
    "    'WoolyDataset',  # 336 24\n",
    "    'HeartRateDataset',  # also subsample. # 90\n",
    "]\n",
    "\n",
    "# model names as they appear in the result file names\n",
    "model = [\n",
    "    'LLMTime GPT-3.5-turbo',\n",
    "    'LLMTime GPT-3',\n",
    "    'LLMTime GPT-4',\n",
    "    'claude-3-5-haiku-20241022',\n",
    "    'claude-3-5-sonnet-20240620',\n",
    "    'glm-4-long',\n",
    "    'gemini-2.0-flash-lite',\n",
    "    'qwen-turbo',\n",
    "    'Qwen2.5-32B-Instruct',\n",
    "    \"deepseek-r1\",\n",
    "    'deepseek-v3',\n",
    "    'grok-2-1212',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
    "# NMSE of the cross-run median forecast for every (model, memorization dataset) pair.\n",
    "# Any failure while reading or scoring a workbook is printed and that pair is left out.\n",
    "memor_nmse = {}\n",
    "for m in model:\n",
    "    memor_nmse[m] = {}\n",
    "    for ds in memeorization:\n",
    "        try:\n",
    "            file_path = f'Memorization/LLMS/uncertainty/predictions_samples_{ds}_{m}_3.xlsx'\n",
    "            result_sheet = pd.read_excel(file_path, sheet_name='predict result(nosiy level0.05)')\n",
    "            # Column-wise median over sheet rows 0-9, taking columns from position 6 onwards.\n",
    "            run_block = result_sheet.iloc[0:10, 6:]\n",
    "            signal = run_block.median().values.astype(float)\n",
    "            # Row 11 is read here but does not enter the NMSE.\n",
    "            variance = result_sheet.iloc[11, 6:].values.astype(float)\n",
    "            noisy_test_sheet = pd.read_excel(file_path, sheet_name='Noisy test dataset')\n",
    "            test = noisy_test_sheet.iloc[:, 1].values.astype(float)\n",
    "            # Squared error normalised by the variance of the test values.\n",
    "            nmse = mean_squared_error(test, signal) / np.var(test)\n",
    "            memor_nmse[m][ds] = nmse\n",
    "        except Exception as e:\n",
    "            print(f\"Error with {ds} and model {m}: {e}\")\n",
    "\n",
    "print(memor_nmse)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from data.metrics import calculate_crps\n",
    "# CRPS (via calculate_crps, 20 quantiles) of the cross-run median forecast for every\n",
    "# (model, memorization dataset) pair. Failures are printed and that pair is left out.\n",
    "memor_crps = {}\n",
    "model = [\n",
    "    'LLMTime GPT-3.5-turbo',\n",
    "    'LLMTime GPT-3',\n",
    "    'LLMTime GPT-4',\n",
    "    'claude-3-5-haiku-20241022',\n",
    "    'claude-3-5-sonnet-20240620',\n",
    "    'glm-4-long',\n",
    "    'qwen-turbo',\n",
    "    'moonshot-v1-8k',\n",
    "    'moonshot-v1-32k',\n",
    "    \"deepseek-r1\",\n",
    "]\n",
    "for m in model:\n",
    "    memor_crps[m] = {}\n",
    "    for ds in memeorization:\n",
    "        try:\n",
    "            file_path = f'Memorization/LLMS/uncertainty/predictions_samples_{ds}_{m}_3.xlsx'\n",
    "            result_sheet = pd.read_excel(file_path, sheet_name='predict result(nosiy level0.05)')\n",
    "            # Column-wise median over sheet rows 0-9, taking columns from position 6 onwards.\n",
    "            run_block = result_sheet.iloc[0:10, 6:]\n",
    "            signal = run_block.median().values.astype(float)\n",
    "            # Row 11 is read here but is not passed to calculate_crps.\n",
    "            variance = result_sheet.iloc[11, 6:].values.astype(float)\n",
    "            noisy_test_sheet = pd.read_excel(file_path, sheet_name='Noisy test dataset')\n",
    "            test = noisy_test_sheet.iloc[:, 1].values.astype(float)\n",
    "            crps = calculate_crps(test, signal, num_quantiles=20)\n",
    "            memor_crps[m][ds] = crps\n",
    "        except Exception as e:\n",
    "            print(f\"Error with {ds} and model {m}: {e}\")\n",
    "print(memor_crps)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from data.metrics import calculate_crps\n",
    "import numpy as np\n",
    "# Mean Gaussian negative log-likelihood of the test values for every (model, memorization\n",
    "# dataset) pair, using the cross-run median as the mean and sheet row 11 as the variance.\n",
    "# Failures are printed and that pair is left out.\n",
    "memor_nll = {}\n",
    "model = [\n",
    "    'LLMTime GPT-3.5-turbo',\n",
    "    'LLMTime GPT-3',\n",
    "    'LLMTime GPT-4',\n",
    "    'claude-3-5-haiku-20241022',\n",
    "    'claude-3-5-sonnet-20240620',\n",
    "    'glm-4-long',\n",
    "    'qwen-turbo',\n",
    "    'moonshot-v1-8k',\n",
    "    'moonshot-v1-32k',\n",
    "    \"deepseek-r1\",\n",
    "]\n",
    "for m in model:\n",
    "    memor_nll[m] = {}\n",
    "    for ds in memeorization:\n",
    "        try:\n",
    "            file_path = f'Memorization/LLMS/uncertainty/predictions_samples_{ds}_{m}_3.xlsx'\n",
    "            result_sheet = pd.read_excel(file_path, sheet_name='predict result(nosiy level0.05)')\n",
    "            # Column-wise median over sheet rows 0-9, taking columns from position 6 onwards.\n",
    "            run_block = result_sheet.iloc[0:10, 6:]\n",
    "            signal = run_block.median().values.astype(float)\n",
    "            variance = result_sheet.iloc[11, 6:].values.astype(float)\n",
    "            noisy_test_sheet = pd.read_excel(file_path, sheet_name='Noisy test dataset')\n",
    "            test = noisy_test_sheet.iloc[:, 1].values.astype(float)\n",
    "            # Per-step Gaussian log-density: a normalisation term minus a scaled squared error.\n",
    "            # NOTE(review): a zero entry in `variance` divides by zero in both terms.\n",
    "            log_norm = np.log(1 / np.sqrt(2 * np.pi * variance))\n",
    "            scaled_sq_err = (test - signal) ** 2 / (2 * variance)\n",
    "            nll = -np.mean(log_norm - scaled_sq_err)\n",
    "            memor_nll[m][ds] = nll\n",
    "        except Exception as e:\n",
    "            print(f\"Error with {ds} and model {m}: {e}\")\n",
    "print(memor_nll)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
