{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import pandas as pd\n",
    "import re\n",
    "\n",
    "import numpy as np \n",
    "import pandas as pd \n",
    "import matplotlib.pyplot as plt\n",
    "import re\n",
    "import seaborn as sns\n",
    "from matplotlib import pyplot\n",
    "from matplotlib.ticker import  SymmetricalLogLocator\n",
    "\n",
    "def extract_weights(row, col):\n",
    "    \"\"\"Return the weights stored in ``row[col]``, parsing them when they are text.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    row : mapping or pandas.Series\n",
    "        Record that is indexed with ``col``.\n",
    "    col : hashable\n",
    "        Key of the entry that holds the weights.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list of float, object or None\n",
    "        For a string entry: every numeric literal found in it, converted to\n",
    "        float, in order of appearance. ``None`` when the entry is ``None`` or\n",
    "        another scalar missing value such as NaN. Any other entry is returned\n",
    "        unchanged.\n",
    "    \"\"\"\n",
    "    value = row[col]\n",
    "    if isinstance(value, str):\n",
    "        # The mantissa may end in a bare dot ('1.', '1.e-05') or start with one\n",
    "        # ('.5'). Requiring digits after the dot would split '1.e-05' into the\n",
    "        # two numbers 1 and -5.\n",
    "        pattern = r'-?(?:\\d+\\.?\\d*|\\.\\d+)(?:[eE][-+]?\\d+)?'\n",
    "        return [float(num) for num in re.findall(pattern, value)]\n",
    "    # Only scalars are tested for missingness: np.isnan(None) raises TypeError\n",
    "    # and a NaN test on a list/array is element-wise, so it has no single\n",
    "    # truth value. pd.isna handles None and NaN alike.\n",
    "    if pd.api.types.is_scalar(value) and pd.isna(value):\n",
    "        return None\n",
    "    return value\n",
    "\n",
    "def create_coeff_plt_df(data):\n",
    "    \"\"\"Build a long-format frame with one row per pair of tree coefficients.\n",
    "\n",
    "    Both weight columns are parsed with ``extract_weights`` and written back\n",
    "    to ``data`` as ``'weights_trees_1'`` and ``'weights_trees_2'``, i.e. the\n",
    "    input frame is modified in place.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : pandas.DataFrame\n",
    "        Needs the columns ``seed``, ``n_trees``, ``n_layers``, ``tree_depth``,\n",
    "        ``la_trees``, ``la_layers``, ``rmse``, ``rmse_rescaled`` and\n",
    "        ``weights_trees_1``, plus the second weight column under the name\n",
    "        ``weigths_trees_2`` (sic) or ``weights_trees_2``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        For every input row with weights: one record per position ``t`` of the\n",
    "        second weight vector, holding the row's settings, both coefficients at\n",
    "        ``t``, their L1 norm (``coeff_norm``), their product (``prod``) and its\n",
    "        absolute value (``abs_prod``). An input row without weights yields a\n",
    "        single record whose ``index`` and coefficient fields are ``None``.\n",
    "    \"\"\"\n",
    "    # The second weight column has always been looked up as 'weigths_trees_2'\n",
    "    # (sic). Keep that name first so such inputs still work and fall back to\n",
    "    # the corrected spelling when it is the only one present.\n",
    "    raw_col_2 = 'weigths_trees_2' if 'weigths_trees_2' in data.columns else 'weights_trees_2'\n",
    "    data['weights_trees_2'] = data.apply(lambda row: extract_weights(row, raw_col_2), axis = 1)\n",
    "    data['weights_trees_1'] = data.apply(lambda row: extract_weights(row, 'weights_trees_1'), axis = 1)\n",
    "\n",
    "    meta_cols = ['seed', 'n_trees', 'n_layers', 'tree_depth',\n",
    "                 'la_trees', 'la_layers', 'rmse', 'rmse_rescaled']\n",
    "    coeff_cols = ['coefficient_2', 'coefficient_1', 'coeff_norm', 'prod', 'abs_prod']\n",
    "\n",
    "    def _is_missing(weights):\n",
    "        # True for None and for scalar missing values such as NaN.\n",
    "        return weights is None or (pd.api.types.is_scalar(weights) and pd.isna(weights))\n",
    "\n",
    "    result_list = []\n",
    "    for _, row in data.iterrows():\n",
    "        meta = {name: row[name] for name in meta_cols}\n",
    "        weights_2 = row['weights_trees_2']\n",
    "        weights_1 = row['weights_trees_1']\n",
    "\n",
    "        # This branch must hang off the missing-weights test. It used to be the\n",
    "        # 'else' of the inner for loop, which runs after every completed loop:\n",
    "        # rows with weights gained a spurious all-None record and rows without\n",
    "        # weights were dropped.\n",
    "        if _is_missing(weights_2) or _is_missing(weights_1):\n",
    "            result_list.append({'index': None, **meta, **dict.fromkeys(coeff_cols)})\n",
    "            continue\n",
    "\n",
    "        for t, weight in enumerate(weights_2):\n",
    "            coeff_2 = float(weight)\n",
    "            coeff_1 = float(weights_1[t])\n",
    "            result_list.append({\n",
    "                'index': t,\n",
    "                **meta,\n",
    "                'coefficient_2': coeff_2,\n",
    "                'coefficient_1': coeff_1,\n",
    "                'coeff_norm': np.linalg.norm([coeff_2, coeff_1], ord = 1),\n",
    "                'prod': coeff_2 * coeff_1,\n",
    "                'abs_prod': abs(coeff_2 * coeff_1),\n",
    "            })\n",
    "\n",
    "    return pd.DataFrame(result_list)\n",
    "\n",
    "\n",
    "def add_entries(df, lambda_min=1e-4, lambda_max=0.1, lambda_seq_len=13):\n",
    "    \"\"\"Add a zero-count row for every grid value of lambda missing from ``df``.\n",
    "\n",
    "    The grid holds ``lambda_seq_len`` log-spaced values running from\n",
    "    ``lambda_max`` down to ``lambda_min``, followed by ``0``. Grid values and\n",
    "    ``df['la_trees']`` are compared after rounding to 6 decimals.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Frame with the columns ``la_trees``, ``seed`` and ``count_trees``.\n",
    "    lambda_min, lambda_max : float, optional\n",
    "        Smallest and largest non-zero grid value.\n",
    "    lambda_seq_len : int, optional\n",
    "        Number of non-zero grid values.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        ``df`` itself when no grid value is missing. Otherwise a new frame with\n",
    "        a fresh integer index in which every missing grid value has a row with\n",
    "        ``count_trees = 0`` and the seed of the first row of ``df`` (``None``\n",
    "        for an empty ``df``); all other columns are empty in those rows.\n",
    "    \"\"\"\n",
    "    lambda_seq = np.exp(np.linspace(np.log(lambda_max), np.log(lambda_min), lambda_seq_len))\n",
    "    lambda_seq = np.concatenate([lambda_seq, [0]])\n",
    "\n",
    "    # Positional access: after filtering or grouping, the first row does not\n",
    "    # have to carry the index label 0, and an empty frame has no first row.\n",
    "    seed = df['seed'].iloc[0] if len(df) else None\n",
    "\n",
    "    present = set(np.around(df['la_trees'].to_numpy(dtype=float), 6))\n",
    "    missing = []\n",
    "    for la in lambda_seq:\n",
    "        key = round(la, 6)\n",
    "        if key not in present:\n",
    "            missing.append(la)\n",
    "            present.add(key)\n",
    "\n",
    "    if not missing:\n",
    "        return df\n",
    "\n",
    "    # One concat for all new rows instead of re-copying the frame per row.\n",
    "    filler = pd.DataFrame({'la_trees': missing, 'seed': seed, 'count_trees': 0})\n",
    "    return pd.concat([df, filler], ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
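    "# ----------------------- code to create overview files -------------------\n",
    "# Disabled: the next cell loads previously written CSV files instead of rebuilding them.\n",
    "# NOTE: the to_csv calls below write into the working directory, while the next cell reads\n",
    "# from ../node/results_csv/ -- move the files there (or adjust the paths) after re-running.\n",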
    "\n",
    "# df_28_final = pd.read_csv('../node/results_csv/28__500_4_sgd_400.csv')\n",
    "# df_37_final = pd.read_csv('../node/results_csv/37__500_4_sgd_400.csv', index_col=0)\n",
    "# df_46_final = pd.read_csv('../node/results_csv/46__500_4_sgd_400.csv', index_col=0)\n",
    "# df_55_final = pd.read_csv('../node/results_csv/55__500_4_sgd_400.csv', index_col=0)\n",
    "\n",
    "# data_cmpl_list = [df_28_final, df_37_final, df_46_final, df_55_final]\n",
    "\n",
    "# df_plt_28 = create_coeff_plt_df(df_28_final)\n",
    "# df_plt_37 = create_coeff_plt_df(df_37_final)\n",
    "# df_plt_46 = create_coeff_plt_df(df_46_final)\n",
    "# df_plt_55 = create_coeff_plt_df(df_55_final)\n",
    "\n",
    "# count_list = []\n",
    "# for df in data_cmpl_list: \n",
    "#     data = df[df.ovp == True]\n",
    "#     print(data)\n",
    "#     data.drop_duplicates(inplace = True)\n",
    "#     data_long = create_coeff_plt_df(data)\n",
    "#     data_long = data_long[data_long['abs_prod'] > np.finfo(np.float32).eps]\n",
    "    \n",
    "#     #data_count = data_long.groupby(['seed', 'la_trees']).size().reset_index(name='count_trees')\n",
    "#     data_count = data_long.value_counts(subset=['la_trees', 'seed', \"rmse_rescaled\"]).reset_index(name='count_trees')\n",
    "#     data_count_filled = add_entries(data_count)\n",
    "#     count_list.append(data_count_filled)\n",
    "\n",
    "# count_df = pd.concat(count_list)\n",
    "\n",
    "# ov_df = count_df.groupby(\"la_trees\")['count_trees'].describe().reset_index()\n",
    "# ov_df = ov_df[['la_trees', 'mean', 'std']]\n",
    "# ov_df = ov_df.drop([19])\n",
    "# ov_df = ov_df.rename(columns={'mean': 'mean_nmb_trees', 'std': 'std_nmb_trees'})\n",
    "\n",
    "# data_cmpl = pd.concat(data_cmpl_list) \n",
    "\n",
    "# count_df.to_csv(\"sparsity_overview.csv\", index = 0)\n",
    "# ov_df.to_csv(\"overview_performance.csv\", index = 0)\n",
    "# data_cmpl.to_csv(\"performance_weights_cmplt.csv\", index=0)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the stored result tables; the figure cell reads count_df for the\n",
    "# tree-count panel, data_cmpl for the RMSE panel and df_plt_28 for the\n",
    "# coefficient-norm panel. The files are read in the order listed.\n",
    "data_cmpl, count_df, df_plt_28 = (\n",
    "    pd.read_csv(csv_path)\n",
    "    for csv_path in (\n",
    "        \"../node/results_csv/performance_weights_cmplt.csv\",\n",
    "        \"../node/results_csv/sparsity_overview.csv\",\n",
    "        '../node/results_csv/plt_28__500_4_sgd_400.csv',\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Three panels over the regularisation strength `la_trees`:\n",
    "# tree count per seed, rescaled RMSE, and coefficient L1-norm per tree index.\n",
    "fig, axes = plt.subplots(1, 3, figsize=(25, 5))\n",
    "\n",
    "# NOTE(review): ('sd', 10) scales the error band to 10 standard deviations,\n",
    "# whereas the RMSE panel uses 1 -- confirm this is intended.\n",
    "sns.lineplot(ax = axes[0], data=count_df, x=\"la_trees\", y=\"count_trees\", errorbar=('sd', 10), marker = \"o\", hue = \"seed\")\n",
    "axes[0].set_xscale('symlog', linthresh=0.001)\n",
    "axes[0].set_xlim(left=0)\n",
    "axes[0].set_xlabel(r\"$\\lambda$ (log10)\")\n",
    "axes[0].set_ylabel(\"Nmb. of trees\")\n",
    "axes[0].grid(True)\n",
    "\n",
    "sns.lineplot(ax = axes[1], data=data_cmpl, x=\"la_trees\", y=\"rmse_rescaled\", errorbar=('sd', 1), marker = \"o\")\n",
    "axes[1].set_xscale('symlog', linthresh=0.001)\n",
    "axes[1].set_xlim(left=0, right = 0.1)\n",
    "axes[1].set_xlabel(r\"$\\lambda$ (log10)\")\n",
    "axes[1].set_ylabel(\"Test RMSE\")\n",
    "axes[1].grid(True)\n",
    "\n",
    "# legend=False instead of removing the legend afterwards: axes[2].legend_ is\n",
    "# None when no legend was drawn, and .remove() on it raises AttributeError.\n",
    "sns.lineplot(ax = axes[2], data=df_plt_28, x=\"la_trees\", y=\"coeff_norm\", hue = \"index\", marker = \"o\", legend = False)\n",
    "# symlog like the other two panels: a pure log axis cannot start at 0, so the\n",
    "# left limit below would be rejected and la_trees == 0 could not be drawn.\n",
    "axes[2].set_xscale('symlog', linthresh=0.001)\n",
    "axes[2].set_yscale('log')\n",
    "axes[2].set_xlim(left=0, right = 0.1)\n",
    "axes[2].set_xlabel(r\"$\\lambda$ (log10)\")\n",
    "axes[2].set_ylabel(\"L1-Norm (log10)\")\n",
    "axes[2].grid(True)\n",
    "\n",
    "\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
