{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from LLM_agent_HPT import * # main experiment function for hyperparameter tuning task\n",
    "from helper_func import * \n",
    "random.seed(20250414)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Physical model data simulation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Robot arm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.stats import qmc              # For Latin Hypercube Sampling (LHS)\n",
    "from sklearn.preprocessing import MinMaxScaler  # For normalization\n",
    "torch.set_default_dtype(torch.float64)\n",
    "\n",
    "# Robot Arm Function\n",
    "def robot_arm(x):\n",
    "    theta1, theta2, theta3, theta4 = x[:, 0], x[:, 1], x[:, 2], x[:, 3]\n",
    "    L1, L2, L3, L4 = x[:, 4], x[:, 5], x[:, 6], x[:, 7]\n",
    "\n",
    "    # Cumulative angles\n",
    "    theta12 = theta1 + theta2\n",
    "    theta123 = theta12 + theta3\n",
    "    theta1234 = theta123 + theta4\n",
    "\n",
    "    # Compute u and v\n",
    "    u = (L1 * np.cos(theta1) +\n",
    "         L2 * np.cos(theta12) +\n",
    "         L3 * np.cos(theta123) +\n",
    "         L4 * np.cos(theta1234))\n",
    "\n",
    "    v = (L1 * np.sin(theta1) +\n",
    "         L2 * np.sin(theta12) +\n",
    "         L3 * np.sin(theta123) +\n",
    "         L4 * np.sin(theta1234))\n",
    "\n",
    "    # Distance from origin\n",
    "    return np.sqrt(u**2 + v**2)\n",
    "\n",
    "# Input bounds\n",
    "bounds_robot = np.array([\n",
    "    [0, 2 * np.pi],  # theta1\n",
    "    [0, 2 * np.pi],  # theta2\n",
    "    [0, 2 * np.pi],  # theta3\n",
    "    [0, 2 * np.pi],  # theta4\n",
    "    [0, 1],          # L1\n",
    "    [0, 1],          # L2\n",
    "    [0, 1],          # L3\n",
    "    [0, 1],          # L4\n",
    "])\n",
    "\n",
    "# Latin Hypercube Sampling\n",
    "sampler = qmc.LatinHypercube(d=8)\n",
    "X_sample = sampler.random(n=1000)\n",
    "X_ro = qmc.scale(X_sample, bounds_robot[:, 0], bounds_robot[:, 1])\n",
    "y_robot = robot_arm(X_ro).reshape(-1, 1)\n",
    "\n",
    "# Normalize input\n",
    "X_scaled_robot = MinMaxScaler().fit_transform(X_ro)\n",
    "\n",
    "# Add Gaussian noise\n",
    "y_tensor = torch.tensor(y_robot)\n",
    "signal_var = torch.var(y_tensor)\n",
    "noise = torch.normal(mean=0.0, std=torch.sqrt(0.1 * signal_var).item(), size=y_tensor.shape)\n",
    "y_ro = (y_tensor + noise).numpy().ravel()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### piston data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "random.seed(20250414)\n",
    "torch.set_default_dtype(torch.float64)\n",
    "\n",
    "# Piston Simulation Function\n",
    "def piston_sim(x):\n",
    "    M, S, V0, k, P0, Ta, T0 = [x[:, i] for i in range(7)]\n",
    "    A = P0 * S**2 * V0 / T0 * (1 - T0 / Ta)\n",
    "    C = 2 * np.pi * np.sqrt(M / (k + A))\n",
    "    return C\n",
    "\n",
    "# Input bounds\n",
    "bounds_piston = np.array([\n",
    "    [30, 60],          # M\n",
    "    [0.005, 0.020],    # S\n",
    "    [0.002, 0.010],    # V0\n",
    "    [1000, 5000],      # k\n",
    "    [90000, 110000],   # P0\n",
    "    [290, 296],        # Ta\n",
    "    [340, 360]         # T0\n",
    "])\n",
    "\n",
    "# Sampling\n",
    "sampler = qmc.LatinHypercube(d=7)\n",
    "X_sample = sampler.random(n=1000)\n",
    "X_piston = qmc.scale(X_sample, bounds_piston[:, 0], bounds_piston[:, 1])\n",
    "y_piston = piston_sim(X_piston).reshape(-1, 1)\n",
    "X_pi = X_piston\n",
    "# Normalize\n",
    "X_scaled_piston = MinMaxScaler().fit_transform(X_piston)\n",
    "\n",
    "# Add noise\n",
    "y_tensor = torch.tensor(y_piston)\n",
    "signal_var = torch.var(y_tensor)\n",
    "noise = torch.normal(mean=0.0, std=torch.sqrt(0.1 * signal_var).item(), size=y_tensor.shape)\n",
    "y_pi = (y_tensor + noise).numpy().ravel()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### self-defined training function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.svm import SVR\n",
    "from xgboost import XGBRegressor\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "from sklearn.model_selection import cross_val_score\n",
    "\n",
    "def clip(val, min_val, max_val):\n",
    "    return max(min_val, min(val, max_val))\n",
    "\n",
    "def rf_cv_mse(param_list, data_X, data_y):\n",
    "    \"\"\"\n",
    "    param_list = [n_estimators, max_depth, min_samples_split, min_samples_leaf, max_features]\n",
    "    All values in param_list can be floats.\n",
    "    \"\"\"\n",
    "    if isinstance(param_list, torch.Tensor):\n",
    "        param_list = param_list.tolist()    \n",
    "    max_depth = clip(int(param_list[0]), 1, 50)\n",
    "    min_samples_split = clip(int(round(param_list[1])), 2, 20)\n",
    "    min_samples_leaf = clip(int(round(param_list[2])), 1, 20)\n",
    "    max_features = clip(float(param_list[3]), 0.1, 1.0)\n",
    "\n",
    "    model = RandomForestRegressor(\n",
    "        n_estimators=500,\n",
    "        max_depth=max_depth,\n",
    "        min_samples_split=min_samples_split,\n",
    "        min_samples_leaf=min_samples_leaf,\n",
    "        max_features=max_features,\n",
    "        random_state=42,\n",
    "        n_jobs=-1\n",
    "    )\n",
    "\n",
    "    scores = cross_val_score(\n",
    "        model, data_X, data_y, \n",
    "        scoring='neg_mean_squared_error', \n",
    "        cv=10\n",
    "    )\n",
    "\n",
    "    return -np.mean(scores)\n",
    "\n",
    "\n",
    "\n",
    "def svr_cv_mse(param_list, data_X, data_y):\n",
    "    \"\"\"\n",
    "    param_list = [C, epsilon, gamma]\n",
    "    All values can be floats. \n",
    "    \"\"\"\n",
    "    if isinstance(param_list, torch.Tensor):\n",
    "        param_list = param_list.tolist()\n",
    "    C = float(param_list[0])\n",
    "    epsilon = float(param_list[1])\n",
    "    gamma = float(param_list[2])\n",
    "\n",
    "    model = SVR(C=C, epsilon=epsilon, gamma=gamma)\n",
    "\n",
    "    scores = cross_val_score(\n",
    "        model, data_X, data_y,\n",
    "        scoring='neg_mean_squared_error',\n",
    "        cv=10\n",
    "    )\n",
    "\n",
    "    return -np.mean(scores)\n",
    "\n",
    "\n",
    "def xgb_cv_mse(param_list, data_X, data_y):\n",
    "    \"\"\"\n",
    "    param_list = [max_depth, learning_rate, subsample, colsample_bytree]\n",
    "    \"\"\"\n",
    "    if isinstance(param_list, torch.Tensor):\n",
    "        param_list = param_list.tolist()\n",
    "    \n",
    "    n_estimators = 500\n",
    "    max_depth = int(round(param_list[0]))\n",
    "    learning_rate = float(param_list[1])\n",
    "    subsample = float(param_list[2])\n",
    "    colsample_bytree = float(param_list[3])\n",
    "\n",
    "    model = XGBRegressor(\n",
    "        n_estimators=n_estimators,\n",
    "        max_depth=max_depth,\n",
    "        learning_rate=learning_rate,\n",
    "        subsample=subsample,\n",
    "        colsample_bytree=colsample_bytree,\n",
    "        random_state=42,\n",
    "        n_jobs=-1\n",
    "    )\n",
    "\n",
    "    scores = cross_val_score(model, data_X, data_y, scoring='neg_mean_squared_error', cv=10)\n",
    "    return -np.mean(scores)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### bounds of the variables for each machine learning model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# -------------------------------\n",
    "# Random Forest\n",
    "bounds_rf = torch.tensor([\n",
    "    [ -1,     2,     1,   0.1],   # Lower bounds\n",
    "    [ 50,    20,    20,   1.0]    # Upper bounds\n",
    "])\n",
    "rf_params = [100, 10, 5, 2, 0.5]\n",
    "\n",
    "# -------------------------------\n",
    "# Support Vector Regressor (SVR)\n",
    "bounds_svr = torch.tensor([\n",
    "    [1e-2,  1e-4,   1e-4],     # Lower bounds [C, epsilon, gamma]\n",
    "    [1e3,   1.0,    1.0]       # Upper bounds\n",
    "])\n",
    "svr_params = [10.0, 0.1, 0.01]\n",
    "\n",
    "# -------------------------------\n",
    "# XGBoost Regressor\n",
    "bounds_xgb = torch.tensor([\n",
    "    [ 1,    0.01,  0.5,  0.5],   # Lower bounds\n",
    "    [ 10,    0.3,   1.0,  1.0]    # Upper bounds\n",
    "])\n",
    "xgb_params = [150, 6, 0.1, 0.8, 0.8]\n",
    "# -------------------------------\n",
    "# XGBoost Regressor\n",
    "bounds_mlp = torch.tensor([\n",
    "    [10,     1e-6,   1e-4],   # Lower bounds\n",
    "    [500,    1e-1,   1e-1]    # Upper bounds\n",
    "])\n",
    "params = [100, 0.001, 0.01]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### description card for physical simulation data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "ro_SVR = {\n",
    "    'md_name': 'Support Vector Regression',\n",
    "    'md_ndim': 3,\n",
    "    'md_param': 'C: [0.01, 1000.0] (float)\\n'\n",
    "                'epsilon: [0.0001, 1.0] (float)\\n'\n",
    "                'gamma: [0.0001, 1.0] (float)',\n",
    "    'data_nsamp': 1000,\n",
    "    'data_nfeature': 8,\n",
    "    'data_desc': ' The dataset models the position of a planar robotic arm consisting of four rotating joints and link lengths, computing the Euclidean distance of the arm’s endpoint from the origin.'}\n",
    "\n",
    "\n",
    "ro_RF = {\n",
    "    'md_name': 'Random Forest',\n",
    "    'md_ndim': 4,\n",
    "    'md_param': 'max_depth: [-1, 50] (int)\\n'\n",
    "                'min_samples_split: [2, 20] (int)\\n'\n",
    "                'min_samples_leaf: [1, 20] (int)\\n'\n",
    "                'max_features: [0.1, 1.0] (float)',\n",
    "    'data_nsamp': 1000,\n",
    "    'data_nfeature': 8,\n",
    "    'data_desc': ' The dataset models the position of a planar robotic arm consisting of four rotating joints and link lengths, computing the Euclidean distance of the arm’s endpoint from the origin.'}\n",
    "\n",
    "\n",
    "ro_XGB = {\n",
    "    'md_name': 'XGBoost',\n",
    "    'md_ndim': 4,\n",
    "    'md_param': 'max_depth: [1, 10] (int)\\n'\n",
    "                'learning_rate: [0.01, 0.3] (float)\\n'\n",
    "                'subsample: [0.5, 1.0] (float)\\n'\n",
    "                'colsample_bytree: [0.5, 1.0] (float)',\n",
    "    'data_nsamp': 1000,\n",
    "    'data_nfeature': 8,\n",
    "    'data_desc': ' The dataset models the position of a planar robotic arm consisting of four rotating joints and link lengths, computing the Euclidean distance of the arm’s endpoint from the origin.'}\n",
    "\n",
    "##############################################\n",
    "\n",
    "pi_SVR = {\n",
    "    'md_name': 'Support Vector Regression',\n",
    "    'md_ndim': 3,\n",
    "    'md_param': 'C: [0.01, 1000.0] (float)\\n'\n",
    "                'epsilon: [0.0001, 1.0] (float)\\n'\n",
    "                'gamma: [0.0001, 1.0] (float)',\n",
    "    'data_nsamp': 1000,\n",
    "    'data_nfeature': 7,\n",
    "    'data_desc': 'The dataset models the cycle time of a piston moving within a cylinder, based on seven physical input variables including mass, surface area, pressure, and temperature.'}\n",
    "\n",
    "\n",
    "pi_RF = {\n",
    "    'md_name': 'Random Forest',\n",
    "    'md_ndim': 4,\n",
    "    'md_param': 'max_depth: [-1, 50] (int)\\n'\n",
    "                'min_samples_split: [2, 20] (int)\\n'\n",
    "                'min_samples_leaf: [1, 20] (int)\\n'\n",
    "                'max_features: [0.1, 1.0] (float)',\n",
    "    'data_nsamp': 1000,\n",
    "    'data_nfeature': 7,\n",
    "    'data_desc': 'The dataset models the cycle time of a piston moving within a cylinder, based on seven physical input variables including mass, surface area, pressure, and temperature.'}\n",
    "\n",
    "\n",
    "pi_XGB = {\n",
    "    'md_name': 'XGBoost',\n",
    "    'md_ndim': 4,\n",
    "    'md_param': 'max_depth: [1, 10] (int)\\n'\n",
    "                'learning_rate: [0.01, 0.3] (float)\\n'\n",
    "                'subsample: [0.5, 1.0] (float)\\n'\n",
    "                'colsample_bytree: [0.5, 1.0] (float)',\n",
    "    'data_nsamp': 1000,\n",
    "    'data_nfeature': 7,\n",
    "    'data_desc': 'The dataset models the cycle time of a piston moving within a cylinder, based on seven physical input variables including mass, surface area, pressure, and temperature.'}\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Example usage: Piston+XGB with LLINBO-Transient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "TRANSIENT:   0%|          | 0/1 [00:00<?, ?it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LLM warmstarting response could not be parsed! Retrying...\n",
      "LLM warmstarting response could not be parsed! Retrying...\n",
      "LLM warmstarting response could not be parsed! Retrying...\n",
      "LLM warmstarting response could not be parsed! Retrying...\n",
      "LLM warmstarting response could not be parsed! Retrying...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "TRANSIENT: 100%|██████████| 1/1 [00:44<00:00, 44.89s/it]\n"
     ]
    }
   ],
   "source": [
    "import pickle\n",
    "def loss(xx):\n",
    "    return xgb_cv_mse(xx, X_pi, y_pi)\n",
    "runner = LLMIBO_HPT(\n",
    "    method='transient',\n",
    "    bounds = bounds_xgb,\n",
    "    objective = loss,\n",
    "    dim = 4,\n",
    "    desc = pi_XGB,\n",
    "    T = 20,\n",
    "    T_ini = 4,\n",
    "    T_rep = 1\n",
    "    )\n",
    "\n",
    "histories, regrets = runner.run()\n"
    
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
