{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "0f69edff",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ==========================================\n",
    "# Imports\n",
    "# ==========================================\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "from torch.utils.data import DataLoader, TensorDataset\n",
    "from ucimlrepo import fetch_ucirepo\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
    "from anova_module import ModelAnalysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "741a4ed5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Downloading Dota 2 dataset...\n",
      "Dimensions before cleaning: (102944, 115)\n",
      "Columns dropped: ['gamemode', 'gametype']\n",
      "Dimensions after cleaning: (102944, 113)\n",
      "Training on: cpu\n",
      "Network input dimension: 113\n",
      "\n",
      "Starting training...\n",
      "Epoch 01 | Loss: 0.6791 | Train Acc: 60.45% | Test Acc: 60.02%\n",
      "Epoch 02 | Loss: 0.6653 | Train Acc: 61.12% | Test Acc: 60.09%\n",
      "Epoch 03 | Loss: 0.6618 | Train Acc: 61.89% | Test Acc: 60.12%\n",
      "Epoch 04 | Loss: 0.6588 | Train Acc: 62.49% | Test Acc: 60.12%\n",
      "Epoch 05 | Loss: 0.6559 | Train Acc: 63.38% | Test Acc: 59.58%\n",
      "Epoch 06 | Loss: 0.6517 | Train Acc: 63.80% | Test Acc: 59.04%\n",
      "Epoch 07 | Loss: 0.6475 | Train Acc: 65.03% | Test Acc: 59.19%\n",
      "Epoch 08 | Loss: 0.6439 | Train Acc: 65.38% | Test Acc: 59.56%\n",
      "Epoch 09 | Loss: 0.6399 | Train Acc: 66.26% | Test Acc: 58.91%\n",
      "Epoch 10 | Loss: 0.6335 | Train Acc: 67.26% | Test Acc: 58.69%\n",
      "\n",
      ">>> Final Accuracy: 58.69%\n"
     ]
    }
   ],
   "source": [
    "# ==========================================\n",
    "# Processing\n",
    "# ==========================================\n",
    "\n",
    "# ==========================================\n",
    "# 1. DOWNLOAD & PREPARATION\n",
    "# ==========================================\n",
    "print(\"Downloading Dota 2 dataset...\")\n",
    "dota2 = fetch_ucirepo(id=367)\n",
    "\n",
    "# Retrieve as Pandas DataFrame\n",
    "X = dota2.data.features\n",
    "y = dota2.data.targets\n",
    "\n",
    "print(f\"Dimensions before cleaning: {X.shape}\")\n",
    "\n",
    "cols_to_drop = ['gamemode', 'gametype']\n",
    "\n",
    "# First check if columns exist to avoid errors\n",
    "existing_cols_to_drop = [col for col in cols_to_drop if col in X.columns]\n",
    "X = X.drop(columns=existing_cols_to_drop)\n",
    "\n",
    "print(f\"Columns dropped: {existing_cols_to_drop}\")\n",
    "print(f\"Dimensions after cleaning: {X.shape}\")\n",
    "# ---------------------------------------------------\n",
    "\n",
    "# Target encoding\n",
    "le = LabelEncoder()\n",
    "y_encoded = le.fit_transform(y.values.ravel())\n",
    "\n",
    "# Normalization\n",
    "scaler = StandardScaler()\n",
    "X_scaled = scaler.fit_transform(X)\n",
    "\n",
    "# Split\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X_scaled, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded\n",
    ")\n",
    "\n",
    "# Conversion to Tensors\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "print(f\"Training on: {device}\")\n",
    "\n",
    "X_train_t = torch.tensor(X_train, dtype=torch.float32).to(device)\n",
    "y_train_t = torch.tensor(y_train, dtype=torch.long).to(device)\n",
    "X_test_t = torch.tensor(X_test, dtype=torch.float32).to(device)\n",
    "y_test_t = torch.tensor(y_test, dtype=torch.long).to(device)\n",
    "\n",
    "train_loader = DataLoader(TensorDataset(X_train_t, y_train_t), batch_size=256, shuffle=True)\n",
    "test_loader = DataLoader(TensorDataset(X_test_t, y_test_t), batch_size=1024)\n",
    "\n",
    "# ==========================================\n",
    "# 2. MODEL ARCHITECTURE (Deep Funnel)\n",
    "# ==========================================\n",
    "class DotaMLP(nn.Module):\n",
    "    def __init__(self, input_dim):\n",
    "        super(DotaMLP, self).__init__()\n",
    "        \n",
    "        # Architecture: Input -> 1024 -> 512 -> 256 -> 128 -> 64 -> 32 -> 2\n",
    "        \n",
    "        self.layer1 = nn.Sequential(\n",
    "            nn.Linear(input_dim, 1024),\n",
    "            nn.BatchNorm1d(1024),\n",
    "            nn.ReLU(),\n",
    "            nn.Dropout(0.3)\n",
    "        )\n",
    "        \n",
    "        self.layer2 = nn.Sequential(\n",
    "            nn.Linear(1024, 512),\n",
    "            nn.BatchNorm1d(512),\n",
    "            nn.ReLU(),\n",
    "            nn.Dropout(0.3)\n",
    "        )\n",
    "        \n",
    "        self.layer3 = nn.Sequential(\n",
    "            nn.Linear(512, 256),\n",
    "            nn.BatchNorm1d(256),\n",
    "            nn.ReLU(),\n",
    "            nn.Dropout(0.3)\n",
    "        )\n",
    "\n",
    "        self.layer4 = nn.Sequential(\n",
    "            nn.Linear(256, 128),\n",
    "            nn.BatchNorm1d(128),\n",
    "            nn.ReLU(),\n",
    "            nn.Dropout(0.3)\n",
    "        )\n",
    "\n",
    "        self.layer5 = nn.Sequential(\n",
    "            nn.Linear(128, 64),\n",
    "            nn.BatchNorm1d(64),\n",
    "            nn.ReLU(),\n",
    "            nn.Dropout(0.3)\n",
    "        )\n",
    "\n",
    "        self.layer6 = nn.Sequential(\n",
    "            nn.Linear(64, 32),\n",
    "            nn.BatchNorm1d(32),\n",
    "            nn.ReLU()\n",
    "        )\n",
    "        \n",
    "        self.output = nn.Linear(32, 2)\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = self.layer1(x)\n",
    "        x = self.layer2(x)\n",
    "        x = self.layer3(x)\n",
    "        x = self.layer4(x)\n",
    "        x = self.layer5(x)\n",
    "        x = self.layer6(x)\n",
    "        return self.output(x)\n",
    "\n",
    "# Dynamic initialization (input_dim adapts automatically after column dropping)\n",
    "input_dimension = X_train.shape[1] \n",
    "print(f\"Network input dimension: {input_dimension}\") \n",
    "model = DotaMLP(input_dimension).to(device)\n",
    "\n",
    "criterion = nn.CrossEntropyLoss()\n",
    "optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
    "\n",
    "# ==========================================\n",
    "# 3. TRAINING\n",
    "# ==========================================\n",
    "def calculate_accuracy(loader):\n",
    "    model.eval()\n",
    "    correct = 0\n",
    "    total = 0\n",
    "    with torch.no_grad():\n",
    "        for features, labels in loader:\n",
    "            outputs = model(features)\n",
    "            _, predicted = torch.max(outputs.data, 1)\n",
    "            total += labels.size(0)\n",
    "            correct += (predicted == labels).sum().item()\n",
    "    return correct / total\n",
    "\n",
    "print(\"\\nStarting training...\")\n",
    "epochs = 10  \n",
    "\n",
    "for epoch in range(epochs):\n",
    "    model.train()\n",
    "    running_loss = 0.0\n",
    "    \n",
    "    for inputs, labels in train_loader:\n",
    "        optimizer.zero_grad()\n",
    "        outputs = model(inputs)\n",
    "        loss = criterion(outputs, labels)\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "        running_loss += loss.item()\n",
    "        \n",
    "    train_acc = calculate_accuracy(train_loader)\n",
    "    test_acc = calculate_accuracy(test_loader)\n",
    "    print(f\"Epoch {epoch+1:02d} | Loss: {running_loss/len(train_loader):.4f} | Train Acc: {train_acc:.2%} | Test Acc: {test_acc:.2%}\")\n",
    "\n",
    "# ==========================================\n",
    "# 4. FINAL RESULT\n",
    "# ==========================================\n",
    "final_acc = calculate_accuracy(test_loader)\n",
    "print(f\"\\n>>> Final Accuracy: {final_acc:.2%}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "d11549ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ==========================================\n",
    "# Function\n",
    "# ==========================================\n",
    "\n",
    "X_numpy = X.to_numpy()\n",
    "r , d = X_numpy.shape\n",
    "\n",
    "# Function of interest (proba class 0)\n",
    "def f_model(X_numpy):\n",
    "    if hasattr(scaler, 'feature_names_in_'):\n",
    "        X_input = pd.DataFrame(X_numpy, columns=scaler.feature_names_in_)\n",
    "    else:\n",
    "        X_input = X_numpy\n",
    "    \n",
    "    X_numpy_scaled = scaler.transform(X_input)\n",
    "    \n",
    "    X_tensor = torch.tensor(X_numpy_scaled, dtype=torch.float32).to(device)\n",
    "    \n",
    "    model.eval()\n",
    "    with torch.no_grad():\n",
    "        logits = model(X_tensor)      \n",
    "        probs = torch.softmax(logits, dim=1)\n",
    "        \n",
    "        predictions_class_1 = probs[:, 0]\n",
    "\n",
    "    return predictions_class_1.cpu().numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "9b0e2de5",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Constructing Basis Matrix: 100%|\u001b[32m██████████\u001b[0m| 112/112 [00:04<00:00, 27.23it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Computations complete. Results ready.\n",
      "0.35835056703582835 0.015949335255252536 0.0621960291481925\n",
      "CPU times: user 1min 7s, sys: 10.2 s, total: 1min 17s\n",
      "Wall time: 23.7 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# =============================================\n",
    "# Functional ANOVA Decomposition (MAIN EFFECTS)\n",
    "# =============================================\n",
    "\n",
    "A = ModelAnalysis(X_numpy , f_model , 0.109 , 1 , 1e-4) # percentage = 0.109 to have exactly all main effects\n",
    "S , Matrix = A.functional_anova() # sets and f_A(X_A)\n",
    "print(A.get_R2() , A.get_L2_Error() , A.get_L2_Error_rel())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "aaa7901a",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Constructing Basis Matrix: 100%|\u001b[32m██████████\u001b[0m| 4117/4117 [31:05<00:00,  2.21it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Computations complete. Results ready.\n",
      "0.4137793410121702 0.01457155472040142 0.05682323605468777\n",
      "CPU times: user 58min 30s, sys: 20min 32s, total: 1h 19min 3s\n",
      "Wall time: 38min 48s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# ==========================================\n",
    "# Functional ANOVA Decomposition\n",
    "# ==========================================\n",
    "\n",
    "A = ModelAnalysis(X_numpy , f_model , 4 , 1e-2 , 1e-2)\n",
    "S , Matrix = A.functional_anova() # sets and f_A(X_A)\n",
    "print(A.get_R2() , A.get_L2_Error() , A.get_L2_Error_rel())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "hfd_env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
