{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6f1faaae",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  Fold 1: RMSE = 13.5443, MAD = 10.7424\n",
      "  Fold 2: RMSE = 12.7173, MAD = 10.0367\n",
      "  Fold 3: RMSE = 13.6049, MAD = 10.7232\n",
      "  Fold 4: RMSE = 13.1162, MAD = 10.2348\n",
      "  Fold 5: RMSE = 12.9869, MAD = 10.0145\n",
      "  Fold 6: RMSE = 13.3349, MAD = 10.7308\n",
      "  Fold 7: RMSE = 15.7062, MAD = 12.5712\n",
      "  Fold 8: RMSE = 14.3966, MAD = 11.2867\n",
      "  Fold 9: RMSE = 13.4556, MAD = 10.7511\n",
      "  Fold 10: RMSE = 13.3374, MAD = 10.4970\n",
      "GSE40279: mean RMSE = 13.6200, mean MAD = 10.7588\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "from sklearn.metrics import mean_squared_error, mean_absolute_error\n",
    "\n",
    "# Set path\n",
    "base_path = Path(\"C:/Document/Serieux/Travail/Data_analysis_and_papers/nash_experiement/data_split\")\n",
    "result_path = Path(\"C:/Document/Serieux/Travail/Data_analysis_and_papers/nash_experiement/results_realdata\")\n",
    "\n",
    "# Set device\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "\n",
    "# Define simple feedforward net\n",
    "class FeedforwardNN(nn.Module):\n",
    "    def __init__(self, input_dim):\n",
    "        super().__init__()\n",
    "        self.model = nn.Sequential(\n",
    "            nn.Linear(input_dim, 128),\n",
    "            nn.ReLU(),\n",
    "            nn.Linear(128, 64),\n",
    "            nn.ReLU(),\n",
    "            nn.Linear(64, 1)\n",
    "        )\n",
    "\n",
    "    def forward(self, x):\n",
    "        return self.model(x)\n",
    "\n",
    "# Training and evaluation function\n",
    "def _load_fold(dataset_path, k):\n",
    "    \"\"\"Read the four CSV files of fold ``k`` as float32 arrays (targets flattened to 1-D).\"\"\"\n",
    "    def read(prefix):\n",
    "        return pd.read_csv(dataset_path / f\"{prefix}{k}.csv\").values.astype(np.float32)\n",
    "\n",
    "    return read(\"X_train\"), read(\"y_train\").flatten(), read(\"X_test\"), read(\"y_test\").flatten()\n",
    "\n",
    "\n",
    "def run_nn(dataset_name, n_folds=10, epochs=100, lr=1e-3, seed=0):\n",
    "    \"\"\"Train and evaluate a feedforward net on each pre-split fold of a dataset.\n",
    "\n",
    "    For every fold ``k`` in ``1..n_folds`` this reads ``X_train{k}.csv``,\n",
    "    ``y_train{k}.csv``, ``X_test{k}.csv`` and ``y_test{k}.csv`` from\n",
    "    ``base_path / dataset_name``, fits a fresh ``FeedforwardNN`` with\n",
    "    full-batch Adam on an MSE loss, and prints the test RMSE and MAD\n",
    "    (computed with ``mean_absolute_error``). Per-fold metrics are saved to\n",
    "    ``<dataset_name>_pytorch_nn.csv`` inside ``result_path``.\n",
    "\n",
    "    Args:\n",
    "        dataset_name: Name of the sub-folder of ``base_path`` holding the folds.\n",
    "        n_folds: Number of folds to look for.\n",
    "        epochs: Number of full-batch gradient steps per fold.\n",
    "        lr: Adam learning rate.\n",
    "        seed: Base seed; fold ``k`` calls ``torch.manual_seed(seed + k)`` before\n",
    "            the model is built so weight initialisation is repeatable.\n",
    "            Pass ``None`` to leave the global RNG untouched.\n",
    "\n",
    "    Returns:\n",
    "        None. A missing dataset folder, or a fold with a missing file, is\n",
    "        reported on stdout and skipped.\n",
    "    \"\"\"\n",
    "    dataset_path = base_path / dataset_name\n",
    "    if not dataset_path.exists():\n",
    "        print(f\"Dataset folder '{dataset_name}' not found, skipping.\")\n",
    "        return\n",
    "\n",
    "    rmses, mads = [], []\n",
    "    for k in range(1, n_folds + 1):\n",
    "        try:\n",
    "            X_train, y_train, X_test, y_test = _load_fold(dataset_path, k)\n",
    "        except FileNotFoundError:\n",
    "            print(f\"  Fold {k}: missing file — skipping this fold.\")\n",
    "            continue\n",
    "\n",
    "        # Seed per fold so each fold's initial weights do not depend on\n",
    "        # which other folds were run or skipped.\n",
    "        if seed is not None:\n",
    "            torch.manual_seed(seed + k)\n",
    "\n",
    "        # Move data to torch; targets get a trailing dimension to match the model output\n",
    "        X_train_tensor = torch.tensor(X_train).to(device)\n",
    "        y_train_tensor = torch.tensor(y_train).unsqueeze(1).to(device)\n",
    "        X_test_tensor = torch.tensor(X_test).to(device)\n",
    "\n",
    "        # Define model\n",
    "        model = FeedforwardNN(X_train.shape[1]).to(device)\n",
    "        optimizer = optim.Adam(model.parameters(), lr=lr)\n",
    "        loss_fn = nn.MSELoss()\n",
    "\n",
    "        # Train model: one gradient step per epoch on the whole training set\n",
    "        model.train()\n",
    "        for _ in range(epochs):\n",
    "            optimizer.zero_grad()\n",
    "            y_pred = model(X_train_tensor)\n",
    "            loss = loss_fn(y_pred, y_train_tensor)\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "\n",
    "        # Predict\n",
    "        model.eval()\n",
    "        with torch.no_grad():\n",
    "            y_pred_test = model(X_test_tensor).cpu().numpy().flatten()\n",
    "\n",
    "        # Compute metrics\n",
    "        rmse = np.sqrt(mean_squared_error(y_test, y_pred_test))\n",
    "        mad = mean_absolute_error(y_test, y_pred_test)\n",
    "        rmses.append(rmse)\n",
    "        mads.append(mad)\n",
    "        print(f\"  Fold {k}: RMSE = {rmse:.4f}, MAD = {mad:.4f}\")\n",
    "\n",
    "    # Save results if any folds succeeded\n",
    "    if rmses:\n",
    "        df = pd.DataFrame({'RMSE': rmses, 'MAD': mads})\n",
    "        # Create the output folder first so a fresh checkout does not fail at the very end.\n",
    "        result_path.mkdir(parents=True, exist_ok=True)\n",
    "        df.to_csv(result_path / f\"{dataset_name}_pytorch_nn.csv\", index=False)\n",
    "        print(f\"{dataset_name}: mean RMSE = {np.mean(rmses):.4f}, mean MAD = {np.mean(mads):.4f}\\n\")\n",
    "    else:\n",
    "        print(f\"{dataset_name}: no valid folds to evaluate.\\n\")\n",
    "\n",
    "# Run for all datasets\n",
    "# Only GSE40279 is evaluated in this run.\n",
    "# Full list: [\"Airpassenger\", \"SNP500\", \"spaRNA_seq\", \"TCGA\", \"GSE40279\"]\n",
    "datasets = [\"GSE40279\"]\n",
    "for dname in datasets:\n",
    "    run_nn(dname)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7ebfaf0d",
   "metadata": {},
   "outputs": [],
   "source": [
    " "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ml_env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
