{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# NEW TRY: probing RGCN embeddings on BA_2grid_house for graph-level properties"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('../../')\n",
    "import torch\n",
    "from torch_geometric.data import Data, Dataset\n",
    "import networkx as nx\n",
    "from networkx.algorithms.centrality import betweenness_centrality\n",
    "\n",
    "from Datasets.synthetics import BA_2grid, BA_2grid_house, ProbingDataset, BA_2grid_to_test, BA_2grid_house_with_node_degree_as_features_and_expand_10_dimensions\n",
    "from sklearn.model_selection import train_test_split\n",
    "from torch_geometric.loader import DataLoader\n",
    "\n",
    "import torch_geometric.utils as pyg_utils\n",
    "import pickle as pkl\n",
    "from torch_geometric.utils import from_networkx\n",
    "import random\n",
    "import torch.nn as nn\n",
    "\n",
    "from models.models_BA_2grid import GIN_framework as framework\n",
    "import torch.nn.functional as F\n",
    "import numpy as np\n",
    "from sklearn.metrics import accuracy_score, f1_score, confusion_matrix\n",
    "import torch.optim as optim\n",
    "\n",
    "from sklearn.metrics import r2_score\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def set_seed(seed):\n",
    "    \"\"\"Seed every random number generator used in this notebook.\n",
    "\n",
    "    Seeds NumPy, Python's `random` module and PyTorch (CPU and CUDA), then\n",
    "    disables cuDNN and its autotuner and requests deterministic kernels.\n",
    "\n",
    "    Args:\n",
    "        seed: integer seed. Pass -1 to draw a random seed in [0, 1000].\n",
    "\n",
    "    Returns:\n",
    "        The seed that was actually applied, so that a run started with -1\n",
    "        can be logged and reproduced later.\n",
    "    \"\"\"\n",
    "    if seed == -1:\n",
    "        seed = random.randint(0, 1000)\n",
    "    # Pandas also uses np random state by default\n",
    "    np.random.seed(seed)\n",
    "    random.seed(seed)\n",
    "    torch.manual_seed(seed)\n",
    "\n",
    "    # GPU generators; PyTorch documents these calls as silently ignored when\n",
    "    # CUDA is not available, so they are issued unconditionally.\n",
    "    torch.cuda.manual_seed(seed)\n",
    "    torch.cuda.manual_seed_all(seed)\n",
    "    torch.backends.cudnn.enabled = False\n",
    "    torch.backends.cudnn.benchmark = False\n",
    "    torch.backends.cudnn.deterministic = True\n",
    "    return seed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "set_seed(43)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Probe setup: load the trained RGCN and the BA_2grid_house dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL = \"RGCN\"\n",
    "DATASET = \"BA_2grid_house\"\n",
    "\n",
    "# import the model framework (rebinds `framework`, imported above as GIN_framework, to the RGCN variant)\n",
    "from models.models_BA_2grid_house import RGCN_framework as framework\n",
    "# import the dataset\n",
    "from Datasets.synthetics import BA_2grid_house_with_node_degree_as_features_and_expand_10_dimensions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\tomdu\\OneDrive\\Documents\\ENSC_VU\\4A-Vu\\Thesis\\Probing GNN\\ProbingVenv\\lib\\site-packages\\torch_geometric\\data\\in_memory_dataset.py:300: UserWarning: It is not recommended to directly access the internal storage format `data` of an 'InMemoryDataset'. The data of the dataset is already cached, so any modifications to `data` will not be reflected when accessing its elements. Clearing the cache now by removing all elements in `dataset._data_list`. If you are absolutely certain what you are doing, access the internal storage via `InMemoryDataset._data` instead to suppress this warning. Alternatively, you can access stacked individual attributes of every graph via `dataset.{attr_name}`.\n",
      "  warnings.warn(msg)\n"
     ]
    }
   ],
   "source": [
    "# initialize the framework\n",
    "dataset = BA_2grid_house_with_node_degree_as_features_and_expand_10_dimensions()\n",
    "gnn = framework(dataset,device=\"cpu\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Net(\n",
      "  (conv1): RGCNConv(10, 30, num_relations=2)\n",
      "  (conv2): RGCNConv(30, 30, num_relations=2)\n",
      "  (lin1): Linear(in_features=30, out_features=30, bias=True)\n",
      "  (lin2): Linear(in_features=30, out_features=2, bias=True)\n",
      ")\n",
      "tensor([ 701, 1225,   47,  ...,   40,  423,  494])\n",
      "BA_2grid_house_with_node_degree_as_features_and_expand_10_dimensions(1600)\n"
     ]
    }
   ],
   "source": [
    "# the gnn object contains the train test split and the model.\n",
    "\n",
    "print(gnn.model)\n",
    "print(gnn.train_idx)\n",
    "print(gnn.dataset[gnn.train_idx])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:root:Model loaded from: models/BA_2grid_house_RGCN\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "we loaded the weights\n",
      "Net(\n",
      "  (conv1): RGCNConv(10, 30, num_relations=2)\n",
      "  (conv2): RGCNConv(30, 30, num_relations=2)\n",
      "  (lin1): Linear(in_features=30, out_features=30, bias=True)\n",
      "  (lin2): Linear(in_features=30, out_features=2, bias=True)\n",
      ")\n"
     ]
    }
   ],
   "source": [
    "# now that the model is instantiated, we have to load the weights\n",
    "gnn.load_model(\"models/\"+DATASET+\"_\"+MODEL)\n",
    "print(\"we loaded the weights\")\n",
    "# right now the model has trained weights.\n",
    "print(gnn.model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:root:Train Loss: 0.303, Train Acc: 0.846, Test Loss: 0.368, Test Acc: 0.823\n"
     ]
    }
   ],
   "source": [
    "gnn.evaluate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "import networkx as nx\n",
    "\n",
    "def compute_graph_properties(data):\n",
    "    \"\"\"Compute structural properties of every graph in `data` with networkx.\n",
    "\n",
    "    Args:\n",
    "        data: iterable of graph objects exposing `edge_index` (turned into an\n",
    "            edge list via `edge_index.t().tolist()`) and, optionally,\n",
    "            `num_nodes`.\n",
    "\n",
    "    Returns:\n",
    "        A list with one tuple per graph:\n",
    "        (num_nodes, num_edges, density, avg_path_len, num_cliques).\n",
    "        avg_path_len is NaN when the graph has no nodes or is not connected,\n",
    "        so the result always converts to a numeric array or tensor.\n",
    "    \"\"\"\n",
    "    properties = []\n",
    "    for graph_data in data:\n",
    "        G = nx.Graph()\n",
    "        # An edge list only mentions nodes that have at least one edge, so\n",
    "        # register the declared nodes first to keep isolated ones in the counts.\n",
    "        declared_nodes = getattr(graph_data, 'num_nodes', None)\n",
    "        if declared_nodes:\n",
    "            G.add_nodes_from(range(declared_nodes))\n",
    "        G.add_edges_from(graph_data.edge_index.t().tolist())\n",
    "\n",
    "        num_nodes = G.number_of_nodes()\n",
    "        num_edges = G.number_of_edges()\n",
    "        density = nx.density(G)\n",
    "        # nx.is_connected raises on a graph without nodes, hence the size guard.\n",
    "        if num_nodes > 0 and nx.is_connected(G):\n",
    "            avg_path_len = nx.average_shortest_path_length(G)\n",
    "        else:\n",
    "            avg_path_len = float('nan')\n",
    "        # Count maximal cliques lazily instead of materialising them in a list.\n",
    "        num_cliques = sum(1 for _ in nx.find_cliques(G))\n",
    "        #small_world = nx.algorithms.smallworld.sigma(G)\n",
    "\n",
    "        properties.append((num_nodes, num_edges, density, avg_path_len, num_cliques)) #, small_world))\n",
    "    return properties\n",
    "\n",
    "train_properties = compute_graph_properties(gnn.dataset[gnn.train_idx])\n",
    "test_properties = compute_graph_properties(gnn.dataset[gnn.test_idx])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "#save the properties in a file\n",
    "# with open(\"results/\"+DATASET+\"_\"+MODEL+\"_train_properties.pkl\", \"wb\") as f:\n",
    "#     pkl.dump(train_properties, f)\n",
    "\n",
    "# with open(\"results/\"+DATASET+\"_\"+MODEL+\"_test_properties.pkl\", \"wb\") as f:\n",
    "#     pkl.dump(test_properties, f)\n",
    "\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "400"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(test_properties)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_features, test_features = gnn.evaluate_with_features2()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1600, 400)"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(train_features), len(test_features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(train_features[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### probing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate to get features\n",
    "train_features, test_features = gnn.evaluate_with_features2()\n",
    "\n",
    "# Extract x_global and x_lin1 embeddings for training set\n",
    "train_x_global = np.array([feat[0] for feat in train_features])\n",
    "train_x_lin1 = np.array([feat[1] for feat in train_features])\n",
    "\n",
    "#take only the first 10 elements\n",
    "# train_x_global = train_x_global[:10]\n",
    "# train_x_lin1 = train_x_lin1[:10]\n",
    "\n",
    "# Extract x_global and x_lin1 embeddings for test set\n",
    "test_x_global = np.array([feat[0] for feat in test_features])\n",
    "test_x_lin1 = np.array([feat[1] for feat in test_features])\n",
    "\n",
    "#take only the first 10 elements\n",
    "# test_x_global = test_x_global[:10]\n",
    "# test_x_lin1 = test_x_lin1[:10]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Linear regression: one scikit-learn `LinearRegression` probe per graph property, fitted on the `x_global` embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "# Prepare training data\n",
    "train_x = train_x_global  # or use train_x_lin1 if you prefer\n",
    "train_y = np.array(train_properties)\n",
    "\n",
    "# Prepare testing data\n",
    "test_x = test_x_global  # or use test_x_lin1 if you prefer\n",
    "test_y = np.array(test_properties)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Property 0: Train MSE: 8.223, Test MSE: 8.049\n",
      "Property 1: Train MSE: 10.675, Test MSE: 10.887\n",
      "Property 2: Train MSE: 0.000, Test MSE: 0.000\n",
      "Property 3: Train MSE: 0.241, Test MSE: 0.244\n",
      "Property 4: Train MSE: 11.329, Test MSE: 11.301\n"
     ]
    }
   ],
   "source": [
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.metrics import mean_squared_error\n",
    "\n",
    "# One independent least-squares probe per graph property (column of train_y)\n",
    "models = [\n",
    "    LinearRegression().fit(train_x, train_y[:, col])\n",
    "    for col in range(train_y.shape[1])\n",
    "]\n",
    "\n",
    "# Report the train and test mean squared error of every probe\n",
    "for i, model in enumerate(models):\n",
    "    train_pred, test_pred = model.predict(train_x), model.predict(test_x)\n",
    "\n",
    "    train_mse = mean_squared_error(train_y[:, i], train_pred)\n",
    "    test_mse = mean_squared_error(test_y[:, i], test_pred)\n",
    "\n",
    "    print(f'Property {i}: Train MSE: {train_mse:.3f}, Test MSE: {test_mse:.3f}')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Linear model: a single `nn.Linear` layer in PyTorch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "\n",
    "class LinearModel(nn.Module):\n",
    "    \"\"\"A single affine layer wrapped as a module.\"\"\"\n",
    "\n",
    "    def __init__(self, input_size, output_size):\n",
    "        \"\"\"Create the layer mapping `input_size` features to `output_size` outputs.\"\"\"\n",
    "        super().__init__()\n",
    "        self.linear = nn.Linear(in_features=input_size, out_features=output_size)\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Apply the linear layer to `x` and return the result.\"\"\"\n",
    "        projected = self.linear(x)\n",
    "        return projected\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert to PyTorch tensors\n",
    "train_x = torch.tensor(train_x_global, dtype=torch.float32)\n",
    "train_y = torch.tensor(train_properties, dtype=torch.float32)\n",
    "\n",
    "test_x = torch.tensor(test_x_global, dtype=torch.float32)\n",
    "test_y = torch.tensor(test_properties, dtype=torch.float32)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1600"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(train_x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([28., 30., 30.,  ..., 26., 29., 22.])"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_y[:, 4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch [1000/10000], Loss: 65.8506\n",
      "Epoch [2000/10000], Loss: 37.3973\n",
      "Epoch [3000/10000], Loss: 32.1226\n",
      "Epoch [4000/10000], Loss: 29.7836\n",
      "Epoch [5000/10000], Loss: 27.3770\n",
      "Epoch [6000/10000], Loss: 24.9685\n",
      "Epoch [7000/10000], Loss: 22.6811\n",
      "Epoch [8000/10000], Loss: 20.5124\n",
      "Epoch [9000/10000], Loss: 18.4646\n",
      "Epoch [10000/10000], Loss: 16.5123\n",
      "Embedding: tensor([[0.0000, 0.9567, 4.4559,  ..., 0.0000, 0.7612, 3.0745],\n",
      "        [0.0000, 0.4834, 3.9561,  ..., 0.0000, 0.9605, 3.2237],\n",
      "        [1.0854, 0.0000, 0.0000,  ..., 3.2084, 2.7875, 3.3425],\n",
      "        ...,\n",
      "        [2.0631, 0.0000, 0.0000,  ..., 4.2904, 2.9038, 2.8054],\n",
      "        [2.0631, 0.0000, 0.0000,  ..., 4.2904, 2.9038, 2.8054],\n",
      "        [0.0000, 0.0000, 1.2693,  ..., 0.0000, 1.3425, 2.8854]])\n",
      "Embedding name: train_x\n",
      "Property: num_nodes\n",
      "  Train MSE: 16.5104, Test MSE: 15.5250\n",
      "  Train R²: -0.9992, Test R²: -0.9291\n",
      "Epoch [1000/10000], Loss: 108.2927\n",
      "Epoch [2000/10000], Loss: 55.5548\n",
      "Epoch [3000/10000], Loss: 44.1973\n",
      "Epoch [4000/10000], Loss: 40.6162\n",
      "Epoch [5000/10000], Loss: 37.6637\n",
      "Epoch [6000/10000], Loss: 34.5619\n",
      "Epoch [7000/10000], Loss: 31.5759\n",
      "Epoch [8000/10000], Loss: 28.7793\n",
      "Epoch [9000/10000], Loss: 26.1413\n",
      "Epoch [10000/10000], Loss: 23.6554\n",
      "Embedding: tensor([[0.0000, 0.9567, 4.4559,  ..., 0.0000, 0.7612, 3.0745],\n",
      "        [0.0000, 0.4834, 3.9561,  ..., 0.0000, 0.9605, 3.2237],\n",
      "        [1.0854, 0.0000, 0.0000,  ..., 3.2084, 2.7875, 3.3425],\n",
      "        ...,\n",
      "        [2.0631, 0.0000, 0.0000,  ..., 4.2904, 2.9038, 2.8054],\n",
      "        [2.0631, 0.0000, 0.0000,  ..., 4.2904, 2.9038, 2.8054],\n",
      "        [0.0000, 0.0000, 1.2693,  ..., 0.0000, 1.3425, 2.8854]])\n",
      "Embedding name: train_x\n",
      "Property: num_edges\n",
      "  Train MSE: 23.6530, Test MSE: 22.1374\n",
      "  Train R²: -1.2044, Test R²: -1.0271\n",
      "Epoch [1000/10000], Loss: 0.0018\n",
      "Epoch [2000/10000], Loss: 0.0006\n",
      "Epoch [3000/10000], Loss: 0.0005\n",
      "Epoch [4000/10000], Loss: 0.0004\n",
      "Epoch [5000/10000], Loss: 0.0004\n",
      "Epoch [6000/10000], Loss: 0.0003\n",
      "Epoch [7000/10000], Loss: 0.0003\n",
      "Epoch [8000/10000], Loss: 0.0003\n",
      "Epoch [9000/10000], Loss: 0.0003\n",
      "Epoch [10000/10000], Loss: 0.0003\n",
      "Embedding: tensor([[0.0000, 0.9567, 4.4559,  ..., 0.0000, 0.7612, 3.0745],\n",
      "        [0.0000, 0.4834, 3.9561,  ..., 0.0000, 0.9605, 3.2237],\n",
      "        [1.0854, 0.0000, 0.0000,  ..., 3.2084, 2.7875, 3.3425],\n",
      "        ...,\n",
      "        [2.0631, 0.0000, 0.0000,  ..., 4.2904, 2.9038, 2.8054],\n",
      "        [2.0631, 0.0000, 0.0000,  ..., 4.2904, 2.9038, 2.8054],\n",
      "        [0.0000, 0.0000, 1.2693,  ..., 0.0000, 1.3425, 2.8854]])\n",
      "Embedding name: train_x\n",
      "Property: density\n",
      "  Train MSE: 0.0003, Test MSE: 0.0002\n",
      "  Train R²: -0.0683, Test R²: -0.0901\n",
      "Epoch [1000/10000], Loss: 0.8160\n",
      "Epoch [2000/10000], Loss: 0.6751\n",
      "Epoch [3000/10000], Loss: 0.5596\n",
      "Epoch [4000/10000], Loss: 0.4504\n",
      "Epoch [5000/10000], Loss: 0.3557\n",
      "Epoch [6000/10000], Loss: 0.2883\n",
      "Epoch [7000/10000], Loss: 0.2539\n",
      "Epoch [8000/10000], Loss: 0.2437\n",
      "Epoch [9000/10000], Loss: 0.2424\n",
      "Epoch [10000/10000], Loss: 0.2423\n",
      "Embedding: tensor([[0.0000, 0.9567, 4.4559,  ..., 0.0000, 0.7612, 3.0745],\n",
      "        [0.0000, 0.4834, 3.9561,  ..., 0.0000, 0.9605, 3.2237],\n",
      "        [1.0854, 0.0000, 0.0000,  ..., 3.2084, 2.7875, 3.3425],\n",
      "        ...,\n",
      "        [2.0631, 0.0000, 0.0000,  ..., 4.2904, 2.9038, 2.8054],\n",
      "        [2.0631, 0.0000, 0.0000,  ..., 4.2904, 2.9038, 2.8054],\n",
      "        [0.0000, 0.0000, 1.2693,  ..., 0.0000, 1.3425, 2.8854]])\n",
      "Embedding name: train_x\n",
      "Property: avg_path_len\n",
      "  Train MSE: 0.2423, Test MSE: 0.2436\n",
      "  Train R²: 0.0079, Test R²: 0.0091\n",
      "Epoch [1000/10000], Loss: 94.3120\n",
      "Epoch [2000/10000], Loss: 50.8669\n",
      "Epoch [3000/10000], Loss: 41.6161\n",
      "Epoch [4000/10000], Loss: 38.5788\n",
      "Epoch [5000/10000], Loss: 35.8113\n",
      "Epoch [6000/10000], Loss: 32.9254\n",
      "Epoch [7000/10000], Loss: 30.1629\n",
      "Epoch [8000/10000], Loss: 27.5722\n",
      "Epoch [9000/10000], Loss: 25.1337\n",
      "Epoch [10000/10000], Loss: 22.8331\n",
      "Embedding: tensor([[0.0000, 0.9567, 4.4559,  ..., 0.0000, 0.7612, 3.0745],\n",
      "        [0.0000, 0.4834, 3.9561,  ..., 0.0000, 0.9605, 3.2237],\n",
      "        [1.0854, 0.0000, 0.0000,  ..., 3.2084, 2.7875, 3.3425],\n",
      "        ...,\n",
      "        [2.0631, 0.0000, 0.0000,  ..., 4.2904, 2.9038, 2.8054],\n",
      "        [2.0631, 0.0000, 0.0000,  ..., 4.2904, 2.9038, 2.8054],\n",
      "        [0.0000, 0.0000, 1.2693,  ..., 0.0000, 1.3425, 2.8854]])\n",
      "Embedding name: train_x\n",
      "Property: num_cliques\n",
      "  Train MSE: 22.8308, Test MSE: 20.9283\n",
      "  Train R²: -1.0034, Test R²: -0.8530\n",
      "Epoch [1000/10000], Loss: 164.4559\n",
      "Epoch [2000/10000], Loss: 78.4516\n",
      "Epoch [3000/10000], Loss: 41.0143\n",
      "Epoch [4000/10000], Loss: 25.5536\n",
      "Epoch [5000/10000], Loss: 19.5913\n",
      "Epoch [6000/10000], Loss: 17.0730\n",
      "Epoch [7000/10000], Loss: 15.7272\n",
      "Epoch [8000/10000], Loss: 14.7359\n",
      "Epoch [9000/10000], Loss: 13.8695\n",
      "Epoch [10000/10000], Loss: 13.1101\n",
      "Embedding: tensor([[1.1261, 1.1031, 0.2005,  ..., 0.0000, 0.0000, 0.0000],\n",
      "        [1.3993, 0.8120, 0.0000,  ..., 0.0000, 0.0000, 0.0000],\n",
      "        [2.6797, 0.3190, 0.0000,  ..., 0.0000, 0.0000, 0.5787],\n",
      "        ...,\n",
      "        [3.2476, 0.9941, 0.0000,  ..., 0.0000, 0.0000, 1.2789],\n",
      "        [3.2476, 0.9941, 0.0000,  ..., 0.0000, 0.0000, 1.2789],\n",
      "        [2.6308, 0.0440, 0.0000,  ..., 0.0000, 0.2704, 0.0000]])\n",
      "Embedding name: train_x2\n",
      "Property: num_nodes\n",
      "  Train MSE: 13.1094, Test MSE: 147.3574\n",
      "  Train R²: -0.5874, Test R²: -17.3103\n",
      "Epoch [1000/10000], Loss: 255.0439\n",
      "Epoch [2000/10000], Loss: 128.9212\n",
      "Epoch [3000/10000], Loss: 70.0696\n",
      "Epoch [4000/10000], Loss: 41.6355\n",
      "Epoch [5000/10000], Loss: 29.3632\n",
      "Epoch [6000/10000], Loss: 24.1117\n",
      "Epoch [7000/10000], Loss: 21.5982\n",
      "Epoch [8000/10000], Loss: 20.1260\n",
      "Epoch [9000/10000], Loss: 18.9601\n",
      "Epoch [10000/10000], Loss: 17.9283\n",
      "Embedding: tensor([[1.1261, 1.1031, 0.2005,  ..., 0.0000, 0.0000, 0.0000],\n",
      "        [1.3993, 0.8120, 0.0000,  ..., 0.0000, 0.0000, 0.0000],\n",
      "        [2.6797, 0.3190, 0.0000,  ..., 0.0000, 0.0000, 0.5787],\n",
      "        ...,\n",
      "        [3.2476, 0.9941, 0.0000,  ..., 0.0000, 0.0000, 1.2789],\n",
      "        [3.2476, 0.9941, 0.0000,  ..., 0.0000, 0.0000, 1.2789],\n",
      "        [2.6308, 0.0440, 0.0000,  ..., 0.0000, 0.2704, 0.0000]])\n",
      "Embedding name: train_x2\n",
      "Property: num_edges\n",
      "  Train MSE: 17.9273, Test MSE: 83.5742\n",
      "  Train R²: -0.6708, Test R²: -6.6526\n",
      "Epoch [1000/10000], Loss: 0.0007\n",
      "Epoch [2000/10000], Loss: 0.0003\n",
      "Epoch [3000/10000], Loss: 0.0002\n",
      "Epoch [4000/10000], Loss: 0.0002\n",
      "Epoch [5000/10000], Loss: 0.0002\n",
      "Epoch [6000/10000], Loss: 0.0002\n",
      "Epoch [7000/10000], Loss: 0.0002\n",
      "Epoch [8000/10000], Loss: 0.0002\n",
      "Epoch [9000/10000], Loss: 0.0002\n",
      "Epoch [10000/10000], Loss: 0.0002\n",
      "Embedding: tensor([[1.1261, 1.1031, 0.2005,  ..., 0.0000, 0.0000, 0.0000],\n",
      "        [1.3993, 0.8120, 0.0000,  ..., 0.0000, 0.0000, 0.0000],\n",
      "        [2.6797, 0.3190, 0.0000,  ..., 0.0000, 0.0000, 0.5787],\n",
      "        ...,\n",
      "        [3.2476, 0.9941, 0.0000,  ..., 0.0000, 0.0000, 1.2789],\n",
      "        [3.2476, 0.9941, 0.0000,  ..., 0.0000, 0.0000, 1.2789],\n",
      "        [2.6308, 0.0440, 0.0000,  ..., 0.0000, 0.2704, 0.0000]])\n",
      "Embedding name: train_x2\n",
      "Property: density\n",
      "  Train MSE: 0.0002, Test MSE: 0.2427\n",
      "  Train R²: 0.0166, Test R²: -1085.6057\n",
      "Epoch [1000/10000], Loss: 0.5714\n",
      "Epoch [2000/10000], Loss: 0.4339\n",
      "Epoch [3000/10000], Loss: 0.3879\n",
      "Epoch [4000/10000], Loss: 0.3550\n",
      "Epoch [5000/10000], Loss: 0.3303\n",
      "Epoch [6000/10000], Loss: 0.3078\n",
      "Epoch [7000/10000], Loss: 0.2856\n",
      "Epoch [8000/10000], Loss: 0.2665\n",
      "Epoch [9000/10000], Loss: 0.2526\n",
      "Epoch [10000/10000], Loss: 0.2445\n",
      "Embedding: tensor([[1.1261, 1.1031, 0.2005,  ..., 0.0000, 0.0000, 0.0000],\n",
      "        [1.3993, 0.8120, 0.0000,  ..., 0.0000, 0.0000, 0.0000],\n",
      "        [2.6797, 0.3190, 0.0000,  ..., 0.0000, 0.0000, 0.5787],\n",
      "        ...,\n",
      "        [3.2476, 0.9941, 0.0000,  ..., 0.0000, 0.0000, 1.2789],\n",
      "        [3.2476, 0.9941, 0.0000,  ..., 0.0000, 0.0000, 1.2789],\n",
      "        [2.6308, 0.0440, 0.0000,  ..., 0.0000, 0.2704, 0.0000]])\n",
      "Embedding name: train_x2\n",
      "Property: avg_path_len\n",
      "  Train MSE: 0.2445, Test MSE: 3.8947\n",
      "  Train R²: -0.0015, Test R²: -14.8402\n",
      "Epoch [1000/10000], Loss: 193.7868\n",
      "Epoch [2000/10000], Loss: 96.6461\n",
      "Epoch [3000/10000], Loss: 52.1885\n",
      "Epoch [4000/10000], Loss: 33.0875\n",
      "Epoch [5000/10000], Loss: 25.3912\n",
      "Epoch [6000/10000], Loss: 22.0292\n",
      "Epoch [7000/10000], Loss: 20.3148\n",
      "Epoch [8000/10000], Loss: 19.1644\n",
      "Epoch [9000/10000], Loss: 18.1744\n",
      "Epoch [10000/10000], Loss: 17.2917\n",
      "Embedding: tensor([[1.1261, 1.1031, 0.2005,  ..., 0.0000, 0.0000, 0.0000],\n",
      "        [1.3993, 0.8120, 0.0000,  ..., 0.0000, 0.0000, 0.0000],\n",
      "        [2.6797, 0.3190, 0.0000,  ..., 0.0000, 0.0000, 0.5787],\n",
      "        ...,\n",
      "        [3.2476, 0.9941, 0.0000,  ..., 0.0000, 0.0000, 1.2789],\n",
      "        [3.2476, 0.9941, 0.0000,  ..., 0.0000, 0.0000, 1.2789],\n",
      "        [2.6308, 0.0440, 0.0000,  ..., 0.0000, 0.2704, 0.0000]])\n",
      "Embedding name: train_x2\n",
      "Property: num_cliques\n",
      "  Train MSE: 17.2909, Test MSE: 150.5693\n",
      "  Train R²: -0.5173, Test R²: -12.3318\n",
      "Epoch [1000/10000], Loss: 25.3345\n",
      "Epoch [2000/10000], Loss: 9.8208\n",
      "Epoch [3000/10000], Loss: 6.6465\n",
      "Epoch [4000/10000], Loss: 6.2553\n",
      "Epoch [5000/10000], Loss: 5.9760\n",
      "Epoch [6000/10000], Loss: 5.6831\n",
      "Epoch [7000/10000], Loss: 5.4252\n",
      "Epoch [8000/10000], Loss: 5.2284\n",
      "Epoch [9000/10000], Loss: 5.0802\n",
      "Epoch [10000/10000], Loss: 4.9656\n",
      "Embedding: tensor([[4.2342e+00, 1.1031e+00, 2.0049e-01,  ..., 0.0000e+00, 2.7096e+00,\n",
      "         1.3065e+00],\n",
      "        [7.7006e+00, 1.9639e+00, 2.2512e+00,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         2.7183e+00],\n",
      "        [6.0147e+00, 1.7912e+00, 2.9788e-03,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         2.1402e+00],\n",
      "        ...,\n",
      "        [6.5831e+00, 2.9113e+00, 6.6763e-01,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         2.3470e+00],\n",
      "        [4.8816e+00, 1.7134e+00, 1.1351e+00,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         1.8303e+00],\n",
      "        [4.7488e+00, 1.2439e+00, 8.0596e-01,  ..., 0.0000e+00, 5.6622e-01,\n",
      "         1.3292e+00]])\n",
      "Embedding name: train_x_global\n",
      "Property: num_nodes\n",
      "  Train MSE: 4.9655, Test MSE: 182.2397\n",
      "  Train R²: 0.3988, Test R²: -21.6447\n",
      "Epoch [1000/10000], Loss: 46.2035\n",
      "Epoch [2000/10000], Loss: 17.6268\n",
      "Epoch [3000/10000], Loss: 9.8154\n",
      "Epoch [4000/10000], Loss: 8.5725\n",
      "Epoch [5000/10000], Loss: 8.0761\n",
      "Epoch [6000/10000], Loss: 7.6783\n",
      "Epoch [7000/10000], Loss: 7.3340\n",
      "Epoch [8000/10000], Loss: 7.0473\n",
      "Epoch [9000/10000], Loss: 6.8220\n",
      "Epoch [10000/10000], Loss: 6.6570\n",
      "Embedding: tensor([[4.2342e+00, 1.1031e+00, 2.0049e-01,  ..., 0.0000e+00, 2.7096e+00,\n",
      "         1.3065e+00],\n",
      "        [7.7006e+00, 1.9639e+00, 2.2512e+00,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         2.7183e+00],\n",
      "        [6.0147e+00, 1.7912e+00, 2.9788e-03,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         2.1402e+00],\n",
      "        ...,\n",
      "        [6.5831e+00, 2.9113e+00, 6.6763e-01,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         2.3470e+00],\n",
      "        [4.8816e+00, 1.7134e+00, 1.1351e+00,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         1.8303e+00],\n",
      "        [4.7488e+00, 1.2439e+00, 8.0596e-01,  ..., 0.0000e+00, 5.6622e-01,\n",
      "         1.3292e+00]])\n",
      "Embedding name: train_x_global\n",
      "Property: num_edges\n",
      "  Train MSE: 6.6569, Test MSE: 340.1221\n",
      "  Train R²: 0.3796, Test R²: -30.1439\n",
      "Epoch [1000/10000], Loss: 0.0067\n",
      "Epoch [2000/10000], Loss: 0.0024\n",
      "Epoch [3000/10000], Loss: 0.0008\n",
      "Epoch [4000/10000], Loss: 0.0004\n",
      "Epoch [5000/10000], Loss: 0.0002\n",
      "Epoch [6000/10000], Loss: 0.0002\n",
      "Epoch [7000/10000], Loss: 0.0001\n",
      "Epoch [8000/10000], Loss: 0.0001\n",
      "Epoch [9000/10000], Loss: 0.0001\n",
      "Epoch [10000/10000], Loss: 0.0001\n",
      "Embedding: tensor([[4.2342e+00, 1.1031e+00, 2.0049e-01,  ..., 0.0000e+00, 2.7096e+00,\n",
      "         1.3065e+00],\n",
      "        [7.7006e+00, 1.9639e+00, 2.2512e+00,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         2.7183e+00],\n",
      "        [6.0147e+00, 1.7912e+00, 2.9788e-03,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         2.1402e+00],\n",
      "        ...,\n",
      "        [6.5831e+00, 2.9113e+00, 6.6763e-01,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         2.3470e+00],\n",
      "        [4.8816e+00, 1.7134e+00, 1.1351e+00,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         1.8303e+00],\n",
      "        [4.7488e+00, 1.2439e+00, 8.0596e-01,  ..., 0.0000e+00, 5.6622e-01,\n",
      "         1.3292e+00]])\n",
      "Embedding name: train_x_global\n",
      "Property: density\n",
      "  Train MSE: 0.0001, Test MSE: 0.1936\n",
      "  Train R²: 0.5799, Test R²: -865.8724\n",
      "Epoch [1000/10000], Loss: 0.4073\n",
      "Epoch [2000/10000], Loss: 0.3483\n",
      "Epoch [3000/10000], Loss: 0.2988\n",
      "Epoch [4000/10000], Loss: 0.2602\n",
      "Epoch [5000/10000], Loss: 0.2328\n",
      "Epoch [6000/10000], Loss: 0.2133\n",
      "Epoch [7000/10000], Loss: 0.1979\n",
      "Epoch [8000/10000], Loss: 0.1842\n",
      "Epoch [9000/10000], Loss: 0.1722\n",
      "Epoch [10000/10000], Loss: 0.1626\n",
      "Embedding: tensor([[4.2342e+00, 1.1031e+00, 2.0049e-01,  ..., 0.0000e+00, 2.7096e+00,\n",
      "         1.3065e+00],\n",
      "        [7.7006e+00, 1.9639e+00, 2.2512e+00,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         2.7183e+00],\n",
      "        [6.0147e+00, 1.7912e+00, 2.9788e-03,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         2.1402e+00],\n",
      "        ...,\n",
      "        [6.5831e+00, 2.9113e+00, 6.6763e-01,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         2.3470e+00],\n",
      "        [4.8816e+00, 1.7134e+00, 1.1351e+00,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         1.8303e+00],\n",
      "        [4.7488e+00, 1.2439e+00, 8.0596e-01,  ..., 0.0000e+00, 5.6622e-01,\n",
      "         1.3292e+00]])\n",
      "Embedding name: train_x_global\n",
      "Property: avg_path_len\n",
      "  Train MSE: 0.1626, Test MSE: 152.4317\n",
      "  Train R²: 0.3341, Test R²: -618.9626\n",
      "Epoch [1000/10000], Loss: 38.2656\n",
      "Epoch [2000/10000], Loss: 14.2431\n",
      "Epoch [3000/10000], Loss: 7.4622\n",
      "Epoch [4000/10000], Loss: 6.3358\n",
      "Epoch [5000/10000], Loss: 5.9686\n",
      "Epoch [6000/10000], Loss: 5.7839\n",
      "Epoch [7000/10000], Loss: 5.6644\n",
      "Epoch [8000/10000], Loss: 5.5639\n",
      "Epoch [9000/10000], Loss: 5.4709\n",
      "Epoch [10000/10000], Loss: 5.3793\n",
      "Embedding: tensor([[4.2342e+00, 1.1031e+00, 2.0049e-01,  ..., 0.0000e+00, 2.7096e+00,\n",
      "         1.3065e+00],\n",
      "        [7.7006e+00, 1.9639e+00, 2.2512e+00,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         2.7183e+00],\n",
      "        [6.0147e+00, 1.7912e+00, 2.9788e-03,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         2.1402e+00],\n",
      "        ...,\n",
      "        [6.5831e+00, 2.9113e+00, 6.6763e-01,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         2.3470e+00],\n",
      "        [4.8816e+00, 1.7134e+00, 1.1351e+00,  ..., 0.0000e+00, 2.2269e+00,\n",
      "         1.8303e+00],\n",
      "        [4.7488e+00, 1.2439e+00, 8.0596e-01,  ..., 0.0000e+00, 5.6622e-01,\n",
      "         1.3292e+00]])\n",
      "Embedding name: train_x_global\n",
      "Property: num_cliques\n",
      "  Train MSE: 5.3792, Test MSE: 199.8603\n",
      "  Train R²: 0.5280, Test R²: -16.6962\n",
      "Epoch [1000/10000], Loss: 60.5965\n",
      "Epoch [2000/10000], Loss: 25.3690\n",
      "Epoch [3000/10000], Loss: 13.3009\n",
      "Epoch [4000/10000], Loss: 9.0178\n",
      "Epoch [5000/10000], Loss: 7.1673\n",
      "Epoch [6000/10000], Loss: 6.3922\n",
      "Epoch [7000/10000], Loss: 6.0115\n",
      "Epoch [8000/10000], Loss: 5.7085\n",
      "Epoch [9000/10000], Loss: 5.4622\n",
      "Epoch [10000/10000], Loss: 5.2863\n",
      "Embedding: tensor([[2.2326, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.0849],\n",
      "        [2.4615, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.8944],\n",
      "        [2.1757, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.8878],\n",
      "        ...,\n",
      "        [2.0995, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.9132],\n",
      "        [1.8245, 0.0000, 0.0000,  ..., 0.0478, 0.0000, 0.9295],\n",
      "        [1.0679, 0.0000, 0.0000,  ..., 1.5987, 0.0000, 0.3585]])\n",
      "Embedding name: train_x_lin1\n",
      "Property: num_nodes\n",
      "  Train MSE: 5.2861, Test MSE: 758.2327\n",
      "  Train R²: 0.3599, Test R²: -93.2164\n",
      "Epoch [1000/10000], Loss: 96.9093\n",
      "Epoch [2000/10000], Loss: 42.0816\n",
      "Epoch [3000/10000], Loss: 20.9721\n",
      "Epoch [4000/10000], Loss: 13.4219\n",
      "Epoch [5000/10000], Loss: 10.2413\n",
      "Epoch [6000/10000], Loss: 8.8474\n",
      "Epoch [7000/10000], Loss: 8.2243\n",
      "Epoch [8000/10000], Loss: 7.7587\n",
      "Epoch [9000/10000], Loss: 7.3727\n",
      "Epoch [10000/10000], Loss: 7.0928\n",
      "Embedding: tensor([[2.2326, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.0849],\n",
      "        [2.4615, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.8944],\n",
      "        [2.1757, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.8878],\n",
      "        ...,\n",
      "        [2.0995, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.9132],\n",
      "        [1.8245, 0.0000, 0.0000,  ..., 0.0478, 0.0000, 0.9295],\n",
      "        [1.0679, 0.0000, 0.0000,  ..., 1.5987, 0.0000, 0.3585]])\n",
      "Embedding name: train_x_lin1\n",
      "Property: num_edges\n",
      "  Train MSE: 7.0925, Test MSE: 1015.7668\n",
      "  Train R²: 0.3390, Test R²: -92.0107\n",
      "Epoch [1000/10000], Loss: 0.0006\n",
      "Epoch [2000/10000], Loss: 0.0003\n",
      "Epoch [3000/10000], Loss: 0.0002\n",
      "Epoch [4000/10000], Loss: 0.0001\n",
      "Epoch [5000/10000], Loss: 0.0001\n",
      "Epoch [6000/10000], Loss: 0.0001\n",
      "Epoch [7000/10000], Loss: 0.0001\n",
      "Epoch [8000/10000], Loss: 0.0001\n",
      "Epoch [9000/10000], Loss: 0.0001\n",
      "Epoch [10000/10000], Loss: 0.0001\n",
      "Embedding: tensor([[2.2326, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.0849],\n",
      "        [2.4615, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.8944],\n",
      "        [2.1757, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.8878],\n",
      "        ...,\n",
      "        [2.0995, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.9132],\n",
      "        [1.8245, 0.0000, 0.0000,  ..., 0.0478, 0.0000, 0.9295],\n",
      "        [1.0679, 0.0000, 0.0000,  ..., 1.5987, 0.0000, 0.3585]])\n",
      "Embedding name: train_x_lin1\n",
      "Property: density\n",
      "  Train MSE: 0.0001, Test MSE: 0.5471\n",
      "  Train R²: 0.5894, Test R²: -2448.3013\n",
      "Epoch [1000/10000], Loss: 0.4730\n",
      "Epoch [2000/10000], Loss: 0.3073\n",
      "Epoch [3000/10000], Loss: 0.2683\n",
      "Epoch [4000/10000], Loss: 0.2451\n",
      "Epoch [5000/10000], Loss: 0.2302\n",
      "Epoch [6000/10000], Loss: 0.2189\n",
      "Epoch [7000/10000], Loss: 0.2074\n",
      "Epoch [8000/10000], Loss: 0.1943\n",
      "Epoch [9000/10000], Loss: 0.1802\n",
      "Epoch [10000/10000], Loss: 0.1667\n",
      "Embedding: tensor([[2.2326, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.0849],\n",
      "        [2.4615, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.8944],\n",
      "        [2.1757, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.8878],\n",
      "        ...,\n",
      "        [2.0995, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.9132],\n",
      "        [1.8245, 0.0000, 0.0000,  ..., 0.0478, 0.0000, 0.9295],\n",
      "        [1.0679, 0.0000, 0.0000,  ..., 1.5987, 0.0000, 0.3585]])\n",
      "Embedding name: train_x_lin1\n",
      "Property: avg_path_len\n",
      "  Train MSE: 0.1667, Test MSE: 153.2995\n",
      "  Train R²: 0.3172, Test R²: -622.4924\n",
      "Epoch [1000/10000], Loss: 96.7475\n",
      "Epoch [2000/10000], Loss: 38.7617\n",
      "Epoch [3000/10000], Loss: 18.2273\n",
      "Epoch [4000/10000], Loss: 11.1857\n",
      "Epoch [5000/10000], Loss: 8.6115\n",
      "Epoch [6000/10000], Loss: 7.4398\n",
      "Epoch [7000/10000], Loss: 6.8576\n",
      "Epoch [8000/10000], Loss: 6.4172\n",
      "Epoch [9000/10000], Loss: 6.0451\n",
      "Epoch [10000/10000], Loss: 5.7606\n",
      "Embedding: tensor([[2.2326, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.0849],\n",
      "        [2.4615, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.8944],\n",
      "        [2.1757, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.8878],\n",
      "        ...,\n",
      "        [2.0995, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.9132],\n",
      "        [1.8245, 0.0000, 0.0000,  ..., 0.0478, 0.0000, 0.9295],\n",
      "        [1.0679, 0.0000, 0.0000,  ..., 1.5987, 0.0000, 0.3585]])\n",
      "Embedding name: train_x_lin1\n",
      "Property: num_cliques\n",
      "  Train MSE: 5.7604, Test MSE: 553.5195\n",
      "  Train R²: 0.4945, Test R²: -48.0102\n"
     ]
    },
    {
     "ename": "RuntimeError",
     "evalue": "mat1 and mat2 shapes cannot be multiplied (1600x2 and 30x1)",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[21], line 81\u001b[0m\n\u001b[0;32m     78\u001b[0m model\u001b[38;5;241m.\u001b[39mtrain()\n\u001b[0;32m     79\u001b[0m optimizer\u001b[38;5;241m.\u001b[39mzero_grad()\n\u001b[1;32m---> 81\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[43membedding\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39msqueeze()\n\u001b[0;32m     82\u001b[0m target \u001b[38;5;241m=\u001b[39m train_y[:, i]\u001b[38;5;241m.\u001b[39msqueeze()\n\u001b[0;32m     84\u001b[0m loss \u001b[38;5;241m=\u001b[39m criterion(outputs, target)\n",
      "File \u001b[1;32mc:\\Users\\tomdu\\OneDrive\\Documents\\ENSC_VU\\4A-Vu\\Thesis\\Probing GNN\\ProbingVenv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1532\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m   1530\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m   1531\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1532\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
      "File \u001b[1;32mc:\\Users\\tomdu\\OneDrive\\Documents\\ENSC_VU\\4A-Vu\\Thesis\\Probing GNN\\ProbingVenv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1541\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m   1536\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m   1537\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m   1538\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m   1539\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m   1540\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1541\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m   1543\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m   1544\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
      "Cell \u001b[1;32mIn[21], line 14\u001b[0m, in \u001b[0;36mLinearModel.forward\u001b[1;34m(self, x)\u001b[0m\n\u001b[0;32m     13\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[1;32m---> 14\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlinear\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mc:\\Users\\tomdu\\OneDrive\\Documents\\ENSC_VU\\4A-Vu\\Thesis\\Probing GNN\\ProbingVenv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1532\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m   1530\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m   1531\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 1532\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
      "File \u001b[1;32mc:\\Users\\tomdu\\OneDrive\\Documents\\ENSC_VU\\4A-Vu\\Thesis\\Probing GNN\\ProbingVenv\\lib\\site-packages\\torch\\nn\\modules\\module.py:1541\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m   1536\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m   1537\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m   1538\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m   1539\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m   1540\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1541\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m forward_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m   1543\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m   1544\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
      "File \u001b[1;32mc:\\Users\\tomdu\\OneDrive\\Documents\\ENSC_VU\\4A-Vu\\Thesis\\Probing GNN\\ProbingVenv\\lib\\site-packages\\torch\\nn\\modules\\linear.py:116\u001b[0m, in \u001b[0;36mLinear.forward\u001b[1;34m(self, input)\u001b[0m\n\u001b[0;32m    115\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: Tensor) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Tensor:\n\u001b[1;32m--> 116\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlinear\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n",
      "\u001b[1;31mRuntimeError\u001b[0m: mat1 and mat2 shapes cannot be multiplied (1600x2 and 30x1)"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "from sklearn.metrics import mean_squared_error, r2_score\n",
    "import numpy as np\n",
    "\n",
    "# Define the linear model\n",
    "class LinearModel(nn.Module):\n",
    "    def __init__(self, input_size, output_size):\n",
    "        super(LinearModel, self).__init__()\n",
    "        self.linear = nn.Linear(input_size, output_size)\n",
    "\n",
    "    def forward(self, x):\n",
    "        return self.linear(x)\n",
    "\n",
    "# Assume we have already evaluated to get features\n",
    "# train_features, test_features = gnn.evaluate_with_features()\n",
    "\n",
    "# Extract embeddings (in order x1, x2, x_global, x_lin1, out)\n",
    "train_x1 = np.array([feat[0] for feat in train_features])\n",
    "test_x1 = np.array([feat[0] for feat in test_features])\n",
    "\n",
    "# Extract 5th, 6th and 7th embeddings\n",
    "train_x2 = np.array([feat[1] for feat in train_features])\n",
    "test_x2 = np.array([feat[1] for feat in test_features])\n",
    "\n",
    "train_x_global = np.array([feat[2] for feat in train_features])\n",
    "test_x_global = np.array([feat[2] for feat in test_features])\n",
    "\n",
    "train_x_lin1 = np.array([feat[3] for feat in train_features])\n",
    "test_x_lin1 = np.array([feat[3] for feat in test_features])\n",
    "\n",
    "train_out = np.array([feat[4] for feat in train_features])\n",
    "test_out = np.array([feat[4] for feat in test_features])\n",
    "\n",
    "# Compute graph properties\n",
    "train_properties = compute_graph_properties(gnn.dataset[gnn.train_idx])\n",
    "test_properties = compute_graph_properties(gnn.dataset[gnn.test_idx])\n",
    "\n",
    "# Convert to PyTorch tensors\n",
    "train_x = torch.tensor(train_x1, dtype=torch.float32)\n",
    "train_y = torch.tensor(train_properties, dtype=torch.float32)\n",
    "\n",
    "test_x = torch.tensor(test_x1, dtype=torch.float32)\n",
    "test_y = torch.tensor(test_properties, dtype=torch.float32)\n",
    "\n",
    "train_x2 = torch.tensor(train_x2, dtype=torch.float32)\n",
    "test_x2 = torch.tensor(test_x2, dtype=torch.float32)\n",
    "\n",
    "train_x_global = torch.tensor(train_x_global, dtype=torch.float32)\n",
    "test_x_global = torch.tensor(test_x_global, dtype=torch.float32)\n",
    "\n",
    "train_x_lin1 = torch.tensor(train_x_lin1, dtype=torch.float32)\n",
    "test_x_lin1 = torch.tensor(test_x_lin1, dtype=torch.float32)\n",
    "\n",
    "train_out = torch.tensor(train_out, dtype=torch.float32)\n",
    "test_out = torch.tensor(test_out, dtype=torch.float32)\n",
    "\n",
    "# Train and evaluate a model for each graph property and each embeddings\n",
    "input_size = train_x.shape[1]\n",
    "output_size = 1  # Predicting one property at a time\n",
    "\n",
    "\n",
    "property_names = ['num_nodes', 'num_edges', 'density', 'avg_path_len', 'num_cliques']\n",
    "\n",
    "embeddings = [train_x, train_x2, train_x_global, train_x_lin1, train_out]\n",
    "embeddings_names = ['train_x', 'train_x2', 'train_x_global', 'train_x_lin1', 'train_out']\n",
    "\n",
    "for j, embedding in enumerate(embeddings):\n",
    "    # Train and evaluate a model for each graph property\n",
    "    for i, property_name in enumerate(property_names):\n",
    "        model = LinearModel(input_size, output_size)\n",
    "        criterion = nn.MSELoss()\n",
    "        optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
    "        num_epochs = 10000\n",
    "\n",
    "        for epoch in range(num_epochs):\n",
    "            model.train()\n",
    "            optimizer.zero_grad()\n",
    "\n",
    "            outputs = model(embedding).squeeze()\n",
    "            target = train_y[:, i].squeeze()\n",
    "\n",
    "            loss = criterion(outputs, target)\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "\n",
    "            if (epoch+1) % 1000 == 0:\n",
    "                print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')\n",
    "\n",
    "        # Evaluate the model\n",
    "        model.eval()\n",
    "        with torch.no_grad():\n",
    "            train_pred = model(embedding).squeeze().cpu().numpy()\n",
    "            test_pred = model(test_x).squeeze().cpu().numpy()\n",
    "\n",
    "            train_target = train_y[:, i].cpu().numpy()\n",
    "            test_target = test_y[:, i].cpu().numpy()\n",
    "\n",
    "            train_mse = mean_squared_error(train_target, train_pred)\n",
    "            test_mse = mean_squared_error(test_target, test_pred)\n",
    "\n",
    "            train_r2 = r2_score(train_target, train_pred)\n",
    "            test_r2 = r2_score(test_target, test_pred)\n",
    "\n",
    "            print(f'Embedding: {embedding}')\n",
    "            #print the name of the embedding, literally the name of the variable\n",
    "            print(f'Embedding name: {embeddings_names[j]}')\n",
    "            print(f'Property: {property_name}')\n",
    "            print(f'  Train MSE: {train_mse:.4f}, Test MSE: {test_mse:.4f}')\n",
    "            print(f'  Train R²: {train_r2:.4f}, Test R²: {test_r2:.4f}')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch [1000/10000], Property: num_nodes, Loss: 65.6111\n",
      "Epoch [2000/10000], Property: num_nodes, Loss: 38.4882\n",
      "Epoch [3000/10000], Property: num_nodes, Loss: 33.1509\n",
      "Epoch [4000/10000], Property: num_nodes, Loss: 30.5764\n",
      "Epoch [5000/10000], Property: num_nodes, Loss: 27.8995\n",
      "Epoch [6000/10000], Property: num_nodes, Loss: 25.2782\n",
      "Epoch [7000/10000], Property: num_nodes, Loss: 22.8601\n",
      "Epoch [8000/10000], Property: num_nodes, Loss: 20.6307\n",
      "Epoch [9000/10000], Property: num_nodes, Loss: 18.5701\n",
      "Epoch [10000/10000], Property: num_nodes, Loss: 16.6302\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: num_nodes\n",
      "  Train MSE: 16.6283, Test MSE: 15.6328\n",
      "  Train R²: -1.0134, Test R²: -0.9425\n",
      "Epoch [1000/10000], Property: num_edges, Loss: 92.6828\n",
      "Epoch [2000/10000], Property: num_edges, Loss: 52.3110\n",
      "Epoch [3000/10000], Property: num_edges, Loss: 43.6442\n",
      "Epoch [4000/10000], Property: num_edges, Loss: 40.3993\n",
      "Epoch [5000/10000], Property: num_edges, Loss: 37.2498\n",
      "Epoch [6000/10000], Property: num_edges, Loss: 34.0415\n",
      "Epoch [7000/10000], Property: num_edges, Loss: 31.0307\n",
      "Epoch [8000/10000], Property: num_edges, Loss: 28.2365\n",
      "Epoch [9000/10000], Property: num_edges, Loss: 25.6221\n",
      "Epoch [10000/10000], Property: num_edges, Loss: 23.1642\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: num_edges\n",
      "  Train MSE: 23.1618, Test MSE: 21.7384\n",
      "  Train R²: -1.1586, Test R²: -0.9905\n",
      "Epoch [1000/10000], Property: density, Loss: 0.0014\n",
      "Epoch [2000/10000], Property: density, Loss: 0.0012\n",
      "Epoch [3000/10000], Property: density, Loss: 0.0010\n",
      "Epoch [4000/10000], Property: density, Loss: 0.0008\n",
      "Epoch [5000/10000], Property: density, Loss: 0.0005\n",
      "Epoch [6000/10000], Property: density, Loss: 0.0004\n",
      "Epoch [7000/10000], Property: density, Loss: 0.0003\n",
      "Epoch [8000/10000], Property: density, Loss: 0.0003\n",
      "Epoch [9000/10000], Property: density, Loss: 0.0003\n",
      "Epoch [10000/10000], Property: density, Loss: 0.0003\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: density\n",
      "  Train MSE: 0.0003, Test MSE: 0.0003\n",
      "  Train R²: -0.1388, Test R²: -0.1283\n",
      "Epoch [1000/10000], Property: avg_path_len, Loss: 0.9098\n",
      "Epoch [2000/10000], Property: avg_path_len, Loss: 0.7622\n",
      "Epoch [3000/10000], Property: avg_path_len, Loss: 0.6399\n",
      "Epoch [4000/10000], Property: avg_path_len, Loss: 0.5310\n",
      "Epoch [5000/10000], Property: avg_path_len, Loss: 0.4300\n",
      "Epoch [6000/10000], Property: avg_path_len, Loss: 0.3435\n",
      "Epoch [7000/10000], Property: avg_path_len, Loss: 0.2827\n",
      "Epoch [8000/10000], Property: avg_path_len, Loss: 0.2523\n",
      "Epoch [9000/10000], Property: avg_path_len, Loss: 0.2434\n",
      "Epoch [10000/10000], Property: avg_path_len, Loss: 0.2423\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: avg_path_len\n",
      "  Train MSE: 0.2423, Test MSE: 0.2431\n",
      "  Train R²: 0.0079, Test R²: 0.0112\n",
      "Epoch [1000/10000], Property: num_cliques, Loss: 93.5011\n",
      "Epoch [2000/10000], Property: num_cliques, Loss: 51.0128\n",
      "Epoch [3000/10000], Property: num_cliques, Loss: 41.7113\n",
      "Epoch [4000/10000], Property: num_cliques, Loss: 38.6372\n",
      "Epoch [5000/10000], Property: num_cliques, Loss: 35.8683\n",
      "Epoch [6000/10000], Property: num_cliques, Loss: 32.9744\n",
      "Epoch [7000/10000], Property: num_cliques, Loss: 30.2087\n",
      "Epoch [8000/10000], Property: num_cliques, Loss: 27.6277\n",
      "Epoch [9000/10000], Property: num_cliques, Loss: 25.2043\n",
      "Epoch [10000/10000], Property: num_cliques, Loss: 22.9190\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: num_cliques\n",
      "  Train MSE: 22.9168, Test MSE: 20.9870\n",
      "  Train R²: -1.0110, Test R²: -0.8582\n",
      "Epoch [1000/10000], Property: num_nodes, Loss: 168.8011\n",
      "Epoch [2000/10000], Property: num_nodes, Loss: 81.5402\n",
      "Epoch [3000/10000], Property: num_nodes, Loss: 43.0307\n",
      "Epoch [4000/10000], Property: num_nodes, Loss: 26.6524\n",
      "Epoch [5000/10000], Property: num_nodes, Loss: 20.1541\n",
      "Epoch [6000/10000], Property: num_nodes, Loss: 17.3777\n",
      "Epoch [7000/10000], Property: num_nodes, Loss: 15.9358\n",
      "Epoch [8000/10000], Property: num_nodes, Loss: 14.9146\n",
      "Epoch [9000/10000], Property: num_nodes, Loss: 14.0278\n",
      "Epoch [10000/10000], Property: num_nodes, Loss: 13.2480\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: num_nodes\n",
      "  Train MSE: 13.2472, Test MSE: 12.5325\n",
      "  Train R²: -0.6041, Test R²: -0.5573\n",
      "Epoch [1000/10000], Property: num_edges, Loss: 254.0636\n",
      "Epoch [2000/10000], Property: num_edges, Loss: 128.0669\n",
      "Epoch [3000/10000], Property: num_edges, Loss: 69.5641\n",
      "Epoch [4000/10000], Property: num_edges, Loss: 41.4636\n",
      "Epoch [5000/10000], Property: num_edges, Loss: 29.4299\n",
      "Epoch [6000/10000], Property: num_edges, Loss: 24.3083\n",
      "Epoch [7000/10000], Property: num_edges, Loss: 21.8378\n",
      "Epoch [8000/10000], Property: num_edges, Loss: 20.3481\n",
      "Epoch [9000/10000], Property: num_edges, Loss: 19.1453\n",
      "Epoch [10000/10000], Property: num_edges, Loss: 18.0773\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: num_edges\n",
      "  Train MSE: 18.0763, Test MSE: 17.0764\n",
      "  Train R²: -0.6847, Test R²: -0.5636\n",
      "Epoch [1000/10000], Property: density, Loss: 0.0006\n",
      "Epoch [2000/10000], Property: density, Loss: 0.0003\n",
      "Epoch [3000/10000], Property: density, Loss: 0.0002\n",
      "Epoch [4000/10000], Property: density, Loss: 0.0002\n",
      "Epoch [5000/10000], Property: density, Loss: 0.0002\n",
      "Epoch [6000/10000], Property: density, Loss: 0.0002\n",
      "Epoch [7000/10000], Property: density, Loss: 0.0002\n",
      "Epoch [8000/10000], Property: density, Loss: 0.0002\n",
      "Epoch [9000/10000], Property: density, Loss: 0.0002\n",
      "Epoch [10000/10000], Property: density, Loss: 0.0002\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: density\n",
      "  Train MSE: 0.0002, Test MSE: 0.0002\n",
      "  Train R²: 0.0168, Test R²: -0.0129\n",
      "Epoch [1000/10000], Property: avg_path_len, Loss: 0.6394\n",
      "Epoch [2000/10000], Property: avg_path_len, Loss: 0.4518\n",
      "Epoch [3000/10000], Property: avg_path_len, Loss: 0.4037\n",
      "Epoch [4000/10000], Property: avg_path_len, Loss: 0.3693\n",
      "Epoch [5000/10000], Property: avg_path_len, Loss: 0.3427\n",
      "Epoch [6000/10000], Property: avg_path_len, Loss: 0.3188\n",
      "Epoch [7000/10000], Property: avg_path_len, Loss: 0.2951\n",
      "Epoch [8000/10000], Property: avg_path_len, Loss: 0.2737\n",
      "Epoch [9000/10000], Property: avg_path_len, Loss: 0.2575\n",
      "Epoch [10000/10000], Property: avg_path_len, Loss: 0.2471\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: avg_path_len\n",
      "  Train MSE: 0.2471, Test MSE: 0.2523\n",
      "  Train R²: -0.0118, Test R²: -0.0262\n",
      "Epoch [1000/10000], Property: num_cliques, Loss: 194.6599\n",
      "Epoch [2000/10000], Property: num_cliques, Loss: 96.3395\n",
      "Epoch [3000/10000], Property: num_cliques, Loss: 51.6945\n",
      "Epoch [4000/10000], Property: num_cliques, Loss: 32.7362\n",
      "Epoch [5000/10000], Property: num_cliques, Loss: 25.2225\n",
      "Epoch [6000/10000], Property: num_cliques, Loss: 22.0044\n",
      "Epoch [7000/10000], Property: num_cliques, Loss: 20.3482\n",
      "Epoch [8000/10000], Property: num_cliques, Loss: 19.2089\n",
      "Epoch [9000/10000], Property: num_cliques, Loss: 18.2273\n",
      "Epoch [10000/10000], Property: num_cliques, Loss: 17.3519\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: num_cliques\n",
      "  Train MSE: 17.3511, Test MSE: 16.3550\n",
      "  Train R²: -0.5226, Test R²: -0.4481\n",
      "Epoch [1000/10000], Property: num_nodes, Loss: 29.5725\n",
      "Epoch [2000/10000], Property: num_nodes, Loss: 10.7426\n",
      "Epoch [3000/10000], Property: num_nodes, Loss: 6.7654\n",
      "Epoch [4000/10000], Property: num_nodes, Loss: 6.2927\n",
      "Epoch [5000/10000], Property: num_nodes, Loss: 5.9904\n",
      "Epoch [6000/10000], Property: num_nodes, Loss: 5.6832\n",
      "Epoch [7000/10000], Property: num_nodes, Loss: 5.4241\n",
      "Epoch [8000/10000], Property: num_nodes, Loss: 5.2326\n",
      "Epoch [9000/10000], Property: num_nodes, Loss: 5.0874\n",
      "Epoch [10000/10000], Property: num_nodes, Loss: 4.9713\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: num_nodes\n",
      "  Train MSE: 4.9712, Test MSE: 5.0696\n",
      "  Train R²: 0.3981, Test R²: 0.3701\n",
      "Epoch [1000/10000], Property: num_edges, Loss: 44.4204\n",
      "Epoch [2000/10000], Property: num_edges, Loss: 17.5317\n",
      "Epoch [3000/10000], Property: num_edges, Loss: 10.0943\n",
      "Epoch [4000/10000], Property: num_edges, Loss: 8.8438\n",
      "Epoch [5000/10000], Property: num_edges, Loss: 8.2885\n",
      "Epoch [6000/10000], Property: num_edges, Loss: 7.8252\n",
      "Epoch [7000/10000], Property: num_edges, Loss: 7.4198\n",
      "Epoch [8000/10000], Property: num_edges, Loss: 7.0883\n",
      "Epoch [9000/10000], Property: num_edges, Loss: 6.8381\n",
      "Epoch [10000/10000], Property: num_edges, Loss: 6.6632\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: num_edges\n",
      "  Train MSE: 6.6630, Test MSE: 6.8888\n",
      "  Train R²: 0.3790, Test R²: 0.3692\n",
      "Epoch [1000/10000], Property: density, Loss: 0.0004\n",
      "Epoch [2000/10000], Property: density, Loss: 0.0002\n",
      "Epoch [3000/10000], Property: density, Loss: 0.0001\n",
      "Epoch [4000/10000], Property: density, Loss: 0.0001\n",
      "Epoch [5000/10000], Property: density, Loss: 0.0001\n",
      "Epoch [6000/10000], Property: density, Loss: 0.0001\n",
      "Epoch [7000/10000], Property: density, Loss: 0.0001\n",
      "Epoch [8000/10000], Property: density, Loss: 0.0001\n",
      "Epoch [9000/10000], Property: density, Loss: 0.0001\n",
      "Epoch [10000/10000], Property: density, Loss: 0.0001\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: density\n",
      "  Train MSE: 0.0001, Test MSE: 0.0001\n",
      "  Train R²: 0.5866, Test R²: 0.5678\n",
      "Epoch [1000/10000], Property: avg_path_len, Loss: 0.4409\n",
      "Epoch [2000/10000], Property: avg_path_len, Loss: 0.3665\n",
      "Epoch [3000/10000], Property: avg_path_len, Loss: 0.3081\n",
      "Epoch [4000/10000], Property: avg_path_len, Loss: 0.2673\n",
      "Epoch [5000/10000], Property: avg_path_len, Loss: 0.2382\n",
      "Epoch [6000/10000], Property: avg_path_len, Loss: 0.2170\n",
      "Epoch [7000/10000], Property: avg_path_len, Loss: 0.2002\n",
      "Epoch [8000/10000], Property: avg_path_len, Loss: 0.1852\n",
      "Epoch [9000/10000], Property: avg_path_len, Loss: 0.1723\n",
      "Epoch [10000/10000], Property: avg_path_len, Loss: 0.1620\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: avg_path_len\n",
      "  Train MSE: 0.1620, Test MSE: 0.1622\n",
      "  Train R²: 0.3365, Test R²: 0.3401\n",
      "Epoch [1000/10000], Property: num_cliques, Loss: 37.3631\n",
      "Epoch [2000/10000], Property: num_cliques, Loss: 13.4036\n",
      "Epoch [3000/10000], Property: num_cliques, Loss: 7.4175\n",
      "Epoch [4000/10000], Property: num_cliques, Loss: 6.4281\n",
      "Epoch [5000/10000], Property: num_cliques, Loss: 6.0296\n",
      "Epoch [6000/10000], Property: num_cliques, Loss: 5.8053\n",
      "Epoch [7000/10000], Property: num_cliques, Loss: 5.6480\n",
      "Epoch [8000/10000], Property: num_cliques, Loss: 5.5252\n",
      "Epoch [9000/10000], Property: num_cliques, Loss: 5.4240\n",
      "Epoch [10000/10000], Property: num_cliques, Loss: 5.3352\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: num_cliques\n",
      "  Train MSE: 5.3351, Test MSE: 5.2662\n",
      "  Train R²: 0.5318, Test R²: 0.5337\n",
      "Epoch [1000/10000], Property: num_nodes, Loss: 67.2220\n",
      "Epoch [2000/10000], Property: num_nodes, Loss: 27.8214\n",
      "Epoch [3000/10000], Property: num_nodes, Loss: 14.2640\n",
      "Epoch [4000/10000], Property: num_nodes, Loss: 9.4263\n",
      "Epoch [5000/10000], Property: num_nodes, Loss: 7.3669\n",
      "Epoch [6000/10000], Property: num_nodes, Loss: 6.4943\n",
      "Epoch [7000/10000], Property: num_nodes, Loss: 6.0834\n",
      "Epoch [8000/10000], Property: num_nodes, Loss: 5.7593\n",
      "Epoch [9000/10000], Property: num_nodes, Loss: 5.4902\n",
      "Epoch [10000/10000], Property: num_nodes, Loss: 5.2956\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: num_nodes\n",
      "  Train MSE: 5.2954, Test MSE: 5.5111\n",
      "  Train R²: 0.3588, Test R²: 0.3152\n",
      "Epoch [1000/10000], Property: num_edges, Loss: 109.6324\n",
      "Epoch [2000/10000], Property: num_edges, Loss: 47.4416\n",
      "Epoch [3000/10000], Property: num_edges, Loss: 23.5882\n",
      "Epoch [4000/10000], Property: num_edges, Loss: 14.6224\n",
      "Epoch [5000/10000], Property: num_edges, Loss: 10.8945\n",
      "Epoch [6000/10000], Property: num_edges, Loss: 9.1815\n",
      "Epoch [7000/10000], Property: num_edges, Loss: 8.4330\n",
      "Epoch [8000/10000], Property: num_edges, Loss: 7.9151\n",
      "Epoch [9000/10000], Property: num_edges, Loss: 7.4820\n",
      "Epoch [10000/10000], Property: num_edges, Loss: 7.1611\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: num_edges\n",
      "  Train MSE: 7.1608, Test MSE: 7.6258\n",
      "  Train R²: 0.3326, Test R²: 0.3017\n",
      "Epoch [1000/10000], Property: density, Loss: 0.0010\n",
      "Epoch [2000/10000], Property: density, Loss: 0.0004\n",
      "Epoch [3000/10000], Property: density, Loss: 0.0002\n",
      "Epoch [4000/10000], Property: density, Loss: 0.0002\n",
      "Epoch [5000/10000], Property: density, Loss: 0.0001\n",
      "Epoch [6000/10000], Property: density, Loss: 0.0001\n",
      "Epoch [7000/10000], Property: density, Loss: 0.0001\n",
      "Epoch [8000/10000], Property: density, Loss: 0.0001\n",
      "Epoch [9000/10000], Property: density, Loss: 0.0001\n",
      "Epoch [10000/10000], Property: density, Loss: 0.0001\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: density\n",
      "  Train MSE: 0.0001, Test MSE: 0.0001\n",
      "  Train R²: 0.5956, Test R²: 0.5714\n",
      "Epoch [1000/10000], Property: avg_path_len, Loss: 0.3436\n",
      "Epoch [2000/10000], Property: avg_path_len, Loss: 0.2791\n",
      "Epoch [3000/10000], Property: avg_path_len, Loss: 0.2546\n",
      "Epoch [4000/10000], Property: avg_path_len, Loss: 0.2369\n",
      "Epoch [5000/10000], Property: avg_path_len, Loss: 0.2234\n",
      "Epoch [6000/10000], Property: avg_path_len, Loss: 0.2104\n",
      "Epoch [7000/10000], Property: avg_path_len, Loss: 0.1960\n",
      "Epoch [8000/10000], Property: avg_path_len, Loss: 0.1812\n",
      "Epoch [9000/10000], Property: avg_path_len, Loss: 0.1676\n",
      "Epoch [10000/10000], Property: avg_path_len, Loss: 0.1565\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: avg_path_len\n",
      "  Train MSE: 0.1565, Test MSE: 0.1537\n",
      "  Train R²: 0.3593, Test R²: 0.3749\n",
      "Epoch [1000/10000], Property: num_cliques, Loss: 99.7905\n",
      "Epoch [2000/10000], Property: num_cliques, Loss: 39.7331\n",
      "Epoch [3000/10000], Property: num_cliques, Loss: 17.9121\n",
      "Epoch [4000/10000], Property: num_cliques, Loss: 10.8497\n",
      "Epoch [5000/10000], Property: num_cliques, Loss: 8.4811\n",
      "Epoch [6000/10000], Property: num_cliques, Loss: 7.4351\n",
      "Epoch [7000/10000], Property: num_cliques, Loss: 6.8875\n",
      "Epoch [8000/10000], Property: num_cliques, Loss: 6.4473\n",
      "Epoch [9000/10000], Property: num_cliques, Loss: 6.0703\n",
      "Epoch [10000/10000], Property: num_cliques, Loss: 5.7812\n",
      "Embedding: torch.Size([1600, 30])\n",
      "Property: num_cliques\n",
      "  Train MSE: 5.7810, Test MSE: 6.0434\n",
      "  Train R²: 0.4927, Test R²: 0.4649\n",
      "Epoch [1000/10000], Property: num_nodes, Loss: 466.4979\n",
      "Epoch [2000/10000], Property: num_nodes, Loss: 389.3019\n",
      "Epoch [3000/10000], Property: num_nodes, Loss: 317.8561\n",
      "Epoch [4000/10000], Property: num_nodes, Loss: 254.2464\n",
      "Epoch [5000/10000], Property: num_nodes, Loss: 199.1347\n",
      "Epoch [6000/10000], Property: num_nodes, Loss: 152.5173\n",
      "Epoch [7000/10000], Property: num_nodes, Loss: 114.1728\n",
      "Epoch [8000/10000], Property: num_nodes, Loss: 83.7659\n",
      "Epoch [9000/10000], Property: num_nodes, Loss: 60.8418\n",
      "Epoch [10000/10000], Property: num_nodes, Loss: 44.7711\n",
      "Embedding: torch.Size([1600, 2])\n",
      "Property: num_nodes\n",
      "  Train MSE: 44.7582, Test MSE: 44.3011\n",
      "  Train R²: -4.4196, Test R²: -4.5048\n",
      "Epoch [1000/10000], Property: num_edges, Loss: 718.1711\n",
      "Epoch [2000/10000], Property: num_edges, Loss: 620.2916\n",
      "Epoch [3000/10000], Property: num_edges, Loss: 528.0339\n",
      "Epoch [4000/10000], Property: num_edges, Loss: 442.5022\n",
      "Epoch [5000/10000], Property: num_edges, Loss: 365.2748\n",
      "Epoch [6000/10000], Property: num_edges, Loss: 296.7726\n",
      "Epoch [7000/10000], Property: num_edges, Loss: 237.0131\n",
      "Epoch [8000/10000], Property: num_edges, Loss: 185.8629\n",
      "Epoch [9000/10000], Property: num_edges, Loss: 143.1084\n",
      "Epoch [10000/10000], Property: num_edges, Loss: 108.4616\n",
      "Embedding: torch.Size([1600, 2])\n",
      "Property: num_edges\n",
      "  Train MSE: 108.4309, Test MSE: 104.8644\n",
      "  Train R²: -9.1055, Test R²: -8.6021\n",
      "Epoch [1000/10000], Property: density, Loss: 0.0159\n",
      "Epoch [2000/10000], Property: density, Loss: 0.0104\n",
      "Epoch [3000/10000], Property: density, Loss: 0.0081\n",
      "Epoch [4000/10000], Property: density, Loss: 0.0054\n",
      "Epoch [5000/10000], Property: density, Loss: 0.0027\n",
      "Epoch [6000/10000], Property: density, Loss: 0.0010\n",
      "Epoch [7000/10000], Property: density, Loss: 0.0003\n",
      "Epoch [8000/10000], Property: density, Loss: 0.0002\n",
      "Epoch [9000/10000], Property: density, Loss: 0.0002\n",
      "Epoch [10000/10000], Property: density, Loss: 0.0002\n",
      "Embedding: torch.Size([1600, 2])\n",
      "Property: density\n",
      "  Train MSE: 0.0002, Test MSE: 0.0002\n",
      "  Train R²: 0.3215, Test R²: 0.3270\n",
      "Epoch [1000/10000], Property: avg_path_len, Loss: 4.3950\n",
      "Epoch [2000/10000], Property: avg_path_len, Loss: 1.6130\n",
      "Epoch [3000/10000], Property: avg_path_len, Loss: 1.1128\n",
      "Epoch [4000/10000], Property: avg_path_len, Loss: 0.8927\n",
      "Epoch [5000/10000], Property: avg_path_len, Loss: 0.6521\n",
      "Epoch [6000/10000], Property: avg_path_len, Loss: 0.4281\n",
      "Epoch [7000/10000], Property: avg_path_len, Loss: 0.2679\n",
      "Epoch [8000/10000], Property: avg_path_len, Loss: 0.1868\n",
      "Epoch [9000/10000], Property: avg_path_len, Loss: 0.1623\n",
      "Epoch [10000/10000], Property: avg_path_len, Loss: 0.1591\n",
      "Embedding: torch.Size([1600, 2])\n",
      "Property: avg_path_len\n",
      "  Train MSE: 0.1591, Test MSE: 0.1578\n",
      "  Train R²: 0.3484, Test R²: 0.3580\n",
      "Epoch [1000/10000], Property: num_cliques, Loss: 568.1819\n",
      "Epoch [2000/10000], Property: num_cliques, Loss: 476.0873\n",
      "Epoch [3000/10000], Property: num_cliques, Loss: 393.3170\n",
      "Epoch [4000/10000], Property: num_cliques, Loss: 320.1321\n",
      "Epoch [5000/10000], Property: num_cliques, Loss: 256.2071\n",
      "Epoch [6000/10000], Property: num_cliques, Loss: 201.2088\n",
      "Epoch [7000/10000], Property: num_cliques, Loss: 154.8359\n",
      "Epoch [8000/10000], Property: num_cliques, Loss: 116.7807\n",
      "Epoch [9000/10000], Property: num_cliques, Loss: 86.6812\n",
      "Epoch [10000/10000], Property: num_cliques, Loss: 64.0690\n",
      "Embedding: torch.Size([1600, 2])\n",
      "Property: num_cliques\n",
      "  Train MSE: 64.0500, Test MSE: 62.6073\n",
      "  Train R²: -4.6204, Test R²: -4.5434\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.optim as optim\n",
    "from sklearn.metrics import mean_squared_error, r2_score\n",
    "import numpy as np\n",
    "# Every probe regresses exactly one scalar graph property at a time.\n",
    "output_size = 1\n",
    "\n",
    "\n",
    "class LinearModel(nn.Module):\n",
    "    \"\"\"Linear probe: a single affine layer applied to an embedding.\"\"\"\n",
    "\n",
    "    def __init__(self, input_size, output_size):\n",
    "        super().__init__()\n",
    "        # The attribute stays named `linear` so state_dict keys are unchanged.\n",
    "        self.linear = nn.Linear(in_features=input_size, out_features=output_size)\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Return the affine projection of `x`.\"\"\"\n",
    "        projected = self.linear(x)\n",
    "        return projected\n",
    "\n",
    "# Assume we have already evaluated to get features\n",
    "# train_features, test_features = gnn.evaluate_with_features()\n",
    "\n",
    "# Positions 0..4 of every feature entry are labelled x1, x2, x_global, x_lin1, out.\n",
    "# NOTE(review): presumably this is the order produced by gnn.evaluate_with_features() -- confirm.\n",
    "embedding_names = ['x1', 'x2', 'x_global', 'x_lin1', 'out']\n",
    "\n",
    "\n",
    "def stack_embedding(features, index):\n",
    "    \"\"\"Collect the `index`-th entry of each item in `features` into one float32 tensor.\"\"\"\n",
    "    return torch.tensor(np.array([feat[index] for feat in features]), dtype=torch.float32)\n",
    "\n",
    "\n",
    "train_x, test_x = stack_embedding(train_features, 0), stack_embedding(test_features, 0)\n",
    "train_x2, test_x2 = stack_embedding(train_features, 1), stack_embedding(test_features, 1)\n",
    "train_x_global, test_x_global = stack_embedding(train_features, 2), stack_embedding(test_features, 2)\n",
    "train_x_lin1, test_x_lin1 = stack_embedding(train_features, 3), stack_embedding(test_features, 3)\n",
    "train_out, test_out = stack_embedding(train_features, 4), stack_embedding(test_features, 4)\n",
    "\n",
    "# Compute graph properties; column i of the result is treated as property_names[i] below.\n",
    "train_properties = compute_graph_properties(gnn.dataset[gnn.train_idx])\n",
    "test_properties = compute_graph_properties(gnn.dataset[gnn.test_idx])\n",
    "train_y = torch.tensor(train_properties, dtype=torch.float32)\n",
    "test_y = torch.tensor(test_properties, dtype=torch.float32)\n",
    "\n",
    "# Train and evaluate a model for each graph property and each embedding\n",
    "property_names = ['num_nodes', 'num_edges', 'density', 'avg_path_len', 'num_cliques']\n",
    "embeddings = [(train_x, test_x), (train_x2, test_x2), (train_x_global, test_x_global), (train_x_lin1, test_x_lin1), (train_out, test_out)]\n",
    "\n",
    "# Loop-invariant settings, hoisted out of the training loops.\n",
    "criterion = nn.MSELoss()\n",
    "num_epochs = 10000  # Adjust this as needed\n",
    "log_every = 1000  # Adjust this for more frequent/lower print frequency\n",
    "\n",
    "# NOTE(review): a previously recorded run reported negative train R² for the 2-dimensional\n",
    "# embedding. A converged least-squares fit with a bias cannot score below 0 on its own\n",
    "# training data, so those probes had not converged after num_epochs at lr=0.001. Consider\n",
    "# standardising inputs/targets or a closed-form fit before interpreting the R² values.\n",
    "for embedding_name, (train_embedding, test_embedding) in zip(embedding_names, embeddings):\n",
    "    input_size = train_embedding.shape[1]\n",
    "\n",
    "    for i, property_name in enumerate(property_names):\n",
    "        model = LinearModel(input_size, output_size)\n",
    "        optimizer = optim.Adam(model.parameters(), lr=0.001)\n",
    "        # The regression target does not change between epochs.\n",
    "        target = train_y[:, i]\n",
    "\n",
    "        model.train()\n",
    "        for epoch in range(num_epochs):\n",
    "            optimizer.zero_grad()\n",
    "\n",
    "            # squeeze(-1) drops only the output dimension, so a single-sample split stays 1-D.\n",
    "            outputs = model(train_embedding).squeeze(-1)\n",
    "\n",
    "            loss = criterion(outputs, target)\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "\n",
    "            if (epoch + 1) % log_every == 0:\n",
    "                print(f'Epoch [{epoch+1}/{num_epochs}], Property: {property_name}, Loss: {loss.item():.4f}')\n",
    "\n",
    "        # Evaluate the model\n",
    "        model.eval()\n",
    "        with torch.no_grad():\n",
    "            train_pred = model(train_embedding).squeeze(-1).cpu().numpy()\n",
    "            test_pred = model(test_embedding).squeeze(-1).cpu().numpy()\n",
    "\n",
    "        train_target = target.cpu().numpy()\n",
    "        test_target = test_y[:, i].cpu().numpy()\n",
    "\n",
    "        train_mse = mean_squared_error(train_target, train_pred)\n",
    "        test_mse = mean_squared_error(test_target, test_pred)\n",
    "\n",
    "        train_r2 = r2_score(train_target, train_pred)\n",
    "        test_r2 = r2_score(test_target, test_pred)\n",
    "\n",
    "        # Print the embedding name as well: several embeddings share the same shape.\n",
    "        print(f'Embedding: {embedding_name} {train_embedding.shape}')\n",
    "        print(f'Property: {property_name}')\n",
    "        print(f'  Train MSE: {train_mse:.4f}, Test MSE: {test_mse:.4f}')\n",
    "        print(f'  Train R²: {train_r2:.4f}, Test R²: {test_r2:.4f}')\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
