{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9b8fea47",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Processing dataset: AmazonPhoto\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Downloading https://github.com/shchur/gnn-benchmark/raw/master/data/npz/amazon_electronics_photo.npz\n",
      "Processing...\n",
      "C:\\Users\\user\\anaconda3\\envs\\mnmf\\lib\\site-packages\\torch_geometric\\io\\npz.py:21: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider converting the list to a single numpy.ndarray with numpy.array() before converting to a tensor. (Triggered internally at  ..\\torch\\csrc\\utils\\tensor_new.cpp:201.)\n",
      "  edge_index = torch.tensor([adj.row, adj.col], dtype=torch.long)\n",
      "Done!\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Edge list saved to .\\data\\AmazonPhoto\\AmazonPhoto_edges.csv\n",
      "\n",
      "Processing split: 30_70\n",
      "\n",
      "Running seed: 42\n",
      "+---------------------+--------------------------------------------------------+\n",
      "|        Input        |        .\\data\\AmazonPhoto\\AmazonPhoto_edges.csv        |\n",
      "+=====================+========================================================+\n",
      "| Embedding output    | .\\output\\embeddings\\AmazonPhoto\\30_70\\AmazonPhoto_30_7 |\n",
      "|                     | 0_seed42_mnmf.pkl                                      |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Cluster mean output | .\\output\\cluster_means\\AmazonPhoto\\AmazonPhoto_seed42_ |\n",
      "|                     | clusters.csv                                           |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Log output          | .\\output\\logs\\AmazonPhoto\\AmazonPhoto_seed42_log.json  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Assignment output   | .\\output\\assignments\\AmazonPhoto\\AmazonPhoto_seed42_as |\n",
      "|                     | signment.json                                          |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Dump matrices       | True                                                   |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Dimensions          | 150                                                    |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Clusters            | 20                                                     |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Lambd               | 0.200                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Alpha               | 0.050                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Beta                | 0.050                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Iteration number    | 200                                                    |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Early stopping      | 3                                                      |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Lower control       | 0.000                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Eta                 | 5                                                      |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "Model initialization started.\n",
      "\n",
      "WARNING:tensorflow:From ./src\\modularity_nmf_get_emb.py:39: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.\n",
      "\n",
      "WARNING:tensorflow:From ./src\\modularity_nmf_get_emb.py:43: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.\n",
      "\n",
      "WARNING:tensorflow:From ./src\\modularity_nmf_get_emb.py:53: The name tf.global_variables_initializer is deprecated. Please use tf.compat.v1.global_variables_initializer instead.\n",
      "\n",
      "WARNING:tensorflow:From ./src\\modularity_nmf_get_emb.py:137: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.\n",
      "\n",
      "Optimization started.\n",
      "\n",
      "Second order proximity calculation.\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████████| 7535/7535 [01:32<00:00, 81.54it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Modularity calculation.\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████| 7535/7535 [00:42<00:00, 175.94it/s]\n",
      " 91%|████████████████████████████████████████████████████████████████████████▊       | 182/200 [02:51<00:16,  1.06it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Seed 42 execution time: 315.61 seconds\n",
      "Embeddings saved as .pkl at .\\output\\embeddings\\AmazonPhoto\\30_70\\AmazonPhoto_30_70_seed42_mnmf.pkl\n",
      "Copied embedding to 70_30 split: .\\output\\embeddings\\AmazonPhoto\\70_30\\AmazonPhoto_70_30_seed42_mnmf.pkl\n",
      "\n",
      "Running seed: 46\n",
      "+---------------------+--------------------------------------------------------+\n",
      "|        Input        |        .\\data\\AmazonPhoto\\AmazonPhoto_edges.csv        |\n",
      "+=====================+========================================================+\n",
      "| Embedding output    | .\\output\\embeddings\\AmazonPhoto\\30_70\\AmazonPhoto_30_7 |\n",
      "|                     | 0_seed46_mnmf.pkl                                      |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Cluster mean output | .\\output\\cluster_means\\AmazonPhoto\\AmazonPhoto_seed46_ |\n",
      "|                     | clusters.csv                                           |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Log output          | .\\output\\logs\\AmazonPhoto\\AmazonPhoto_seed46_log.json  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Assignment output   | .\\output\\assignments\\AmazonPhoto\\AmazonPhoto_seed46_as |\n",
      "|                     | signment.json                                          |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Dump matrices       | True                                                   |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Dimensions          | 150                                                    |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Clusters            | 20                                                     |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Lambd               | 0.200                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Alpha               | 0.050                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Beta                | 0.050                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Iteration number    | 200                                                    |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Early stopping      | 3                                                      |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Lower control       | 0.000                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Eta                 | 5                                                      |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "Model initialization started.\n",
      "\n",
      "Optimization started.\n",
      "\n",
      "Second order proximity calculation.\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████████| 7535/7535 [01:27<00:00, 86.06it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Modularity calculation.\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████| 7535/7535 [00:42<00:00, 177.04it/s]\n",
      "100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [03:10<00:00,  1.05it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Seed 46 execution time: 326.86 seconds\n",
      "Embeddings saved as .pkl at .\\output\\embeddings\\AmazonPhoto\\30_70\\AmazonPhoto_30_70_seed46_mnmf.pkl\n",
      "Copied embedding to 70_30 split: .\\output\\embeddings\\AmazonPhoto\\70_30\\AmazonPhoto_70_30_seed46_mnmf.pkl\n",
      "\n",
      "Running seed: 123\n",
      "+---------------------+--------------------------------------------------------+\n",
      "|        Input        |        .\\data\\AmazonPhoto\\AmazonPhoto_edges.csv        |\n",
      "+=====================+========================================================+\n",
      "| Embedding output    | .\\output\\embeddings\\AmazonPhoto\\30_70\\AmazonPhoto_30_7 |\n",
      "|                     | 0_seed123_mnmf.pkl                                     |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Cluster mean output | .\\output\\cluster_means\\AmazonPhoto\\AmazonPhoto_seed123 |\n",
      "|                     | _clusters.csv                                          |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Log output          | .\\output\\logs\\AmazonPhoto\\AmazonPhoto_seed123_log.json |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Assignment output   | .\\output\\assignments\\AmazonPhoto\\AmazonPhoto_seed123_a |\n",
      "|                     | ssignment.json                                         |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Dump matrices       | True                                                   |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Dimensions          | 150                                                    |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Clusters            | 20                                                     |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Lambd               | 0.200                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Alpha               | 0.050                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Beta                | 0.050                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Iteration number    | 200                                                    |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Early stopping      | 3                                                      |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Lower control       | 0.000                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Eta                 | 5                                                      |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "Model initialization started.\n",
      "\n",
      "Optimization started.\n",
      "\n",
      "Second order proximity calculation.\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████████| 7535/7535 [01:29<00:00, 84.02it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Modularity calculation.\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████| 7535/7535 [00:43<00:00, 173.93it/s]\n",
      "100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [03:06<00:00,  1.07it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Seed 123 execution time: 325.98 seconds\n",
      "Embeddings saved as .pkl at .\\output\\embeddings\\AmazonPhoto\\30_70\\AmazonPhoto_30_70_seed123_mnmf.pkl\n",
      "Copied embedding to 70_30 split: .\\output\\embeddings\\AmazonPhoto\\70_30\\AmazonPhoto_70_30_seed123_mnmf.pkl\n",
      "\n",
      "Running seed: 2025\n",
      "+---------------------+--------------------------------------------------------+\n",
      "|        Input        |        .\\data\\AmazonPhoto\\AmazonPhoto_edges.csv        |\n",
      "+=====================+========================================================+\n",
      "| Embedding output    | .\\output\\embeddings\\AmazonPhoto\\30_70\\AmazonPhoto_30_7 |\n",
      "|                     | 0_seed2025_mnmf.pkl                                    |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Cluster mean output | .\\output\\cluster_means\\AmazonPhoto\\AmazonPhoto_seed202 |\n",
      "|                     | 5_clusters.csv                                         |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Log output          | .\\output\\logs\\AmazonPhoto\\AmazonPhoto_seed2025_log.jso |\n",
      "|                     | n                                                      |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Assignment output   | .\\output\\assignments\\AmazonPhoto\\AmazonPhoto_seed2025_ |\n",
      "|                     | assignment.json                                        |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Dump matrices       | True                                                   |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Dimensions          | 150                                                    |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Clusters            | 20                                                     |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Lambd               | 0.200                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Alpha               | 0.050                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Beta                | 0.050                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Iteration number    | 200                                                    |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Early stopping      | 3                                                      |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Lower control       | 0.000                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Eta                 | 5                                                      |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "Model initialization started.\n",
      "\n",
      "Optimization started.\n",
      "\n",
      "Second order proximity calculation.\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████████| 7535/7535 [01:28<00:00, 85.34it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Modularity calculation.\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████| 7535/7535 [00:43<00:00, 175.18it/s]\n",
      "100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [03:06<00:00,  1.07it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Seed 2025 execution time: 324.43 seconds\n",
      "Embeddings saved as .pkl at .\\output\\embeddings\\AmazonPhoto\\30_70\\AmazonPhoto_30_70_seed2025_mnmf.pkl\n",
      "Copied embedding to 70_30 split: .\\output\\embeddings\\AmazonPhoto\\70_30\\AmazonPhoto_70_30_seed2025_mnmf.pkl\n",
      "\n",
      "Running seed: 999\n",
      "+---------------------+--------------------------------------------------------+\n",
      "|        Input        |        .\\data\\AmazonPhoto\\AmazonPhoto_edges.csv        |\n",
      "+=====================+========================================================+\n",
      "| Embedding output    | .\\output\\embeddings\\AmazonPhoto\\30_70\\AmazonPhoto_30_7 |\n",
      "|                     | 0_seed999_mnmf.pkl                                     |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Cluster mean output | .\\output\\cluster_means\\AmazonPhoto\\AmazonPhoto_seed999 |\n",
      "|                     | _clusters.csv                                          |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Log output          | .\\output\\logs\\AmazonPhoto\\AmazonPhoto_seed999_log.json |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Assignment output   | .\\output\\assignments\\AmazonPhoto\\AmazonPhoto_seed999_a |\n",
      "|                     | ssignment.json                                         |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Dump matrices       | True                                                   |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Dimensions          | 150                                                    |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Clusters            | 20                                                     |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Lambd               | 0.200                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Alpha               | 0.050                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Beta                | 0.050                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Iteration number    | 200                                                    |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Early stopping      | 3                                                      |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Lower control       | 0.000                                                  |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "| Eta                 | 5                                                      |\n",
      "+---------------------+--------------------------------------------------------+\n",
      "Model initialization started.\n",
      "\n",
      "Optimization started.\n",
      "\n",
      "Second order proximity calculation.\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████████████████████████████████████████████████████████████████████████| 7535/7535 [01:28<00:00, 85.57it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Modularity calculation.\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████| 7535/7535 [00:43<00:00, 173.50it/s]\n",
      "100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [03:05<00:00,  1.08it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Seed 999 execution time: 323.69 seconds\n",
      "Embeddings saved as .pkl at .\\output\\embeddings\\AmazonPhoto\\30_70\\AmazonPhoto_30_70_seed999_mnmf.pkl\n",
      "Copied embedding to 70_30 split: .\\output\\embeddings\\AmazonPhoto\\70_30\\AmazonPhoto_70_30_seed999_mnmf.pkl\n",
      "\n",
      "Average execution time for AmazonPhoto (30_70): 323.31 seconds\n",
      "Average execution time saved to .\\output\\execution_times\\AmazonPhoto_avg_execution_time.csv\n",
      "\n",
      "Processing split: 70_30\n",
      "Skipping model run for split 70_30, since it's a copy from 30_70.\n",
      "AmazonPhoto processing completed!\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# -----------------------------\n",
    "# Unified M-NMF Embedding Pipeline with Seed Support, Split Support, and Average Execution Time\n",
    "# -----------------------------\n",
    "\n",
    "import os\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import sys\n",
    "import time\n",
    "from torch_geometric.datasets import Planetoid, WikiCS, Amazon\n",
    "\n",
    "# Import modified M-NMF function that returns embeddings\n",
    "sys.path.append(\"./src\")\n",
    "from main_get_emb import create_and_run_model\n",
    "from param_parser import parameter_parser\n",
    "\n",
    "# -----------------------------\n",
    "# Step 0: Dataset Configuration\n",
    "# -----------------------------\n",
    "#DATASET_CONFIGS = [\n",
    "    #{\"name\": \"Cora\", \"loader\": Planetoid},\n",
    "    #{\"name\": \"CiteSeer\", \"loader\": Planetoid},\n",
    "    #{\"name\": \"PubMed\", \"loader\": Planetoid},\n",
    "    #{\"name\": \"WikiCS\", \"loader\": WikiCS},\n",
    "    #{\"name\": \"AmazonPhotos\", \"loader\": Amazon},  # PyG expects \"AmazonPhotos\"\n",
    "#]\n",
    "\n",
    "DATASET_CONFIGS = [\n",
    "    #{\"name\": \"Cora\", \"loader\": Planetoid, \"pyg_name\": \"Cora\"},\n",
    "    #{\"name\": \"CiteSeer\", \"loader\": Planetoid, \"pyg_name\": \"CiteSeer\"},\n",
    "    #{\"name\": \"PubMed\", \"loader\": Planetoid, \"pyg_name\": \"PubMed\"},\n",
    "    #{\"name\": \"WikiCS\", \"loader\": WikiCS, \"pyg_name\": None},  # WikiCS doesn’t need a name\n",
    "    {\"name\": \"AmazonPhotos\", \"loader\": Amazon, \"pyg_name\": \"photo\"},  # Use \"photos\"\n",
    "]\n",
    "\n",
    "\n",
    "SEEDS = [42, 46, 123, 2025, 999]\n",
    "SPLITS = ['30_70', '70_30']\n",
    "\n",
    "BASE_DIR = \".\"\n",
    "OUTPUT_DIR = os.path.join(BASE_DIR, \"output\")\n",
    "\n",
    "# -----------------------------\n",
    "# Step 1: Iterate over datasets\n",
    "# -----------------------------\n",
    "for ds in DATASET_CONFIGS:\n",
    "    dataset_name = ds[\"name\"]\n",
    "    loader = ds[\"loader\"]\n",
    "\n",
    "    print(f\"\\nProcessing dataset: {dataset_name}\")\n",
    "\n",
    "    # Paths\n",
    "    data_dir = os.path.join(BASE_DIR, \"data\", dataset_name)\n",
    "    emb_base_dir = os.path.join(OUTPUT_DIR, \"embeddings\", dataset_name)\n",
    "    cluster_dir = os.path.join(OUTPUT_DIR, \"cluster_means\", dataset_name)\n",
    "    assign_dir = os.path.join(OUTPUT_DIR, \"assignments\", dataset_name)\n",
    "    log_dir = os.path.join(OUTPUT_DIR, \"logs\", dataset_name)\n",
    "    time_dir = os.path.join(OUTPUT_DIR, \"execution_times\")\n",
    "\n",
    "    os.makedirs(data_dir, exist_ok=True)\n",
    "    os.makedirs(cluster_dir, exist_ok=True)\n",
    "    os.makedirs(assign_dir, exist_ok=True)\n",
    "    os.makedirs(log_dir, exist_ok=True)\n",
    "    os.makedirs(time_dir, exist_ok=True)\n",
    "\n",
    "    # Load Dataset\n",
    "    #if loader == Planetoid:\n",
    "    #    dataset = loader(root=data_dir, name=dataset_name)\n",
    "    #elif loader == WikiCS:\n",
    "    #    dataset = loader(root=data_dir)\n",
    "    #elif loader == Amazon:\n",
    "    #    dataset = loader(root=data_dir, name=dataset_name)\n",
    "    #else:\n",
    "    #    raise NotImplementedError(f\"Loader for {dataset_name} not implemented!\")\n",
    "    \n",
    "    if loader == Planetoid or loader == Amazon:\n",
    "        dataset = loader(root=data_dir, name=ds[\"pyg_name\"])\n",
    "    elif loader == WikiCS:\n",
    "        dataset = loader(root=data_dir)\n",
    "    else:\n",
    "        raise NotImplementedError(f\"Loader for {dataset_name} not implemented!\")\n",
    "\n",
    "    data = dataset[0]\n",
    "\n",
    "    # Save edges as CSV\n",
    "    if hasattr(data, 'edge_index'):\n",
    "        edges = data.edge_index.t().numpy()\n",
    "    else:\n",
    "        adj = data.adj_t.to_dense()\n",
    "        edges = np.array(np.nonzero(adj)).T\n",
    "\n",
    "    edge_df = pd.DataFrame(edges, columns=['source', 'target'])\n",
    "    edge_csv = os.path.join(data_dir, f\"{dataset_name}_edges.csv\")\n",
    "    edge_df.to_csv(edge_csv, index=False)\n",
    "    print(f\"Edge list saved to {edge_csv}\")\n",
    "\n",
    "    for split in SPLITS:\n",
    "        print(f\"\\nProcessing split: {split}\")\n",
    "\n",
    "        emb_dir = os.path.join(emb_base_dir, split)\n",
    "        os.makedirs(emb_dir, exist_ok=True)\n",
    "\n",
    "        if split == '30_70':\n",
    "            total_execution_time = 0.0\n",
    "\n",
    "            for seed in SEEDS:\n",
    "                print(f\"\\nRunning seed: {seed}\")\n",
    "\n",
    "                sys.argv = ['']  # Clear command-line arguments\n",
    "\n",
    "                args = parameter_parser()\n",
    "                args.input = edge_csv\n",
    "                args.embedding_output = os.path.join(\n",
    "                    emb_dir, f\"{dataset_name}_{split}_seed{seed}_mnmf.pkl\"\n",
    "                )\n",
    "                args.cluster_mean_output = os.path.join(cluster_dir, f\"{dataset_name}_seed{seed}_clusters.csv\")\n",
    "                args.assignment_output = os.path.join(assign_dir, f\"{dataset_name}_seed{seed}_assignment.json\")\n",
    "                args.log_output = os.path.join(log_dir, f\"{dataset_name}_seed{seed}_log.json\")\n",
    "                args.dump_matrices = True\n",
    "                args.dimensions = 150\n",
    "                np.random.seed(seed)\n",
    "\n",
    "                start_time = time.time()\n",
    "                embeddings = create_and_run_model(args)\n",
    "                end_time = time.time()\n",
    "\n",
    "                execution_duration = end_time - start_time\n",
    "                total_execution_time += execution_duration\n",
    "\n",
    "                print(f\"Seed {seed} execution time: {execution_duration:.2f} seconds\")\n",
    "\n",
    "                emb_pkl_path = os.path.join(emb_dir, f\"{dataset_name}_{split}_seed{seed}_mnmf.pkl\")\n",
    "                pd.to_pickle(embeddings, emb_pkl_path)\n",
    "                print(f\"Embeddings saved as .pkl at {emb_pkl_path}\")\n",
    "\n",
    "                # Copy embedding to 70_30 folder\n",
    "                emb_dir_70_30 = os.path.join(emb_base_dir, '70_30')\n",
    "                os.makedirs(emb_dir_70_30, exist_ok=True)\n",
    "                emb_pkl_70_30_path = os.path.join(emb_dir_70_30, f\"{dataset_name}_70_30_seed{seed}_mnmf.pkl\")\n",
    "                pd.to_pickle(embeddings, emb_pkl_70_30_path)\n",
    "                print(f\"Copied embedding to 70_30 split: {emb_pkl_70_30_path}\")\n",
    "\n",
    "            avg_execution_time = total_execution_time / len(SEEDS)\n",
    "            print(f\"\\nAverage execution time for {dataset_name} (30_70): {avg_execution_time:.2f} seconds\")\n",
    "\n",
    "            # Save average execution time once\n",
    "            time_df = pd.DataFrame({\n",
    "                \"dataset\": [dataset_name],\n",
    "                \"split\": ['30_70'],\n",
    "                \"average_execution_time_sec\": [avg_execution_time]\n",
    "            })\n",
    "            time_csv_path = os.path.join(time_dir, f\"{dataset_name}_avg_execution_time.csv\")\n",
    "            time_df.to_csv(time_csv_path, index=False)\n",
    "            print(f\"Average execution time saved to {time_csv_path}\")\n",
    "\n",
    "        else:\n",
    "            # Skip model run for '70_30' split\n",
    "            print(f\"Skipping model run for split {split}, since it's a copy from 30_70.\")\n",
    "\n",
    "\n",
    "    print(f\"{dataset_name} processing completed!\\n\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a23494cd-8ac6-4c6e-938c-7637826995f2",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
