{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "9b8fea47",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Edge list saved to .\\data\\cora_edges.csv\n"
     ]
    }
   ],
   "source": [
    "# -----------------------------\n",
    "# Jupyter Notebook: Run M-NMF on the Cora graph\n",
    "# -----------------------------\n",
    "\n",
    "import os\n",
    "import sys\n",
    "\n",
    "import pandas as pd\n",
    "from torch_geometric.datasets import Planetoid\n",
    "\n",
    "# -----------------------------\n",
    "# Step 1: Set paths\n",
    "# -----------------------------\n",
    "# Every path is assembled component-by-component with os.path.join so the\n",
    "# separator is correct on any OS (on Windows the resulting strings are the\n",
    "# same as the previous hard-coded backslash versions).\n",
    "BASE_DIR = \".\"\n",
    "\n",
    "DATA_DIR = os.path.join(BASE_DIR, \"data\")\n",
    "SRC_DIR = os.path.join(BASE_DIR, \"src\")\n",
    "OUTPUT_DIR = os.path.join(BASE_DIR, \"output\")\n",
    "\n",
    "OUTPUT_EMB = os.path.join(OUTPUT_DIR, \"embeddings\", \"cora_embeddings.csv\")\n",
    "OUTPUT_CLUSTER = os.path.join(OUTPUT_DIR, \"cluster_means\", \"cora_clusters.csv\")\n",
    "OUTPUT_ASSIGN = os.path.join(OUTPUT_DIR, \"assignments\", \"cora_assignment.json\")\n",
    "OUTPUT_LOG = os.path.join(OUTPUT_DIR, \"logs\", \"cora_log.json\")\n",
    "EDGE_CSV = os.path.join(DATA_DIR, \"cora_edges.csv\")\n",
    "\n",
    "# Make sure the data directory and the parent directory of every output file exist\n",
    "os.makedirs(DATA_DIR, exist_ok=True)\n",
    "for output_file in (OUTPUT_EMB, OUTPUT_CLUSTER, OUTPUT_ASSIGN, OUTPUT_LOG):\n",
    "    os.makedirs(os.path.dirname(output_file), exist_ok=True)\n",
    "\n",
    "# -----------------------------\n",
    "# Step 2: Load Cora dataset\n",
    "# -----------------------------\n",
    "# root=DATA_DIR tells Planetoid where to keep the dataset files.\n",
    "dataset = Planetoid(root=DATA_DIR, name='Cora')\n",
    "data = dataset[0]\n",
    "\n",
    "# Save edge list as CSV; edge_index is transposed so that each row holds one\n",
    "# (source, target) pair.\n",
    "edges = data.edge_index.t().numpy()\n",
    "edge_df = pd.DataFrame(edges, columns=['source', 'target'])\n",
    "edge_df.to_csv(EDGE_CSV, index=False)\n",
    "print(f\"Edge list saved to {EDGE_CSV}\")\n",
    "\n",
    "# -----------------------------\n",
    "# Step 3: Add src to sys.path and import M-NMF\n",
    "# -----------------------------\n",
    "# Guarded so that re-running this cell does not keep appending the same entry.\n",
    "if SRC_DIR not in sys.path:\n",
    "    sys.path.append(SRC_DIR)\n",
    "\n",
    "from main_get_emb import create_and_run_model\n",
    "from param_parser import parameter_parser\n",
    "\n",
    "# -----------------------------\n",
    "# Step 4: Setup args for M-NMF\n",
    "# -----------------------------\n",
    "sys.argv = ['']  # prevent argparse from parsing Jupyter args\n",
    "\n",
    "# Start from the parser defaults, then override the I/O paths and settings below.\n",
    "args = parameter_parser()\n",
    "args.input = EDGE_CSV\n",
    "args.embedding_output = OUTPUT_EMB\n",
    "args.cluster_mean_output = OUTPUT_CLUSTER\n",
    "args.assignment_output = OUTPUT_ASSIGN\n",
    "args.log_output = OUTPUT_LOG\n",
    "args.dump_matrices = True\n",
    "args.dimensions = 150"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c4b35b44-dd71-4b85-9db9-8ab518da7361",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---------------------+-------------------------------------------+\n",
      "|        Input        |           .\\data\\cora_edges.csv           |\n",
      "+=====================+===========================================+\n",
      "| Embedding output    | .\\output\\embeddings\\cora_embeddings.csv   |\n",
      "+---------------------+-------------------------------------------+\n",
      "| Cluster mean output | .\\output\\cluster_means\\cora_clusters.csv  |\n",
      "+---------------------+-------------------------------------------+\n",
      "| Log output          | .\\output\\logs\\cora_log.json               |\n",
      "+---------------------+-------------------------------------------+\n",
      "| Assignment output   | .\\output\\assignments\\cora_assignment.json |\n",
      "+---------------------+-------------------------------------------+\n",
      "| Dump matrices       | True                                      |\n",
      "+---------------------+-------------------------------------------+\n",
      "| Dimensions          | 150                                       |\n",
      "+---------------------+-------------------------------------------+\n",
      "| Clusters            | 20                                        |\n",
      "+---------------------+-------------------------------------------+\n",
      "| Lambd               | 0.200                                     |\n",
      "+---------------------+-------------------------------------------+\n",
      "| Alpha               | 0.050                                     |\n",
      "+---------------------+-------------------------------------------+\n",
      "| Beta                | 0.050                                     |\n",
      "+---------------------+-------------------------------------------+\n",
      "| Iteration number    | 200                                       |\n",
      "+---------------------+-------------------------------------------+\n",
      "| Early stopping      | 3                                         |\n",
      "+---------------------+-------------------------------------------+\n",
      "| Lower control       | 0.000                                     |\n",
      "+---------------------+-------------------------------------------+\n",
      "| Eta                 | 5                                         |\n",
      "+---------------------+-------------------------------------------+\n",
      "Model initialization started.\n",
      "\n",
      "WARNING:tensorflow:From .\\src\\modularity_nmf_get_emb.py:34: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.\n",
      "\n",
      "WARNING:tensorflow:From .\\src\\modularity_nmf_get_emb.py:38: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.\n",
      "\n",
      "WARNING:tensorflow:From .\\src\\modularity_nmf_get_emb.py:48: The name tf.global_variables_initializer is deprecated. Please use tf.compat.v1.global_variables_initializer instead.\n",
      "\n",
      "WARNING:tensorflow:From .\\src\\modularity_nmf_get_emb.py:135: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.\n",
      "\n",
      "Optimization started.\n",
      "\n",
      "Second order proximity calculation.\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████| 2708/2708 [00:09<00:00, 297.40it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Modularity calculation.\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████| 2708/2708 [00:05<00:00, 522.38it/s]\n",
      "  2%|█▏                                                                                | 3/200 [00:00<00:38,  5.13it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Shape of embeddings: (2708, 150)\n"
     ]
    }
   ],
   "source": [
    "# Train M-NMF with the arguments configured above; the call hands back the\n",
    "# embeddings, which later cells reference as `embeddings_np`.\n",
    "embeddings_np = create_and_run_model(args)\n",
    "\n",
    "# Quick sanity check: report the dimensions of the returned array.\n",
    "embedding_shape = embeddings_np.shape\n",
    "print(\"Shape of embeddings:\", embedding_shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "16268fd4-ed56-40f1-99a7-ae51e95b4976",
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'np' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-3-869ce46cd979>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[0moutput_path\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"output\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"embeddings\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"cora_embeddings.npy\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmakedirs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdirname\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0moutput_path\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mexist_ok\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0moutput_path\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0membeddings_np\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      6\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34mf\"Embeddings saved as NumPy array at {output_path}\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNameError\u001b[0m: name 'np' is not defined"
     ]
    }
   ],
   "source": [
    "# Bind the conventional `np` alias: a plain `import numpy` leaves `np`\n",
    "# undefined, which made the np.save call below raise NameError.\n",
    "import numpy as np\n",
    "\n",
    "# Save embeddings as .npy file, anchored at BASE_DIR like the other outputs\n",
    "output_path = os.path.join(BASE_DIR, \"output\", \"embeddings\", \"cora_embeddings.npy\")\n",
    "os.makedirs(os.path.dirname(output_path), exist_ok=True)\n",
    "np.save(output_path, embeddings_np)\n",
    "\n",
    "print(f\"Embeddings saved as NumPy array at {output_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "954490e9-03d2-4784-9e0b-5baecb760450",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: numpy in c:\\users\\user\\anaconda3\\envs\\mnmf\\lib\\site-packages (1.19.5)\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "# Use the explicit %pip line magic rather than a bare `pip`, which only works\n",
    "# when IPython automagic is enabled; %pip installs into the running kernel's\n",
    "# environment.\n",
    "%pip install numpy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "27a81104-92d7-406d-8b1c-e17f5065cedd",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
