{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aed47885",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "import os\n",
    "import csv\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.decomposition import PCA\n",
    "from pathlib import Path\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2e47f3fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach precomputed embeddings to the nodes of every merged graph and save the result.\n",
    "# For each sample, the node whose ID equals the sample ID receives the flattened\n",
    "# 'focal_embedding'; every other node receives its entry from 'reference_embeddings',\n",
    "# or None when that dictionary has no entry for the node.\n",
    "# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.\n",
    "\n",
    "# Define base path as current folder\n",
    "base_path = Path(\".\")\n",
    "\n",
    "graph_pickle_path = base_path / \"merged_graphs.pickle\"\n",
    "save_path = base_path / \"merged_graphs_with_embedding.pickle\"\n",
    "\n",
    "# Embedding source name -> pickle file holding that source's embeddings\n",
    "embedding_files = {\n",
    "    \"ground_truth\": \"ground_truth_dictionary.pkl\",\n",
    "    \"generated\": \"generated_dictionary.pkl\",\n",
    "    \"random\": \"random_dictionary.pkl\"\n",
    "}\n",
    "\n",
    "# Graph key inside each merged_graphs entry -> embedding source name\n",
    "graph_to_emb_key = {\n",
    "    \"groundtruth_graph\": \"ground_truth\",\n",
    "    \"gpt_generated_graph\": \"generated\",\n",
    "    \"random_graph\": \"random\"\n",
    "}\n",
    "\n",
    "embeddings_dicts = {}\n",
    "for emb_key, fname in embedding_files.items():\n",
    "    # Build the path with pathlib, like every other path in this cell\n",
    "    file_path = base_path / fname\n",
    "    print(f\"Loading {file_path} ...\")\n",
    "    with open(file_path, 'rb') as f:\n",
    "        embeddings_dicts[emb_key] = pickle.load(f)\n",
    "print(\"Loaded embedding keys:\", list(embeddings_dicts))\n",
    "\n",
    "with open(graph_pickle_path, 'rb') as f:\n",
    "    merged_graphs = pickle.load(f)\n",
    "print(\"Loaded merged_graphs. Number of samples:\", len(merged_graphs))\n",
    "\n",
    "# Counters so that nodes left without an embedding are reported instead of passing silently\n",
    "total_nodes = 0\n",
    "missing_nodes = 0\n",
    "\n",
    "for sample_id, graphs in merged_graphs.items():\n",
    "    for graph_key, emb_key in graph_to_emb_key.items():\n",
    "        graph = graphs[graph_key]\n",
    "        emb_data = embeddings_dicts[emb_key][sample_id]\n",
    "        ref_emb = emb_data['reference_embeddings']\n",
    "        # Flatten the focal embedding to 1-D before storing it\n",
    "        focal_emb = emb_data['focal_embedding'].reshape(-1)\n",
    "        for node_id in graph.nodes():\n",
    "            if node_id == sample_id:\n",
    "                feature = focal_emb\n",
    "            else:\n",
    "                feature = ref_emb.get(node_id)\n",
    "            graph.nodes[node_id]['feature'] = feature\n",
    "            total_nodes += 1\n",
    "            if feature is None:\n",
    "                missing_nodes += 1\n",
    "\n",
    "with open(save_path, 'wb') as f:\n",
    "    pickle.dump(merged_graphs, f)\n",
    "\n",
    "# Only the 'feature' attribute is written, so the summary names exactly that\n",
    "print(\n",
    "    f\"Stored a 'feature' attribute on {total_nodes} nodes \"\n",
    "    f\"({missing_nodes} have no embedding and were set to None).\"\n",
    ")\n",
    "print(f\"Saved to {save_path}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65727a32",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Reload the saved graphs and print the leading node features of one sample\n",
    "sample_file = base_path / \"merged_graphs_with_embedding.pickle\"\n",
    "\n",
    "with open(sample_file, 'rb') as f:\n",
    "    merged_graphs = pickle.load(f)\n",
    "\n",
    "sample_id = 2108687244\n",
    "\n",
    "if sample_id in merged_graphs:\n",
    "    graphs = merged_graphs[sample_id]\n",
    "    print(f\"Sample ID: {sample_id}\\n\")\n",
    "    for graph_type, graph in graphs.items():\n",
    "        print(f\"--- {graph_type} ---\")\n",
    "        print(f\"Number of nodes: {graph.number_of_nodes()}\")\n",
    "        print(f\"Number of edges: {graph.number_of_edges()}\")\n",
    "        # Show at most the first 12 nodes of each graph\n",
    "        for shown, (node_id, node_attrs) in enumerate(graph.nodes(data=True)):\n",
    "            if shown >= 12:\n",
    "                break\n",
    "            print(f\"Node ID: {node_id}\")\n",
    "            feature = node_attrs.get('feature')\n",
    "            if feature is None:\n",
    "                print(\"  Feature: None\")\n",
    "            else:\n",
    "                print(\"  Feature shape:\", feature.shape)\n",
    "                print(\"  Feature sample (first 5 dims):\", feature[:5])\n",
    "        print()\n",
    "else:\n",
    "    print(f\"Sample ID {sample_id} not found in the data.\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3cf4629e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the merged graphs into one pickle file per graph category.\n",
    "sample_file = base_path / \"merged_graphs_with_embedding.pickle\"\n",
    "\n",
    "# Open sample_file explicitly; do not rely on a path variable left over from another cell\n",
    "with open(sample_file, 'rb') as f:\n",
    "    merged_graphs = pickle.load(f)\n",
    "\n",
    "categories = ['groundtruth_graph', 'gpt_generated_graph', 'random_graph']\n",
    "\n",
    "# One {sample_id: graph} dictionary per category\n",
    "split_graphs = {cat: {} for cat in categories}\n",
    "for sample_id, graphs in merged_graphs.items():\n",
    "    for cat in categories:\n",
    "        split_graphs[cat][sample_id] = graphs[cat]\n",
    "\n",
    "# Save each split dataset as a separate pickle file next to the merged file.\n",
    "# The name uses underscores throughout, matching the\n",
    "# \"groundtruth_graph_graphs_with_embedding.pickle\" file loaded later in this notebook.\n",
    "for cat in categories:\n",
    "    out_path = base_path / f\"{cat}_graphs_with_embedding.pickle\"\n",
    "    with open(out_path, 'wb') as f:\n",
    "        pickle.dump(split_graphs[cat], f)\n",
    "    print(f\"Saved {cat} dataset with {len(split_graphs[cat])} samples to {out_path}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9637adc7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load each per-category pickle and print a short preview of its first two samples.\n",
    "categories = [\"groundtruth_graph\", \"gpt_generated_graph\", \"random_graph\"]\n",
    "\n",
    "for cat in categories:\n",
    "    # Join with \"/\" so a path separator is inserted between folder and file name;\n",
    "    # the name follows the \"groundtruth_graph_graphs_with_embedding.pickle\" pattern\n",
    "    # that is loaded later in this notebook.\n",
    "    file_path = base_path / f\"{cat}_graphs_with_embedding.pickle\"\n",
    "    with open(file_path, 'rb') as f:\n",
    "        graphs = pickle.load(f)\n",
    "    print(f\"\\n--- {cat.upper()} ---\")\n",
    "    print(f\"Total samples: {len(graphs)}\")\n",
    "    sample_keys = list(graphs.keys())[:2]\n",
    "\n",
    "    for sample_id in sample_keys:\n",
    "        print(f\"\\nSample ID: {sample_id}\")\n",
    "        print(f\"Type of sample: {type(graphs[sample_id])}\")\n",
    "        try:\n",
    "            preview = str(graphs[sample_id])\n",
    "            # Cap the preview so a large object does not flood the output\n",
    "            if len(preview) > 700:\n",
    "                preview = preview[:700] + \"\\n[TRUNCATED]\"\n",
    "            print(f\"Preview:\\n{preview}\")\n",
    "        except Exception as e:\n",
    "            print(f\"Can't print this sample directly: {e}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9584afab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the ground-truth split and look at the first sample's graph\n",
    "file_path = base_path / \"groundtruth_graph_graphs_with_embedding.pickle\"\n",
    "with open(file_path, \"rb\") as f:\n",
    "    groundtruth_graph = pickle.load(f)\n",
    "\n",
    "sample_id = list(groundtruth_graph)[0]\n",
    "G = groundtruth_graph[sample_id]\n",
    "\n",
    "print(f\"Sample ID: {sample_id}\")\n",
    "print(f\"Number of nodes: {G.number_of_nodes()}\")\n",
    "print(f\"Number of edges: {G.number_of_edges()}\")\n",
    "\n",
    "# Report the stored feature for the first 20 nodes only\n",
    "for node, attrs in list(G.nodes(data=True))[:20]:\n",
    "    feature = attrs.get(\"feature\")\n",
    "    print(f\"Node ID: {node}\")\n",
    "    if isinstance(feature, np.ndarray):\n",
    "        print(f\"  Feature shape: {feature.shape}\")\n",
    "        print(f\"  Feature sample (first 5 dims): {feature[:5]}\")\n",
    "    elif feature is None:\n",
    "        print(\"  Feature: None\")\n",
    "    else:\n",
    "        # Anything that is neither an array nor None is shown as-is\n",
    "        print(f\"  Feature type: {type(feature)}\")\n",
    "        print(f\"  Feature preview: {feature}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "67cd6fd2",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
