{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e538b4d4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ELSA statistics (first 5):\n",
      "[ 0.92422243  0.07457548 -0.33358348 -0.28251264 -0.15223174]\n",
      "\n",
      "Standardized cosine similarities (z) (first 5):\n",
      "[-1.70043636 -0.21470018 -0.63104324  0.40233445  0.23555044]\n",
      "\n",
      "Pseudo p-values (first 5):\n",
      "[0.464 0.582 0.342 0.23  0.244]\n",
      "\n",
      "Quadrant classifications (first 5):\n",
      "[2 2 3 4 4]\n",
      "\n",
      "DataFrame with results (first 5 rows):\n",
      "   lat  lon    elsa_e  elsa_p_sim  elsa_q\n",
      "0    0    0  0.924222       0.464       2\n",
      "1    0    1  0.074575       0.582       2\n",
      "2    0    2 -0.333583       0.342       3\n",
      "3    0    3 -0.282513       0.230       4\n",
      "4    0    4 -0.152232       0.244       4\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from libpysal.weights import lat2W\n",
    "import importlib\n",
    "from elsa import ELSA\n",
    "\n",
    "# 1. Generate some dummy data\n",
    "# Locations live on a regular GRID_ROWS x GRID_COLS lattice and each one\n",
    "# carries an embedding_dim-dimensional embedding vector.\n",
    "GRID_ROWS, GRID_COLS = 10, 10\n",
    "N_PERMUTATIONS = 999  # number of permutations requested from ELSA\n",
    "n_observations = GRID_ROWS * GRID_COLS  # derived, so it cannot drift from the grid\n",
    "embedding_dim = 64\n",
    "np.random.seed(42)  # legacy global seed; the only seeding done in this cell\n",
    "embeddings = np.random.rand(n_observations, embedding_dim)\n",
    "# Scale every row to unit L2 norm\n",
    "embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)\n",
    "\n",
    "# Row-major (row, col) coordinates: observation i sits at (i // GRID_COLS, i % GRID_COLS)\n",
    "coords = np.column_stack(np.divmod(np.arange(n_observations), GRID_COLS))\n",
    "df = pd.DataFrame(coords, columns=['lat', 'lon'])\n",
    "\n",
    "# 2. Create a spatial weights matrix (W)\n",
    "# lat2W(nrows, ncols) builds contiguity weights for a regular lattice. Its default\n",
    "# is rook=True (shared edges only), which is what this cell has always used; the\n",
    "# flag is spelled out so the criterion is visible. Pass rook=False for Queen.\n",
    "w = lat2W(GRID_ROWS, GRID_COLS, rook=True)\n",
    "w.transform = 'r'  # Row-standardize the weights\n",
    "# Guard against the embeddings and the lattice getting out of step\n",
    "assert w.n == len(embeddings), 'weights and embeddings must cover the same observations'\n",
    "\n",
    "# 3. Compute ELSA\n",
    "elsa_results = ELSA(embeddings, w, permutations=N_PERMUTATIONS)\n",
    "\n",
    "# 4. Inspect the results\n",
    "print(\"ELSA statistics (first 5):\")\n",
    "print(elsa_results.e[:5])\n",
    "\n",
    "print(\"\\nStandardized cosine similarities (z) (first 5):\")\n",
    "print(elsa_results.z[:5])\n",
    "\n",
    "print(\"\\nPseudo p-values (first 5):\")\n",
    "print(elsa_results.p_sim[:5])\n",
    "\n",
    "print(\"\\nQuadrant classifications (first 5):\")\n",
    "print(elsa_results.q[:5])\n",
    "# Quadrant meanings (as stated by the original author of this demo):\n",
    "# 1: High-High (hotspot)\n",
    "# 2: Low-Low (coldspot)\n",
    "# 3: Low-High (outlier)\n",
    "# 4: High-Low (outlier)\n",
    "# NOTE(review): PySAL's Moran_Local codes 2 as Low-High and 3 as Low-Low;\n",
    "# confirm which coding the elsa module actually uses before relying on 2 and 3.\n",
    "\n",
    "# Attach the per-observation results to the coordinate frame for analysis or plotting\n",
    "df = df.assign(\n",
    "    elsa_e=elsa_results.e,\n",
    "    elsa_p_sim=elsa_results.p_sim,\n",
    "    elsa_q=elsa_results.q,\n",
    ")\n",
    "\n",
    "print(\"\\nDataFrame with results (first 5 rows):\")\n",
    "print(df.head())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
