{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Aptos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reference shape: (35122, 60), Test shape: (3662, 60)\n",
      "       image_id   rho_avg  rho_final  inside_interval  point\n",
      "0  000c1434d8d7  0.004206  -0.008677             True      1\n",
      "1  001639a390f0  0.002765  -0.010118             True      1\n",
      "2  0024cdab0c1e -0.000379  -0.013262             True      1\n",
      "3  002c21358ce6  0.003922  -0.008961             True      1\n",
      "4  005b95c28852  0.023008   0.010125             True      1\n",
      "\n",
      "Interval: [-0.04568945, 0.04568945] | Inside: 3038 / 3662 (82.96%)\n",
      "Results saved to /drive2/Kuntal/Pysindy-experiment/pysindy/SDG/eyepack/sgd_eyepac-on-aptos_results.csv\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "def load_theta_and_ids(theta_path, ids_path):\n",
    "    \"\"\"Loads theta vectors and corresponding IDs, flattening each theta to 1D.\"\"\"\n",
    "    thetas = np.load(theta_path)\n",
    "    ids = np.load(ids_path)\n",
    "    # Flatten each theta (N, 3, 20) -> (N, 60)\n",
    "    thetas = thetas.reshape(thetas.shape[0], -1)\n",
    "    # Convert to flat string if IDs are bytes\n",
    "    if ids.dtype.type is np.bytes_:\n",
    "        ids = np.array([x.decode() for x in ids])\n",
    "    return thetas, ids\n",
    "\n",
    "\n",
    "def cosine_similarity_matrix(A, B):\n",
    "    \"\"\"\n",
    "    Compute cosine similarities between all vectors in A (N, D) and B (M, D).\n",
    "    Returns (M, N) matrix where entry (i, j) = cos_sim(B[i], A[j])\n",
    "    \"\"\"\n",
    "    # Compute dot products for all pairs\n",
    "    dot_products = np.dot(B, A.T)  # (M, N)\n",
    "    # Compute norms\n",
    "    B_norms = np.linalg.norm(B, axis=1, keepdims=True)    # (M, 1)\n",
    "    A_norms = np.linalg.norm(A, axis=1, keepdims=True)    # (N, 1)\n",
    "    # Outer product of norms: (M, 1) * (1, N) -> (M, N)\n",
    "    norm_matrix = B_norms @ A_norms.T + 1e-8\n",
    "    # Elementwise division\n",
    "    return dot_products / norm_matrix\n",
    "\n",
    "\n",
    "def sgd_test_with_ids(\n",
    "    ref_theta_path, ref_ids_path,\n",
    "    test_theta_path, test_ids_path,\n",
    "    reference_avg,\n",
    "    conf_interval,\n",
    "    out_csv=None,\n",
    "    verbose=True\n",
    "):\n",
    "    \"\"\"Score each test theta against the reference set and tabulate the outcome.\n",
    "\n",
    "    For every test vector the mean cosine similarity to all reference vectors\n",
    "    is computed, ``reference_avg`` is subtracted, and the shifted value is\n",
    "    checked against the closed interval ``conf_interval``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    ref_theta_path, ref_ids_path : ``.npy`` files with the reference thetas / IDs.\n",
    "    test_theta_path, test_ids_path : ``.npy`` files with the test thetas / IDs.\n",
    "    reference_avg : value subtracted from every per-image mean similarity.\n",
    "    conf_interval : ``(lower, upper)`` pair; both ends count as inside.\n",
    "    out_csv : optional path; when given, the table is written there as CSV.\n",
    "    verbose : when true, print the array shapes, the table head and a summary.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame with columns ``image_id``, ``rho_avg``, ``rho_final``,\n",
    "    ``inside_interval`` and ``point`` (1 = inside, 0 = outside).\n",
    "    \"\"\"\n",
    "    # Reference set (its IDs are loaded but not used) and test set\n",
    "    A, _ = load_theta_and_ids(ref_theta_path, ref_ids_path)\n",
    "    B, test_ids = load_theta_and_ids(test_theta_path, test_ids_path)\n",
    "    if verbose:\n",
    "        print(f\"Reference shape: {A.shape}, Test shape: {B.shape}\")\n",
    "\n",
    "    # Mean similarity of every test row to all reference rows, then shifted\n",
    "    rho_avg = cosine_similarity_matrix(A, B).mean(axis=1)\n",
    "    rho_final = rho_avg - reference_avg\n",
    "\n",
    "    # Closed-interval membership test\n",
    "    min_bound, max_bound = conf_interval\n",
    "    inside_mask = (min_bound <= rho_final) & (max_bound >= rho_final)\n",
    "    percent_inside = 100.0 * inside_mask.sum() / len(rho_final)\n",
    "\n",
    "    columns = {\n",
    "        \"image_id\": test_ids,\n",
    "        \"rho_avg\": rho_avg,\n",
    "        \"rho_final\": rho_final,\n",
    "        \"inside_interval\": inside_mask,\n",
    "        \"point\": inside_mask.astype(int),  # 1 = inside, 0 = outside\n",
    "    }\n",
    "    results_df = pd.DataFrame(columns)\n",
    "\n",
    "    if verbose:\n",
    "        print(results_df.head())\n",
    "        print(f\"\\nInterval: [{min_bound:.8f}, {max_bound:.8f}] | Inside: {inside_mask.sum()} / {len(rho_final)} ({percent_inside:.2f}%)\")\n",
    "    if out_csv is None:\n",
    "        return results_df\n",
    "    results_df.to_csv(out_csv, index=False)\n",
    "    # This confirmation is printed even when verbose is false\n",
    "    print(f\"Results saved to {out_csv}\")\n",
    "    return results_df\n",
    "\n",
    "# -------------------- USAGE: EyePACS reference vs. APTOS test set --------------------\n",
    "if __name__ == \"__main__\":\n",
    "    # APTOS run: thetas from aptos_theta_data/ are scored against the EyePACS\n",
    "    # reference thetas and the per-image table is written to out_csv.\n",
    "    sgd_test_with_ids(\n",
    "        ref_theta_path=\"/drive2/Kuntal/Pysindy-experiment/eyepacs_theta_data/eyepacs_all_thetas.npy\",\n",
    "        ref_ids_path=\"/drive2/Kuntal/Pysindy-experiment/eyepacs_theta_data/eyepacs_theta_ids.npy\",\n",
    "        test_theta_path=\"/drive2/Kuntal/Pysindy-experiment/aptos_theta_data/aptos_all_thetas.npy\",     # <-- test domain\n",
    "        test_ids_path=\"/drive2/Kuntal/Pysindy-experiment/aptos_theta_data/aptos_theta_ids.npy\",        # <-- test ids\n",
    "        # NOTE(review): presumably the mean similarity measured on the reference set itself - confirm where this value comes from\n",
    "        reference_avg=0.01288287374567551,\n",
    "        # Bounds are symmetric around zero; NOTE(review): their derivation is not shown in this cell\n",
    "        conf_interval=(-0.04568944923551152, 0.04568944923551152),\n",
    "        out_csv=\"/drive2/Kuntal/Pysindy-experiment/pysindy/SDG/eyepack/sgd_eyepac-on-aptos_results.csv\",\n",
    "    )\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# M1 (Messidor-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reference shape: (35122, 60), Test shape: (1200, 60)\n",
      "                     image_id   rho_avg  rho_final  inside_interval  point\n",
      "0  20051019_38557_0100_PP.tif  0.011488  -0.001395             True      1\n",
      "1  20051020_43808_0100_PP.tif -0.006307  -0.019190             True      1\n",
      "2  20051020_43832_0100_PP.tif  0.003436  -0.009447             True      1\n",
      "3  20051020_43882_0100_PP.tif -0.005434  -0.018317             True      1\n",
      "4  20051020_43906_0100_PP.tif -0.009320  -0.022203             True      1\n",
      "\n",
      "Interval: [-0.04568945, 0.04568945] | Inside: 1124 / 1200 (93.67%)\n",
      "Results saved to /drive2/Kuntal/Pysindy-experiment/pysindy/SDG/eyepack/sgd_eyepac-on-messidor-1_results.csv\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "def load_theta_and_ids(theta_path, ids_path):\n",
    "    \"\"\"Loads theta vectors and corresponding IDs, flattening each theta to 1D.\"\"\"\n",
    "    thetas = np.load(theta_path)\n",
    "    ids = np.load(ids_path)\n",
    "    # Flatten each theta (N, 3, 20) -> (N, 60)\n",
    "    thetas = thetas.reshape(thetas.shape[0], -1)\n",
    "    # Convert to flat string if IDs are bytes\n",
    "    if ids.dtype.type is np.bytes_:\n",
    "        ids = np.array([x.decode() for x in ids])\n",
    "    return thetas, ids\n",
    "\n",
    "\n",
    "def cosine_similarity_matrix(A, B):\n",
    "    \"\"\"\n",
    "    Compute cosine similarities between all vectors in A (N, D) and B (M, D).\n",
    "    Returns (M, N) matrix where entry (i, j) = cos_sim(B[i], A[j])\n",
    "    \"\"\"\n",
    "    # Compute dot products for all pairs\n",
    "    dot_products = np.dot(B, A.T)  # (M, N)\n",
    "    # Compute norms\n",
    "    B_norms = np.linalg.norm(B, axis=1, keepdims=True)    # (M, 1)\n",
    "    A_norms = np.linalg.norm(A, axis=1, keepdims=True)    # (N, 1)\n",
    "    # Outer product of norms: (M, 1) * (1, N) -> (M, N)\n",
    "    norm_matrix = B_norms @ A_norms.T + 1e-8\n",
    "    # Elementwise division\n",
    "    return dot_products / norm_matrix\n",
    "\n",
    "\n",
    "def sgd_test_with_ids(\n",
    "    ref_theta_path, ref_ids_path,\n",
    "    test_theta_path, test_ids_path,\n",
    "    reference_avg,\n",
    "    conf_interval,\n",
    "    out_csv=None,\n",
    "    verbose=True\n",
    "):\n",
    "    \"\"\"Score each test theta against the reference set and tabulate the outcome.\n",
    "\n",
    "    For every test vector the mean cosine similarity to all reference vectors\n",
    "    is computed, ``reference_avg`` is subtracted, and the shifted value is\n",
    "    checked against the closed interval ``conf_interval``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    ref_theta_path, ref_ids_path : ``.npy`` files with the reference thetas / IDs.\n",
    "    test_theta_path, test_ids_path : ``.npy`` files with the test thetas / IDs.\n",
    "    reference_avg : value subtracted from every per-image mean similarity.\n",
    "    conf_interval : ``(lower, upper)`` pair; both ends count as inside.\n",
    "    out_csv : optional path; when given, the table is written there as CSV.\n",
    "    verbose : when true, print the array shapes, the table head and a summary.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame with columns ``image_id``, ``rho_avg``, ``rho_final``,\n",
    "    ``inside_interval`` and ``point`` (1 = inside, 0 = outside).\n",
    "    \"\"\"\n",
    "    # Reference set (its IDs are loaded but not used) and test set\n",
    "    A, _ = load_theta_and_ids(ref_theta_path, ref_ids_path)\n",
    "    B, test_ids = load_theta_and_ids(test_theta_path, test_ids_path)\n",
    "    if verbose:\n",
    "        print(f\"Reference shape: {A.shape}, Test shape: {B.shape}\")\n",
    "\n",
    "    # Mean similarity of every test row to all reference rows, then shifted\n",
    "    rho_avg = cosine_similarity_matrix(A, B).mean(axis=1)\n",
    "    rho_final = rho_avg - reference_avg\n",
    "\n",
    "    # Closed-interval membership test\n",
    "    min_bound, max_bound = conf_interval\n",
    "    inside_mask = (min_bound <= rho_final) & (max_bound >= rho_final)\n",
    "    percent_inside = 100.0 * inside_mask.sum() / len(rho_final)\n",
    "\n",
    "    columns = {\n",
    "        \"image_id\": test_ids,\n",
    "        \"rho_avg\": rho_avg,\n",
    "        \"rho_final\": rho_final,\n",
    "        \"inside_interval\": inside_mask,\n",
    "        \"point\": inside_mask.astype(int),  # 1 = inside, 0 = outside\n",
    "    }\n",
    "    results_df = pd.DataFrame(columns)\n",
    "\n",
    "    if verbose:\n",
    "        print(results_df.head())\n",
    "        print(f\"\\nInterval: [{min_bound:.8f}, {max_bound:.8f}] | Inside: {inside_mask.sum()} / {len(rho_final)} ({percent_inside:.2f}%)\")\n",
    "    if out_csv is None:\n",
    "        return results_df\n",
    "    results_df.to_csv(out_csv, index=False)\n",
    "    # This confirmation is printed even when verbose is false\n",
    "    print(f\"Results saved to {out_csv}\")\n",
    "    return results_df\n",
    "\n",
    "# -------------------- USAGE: EyePACS reference vs. Messidor-1 test set --------------------\n",
    "if __name__ == \"__main__\":\n",
    "    # Messidor-1 run: thetas from M1-output/ are scored against the EyePACS\n",
    "    # reference thetas and the per-image table is written to out_csv.\n",
    "    sgd_test_with_ids(\n",
    "        ref_theta_path=\"/drive2/Kuntal/Pysindy-experiment/eyepacs_theta_data/eyepacs_all_thetas.npy\",\n",
    "        ref_ids_path=\"/drive2/Kuntal/Pysindy-experiment/eyepacs_theta_data/eyepacs_theta_ids.npy\",\n",
    "        test_theta_path=\"/drive2/Kuntal/Pysindy-experiment/M1-output/messidor_all_thetas.npy\",     # <-- test domain\n",
    "        test_ids_path=\"/drive2/Kuntal/Pysindy-experiment/M1-output/messidor_theta_ids.npy\",        # <-- test ids\n",
    "        # NOTE(review): presumably the mean similarity measured on the reference set itself - confirm where this value comes from\n",
    "        reference_avg=0.01288287374567551,\n",
    "        # Bounds are symmetric around zero; NOTE(review): their derivation is not shown in this cell\n",
    "        conf_interval=(-0.04568944923551152, 0.04568944923551152),\n",
    "        out_csv=\"/drive2/Kuntal/Pysindy-experiment/pysindy/SDG/eyepack/sgd_eyepac-on-messidor-1_results.csv\",\n",
    "    )\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# M2 (Messidor-2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reference shape: (35122, 60), Test shape: (1057, 60)\n",
      "                     image_id   rho_avg  rho_final  inside_interval  point\n",
      "0  20051020_43808_0100_PP.png -0.006307  -0.019190             True      1\n",
      "1  20051020_43832_0100_PP.png -0.003854  -0.016737             True      1\n",
      "2  20051020_43882_0100_PP.png -0.005434  -0.018317             True      1\n",
      "3  20051020_43906_0100_PP.png -0.004391  -0.017274             True      1\n",
      "4  20051020_44261_0100_PP.png  0.001934  -0.010949             True      1\n",
      "\n",
      "Interval: [-0.04568945, 0.04568945] | Inside: 1009 / 1057 (95.46%)\n",
      "Results saved to /drive2/Kuntal/Pysindy-experiment/pysindy/SDG/eyepack/sgd_eyepack-on-messidor-2_results.csv\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "def load_theta_and_ids(theta_path, ids_path):\n",
    "    \"\"\"Loads theta vectors and corresponding IDs, flattening each theta to 1D.\"\"\"\n",
    "    thetas = np.load(theta_path)\n",
    "    ids = np.load(ids_path)\n",
    "    # Flatten each theta (N, 3, 20) -> (N, 60)\n",
    "    thetas = thetas.reshape(thetas.shape[0], -1)\n",
    "    # Convert to flat string if IDs are bytes\n",
    "    if ids.dtype.type is np.bytes_:\n",
    "        ids = np.array([x.decode() for x in ids])\n",
    "    return thetas, ids\n",
    "\n",
    "\n",
    "def cosine_similarity_matrix(A, B):\n",
    "    \"\"\"\n",
    "    Compute cosine similarities between all vectors in A (N, D) and B (M, D).\n",
    "    Returns (M, N) matrix where entry (i, j) = cos_sim(B[i], A[j])\n",
    "    \"\"\"\n",
    "    # Compute dot products for all pairs\n",
    "    dot_products = np.dot(B, A.T)  # (M, N)\n",
    "    # Compute norms\n",
    "    B_norms = np.linalg.norm(B, axis=1, keepdims=True)    # (M, 1)\n",
    "    A_norms = np.linalg.norm(A, axis=1, keepdims=True)    # (N, 1)\n",
    "    # Outer product of norms: (M, 1) * (1, N) -> (M, N)\n",
    "    norm_matrix = B_norms @ A_norms.T + 1e-8\n",
    "    # Elementwise division\n",
    "    return dot_products / norm_matrix\n",
    "\n",
    "\n",
    "def sgd_test_with_ids(\n",
    "    ref_theta_path, ref_ids_path,\n",
    "    test_theta_path, test_ids_path,\n",
    "    reference_avg,\n",
    "    conf_interval,\n",
    "    out_csv=None,\n",
    "    verbose=True\n",
    "):\n",
    "    \"\"\"Score each test theta against the reference set and tabulate the outcome.\n",
    "\n",
    "    For every test vector the mean cosine similarity to all reference vectors\n",
    "    is computed, ``reference_avg`` is subtracted, and the shifted value is\n",
    "    checked against the closed interval ``conf_interval``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    ref_theta_path, ref_ids_path : ``.npy`` files with the reference thetas / IDs.\n",
    "    test_theta_path, test_ids_path : ``.npy`` files with the test thetas / IDs.\n",
    "    reference_avg : value subtracted from every per-image mean similarity.\n",
    "    conf_interval : ``(lower, upper)`` pair; both ends count as inside.\n",
    "    out_csv : optional path; when given, the table is written there as CSV.\n",
    "    verbose : when true, print the array shapes, the table head and a summary.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame with columns ``image_id``, ``rho_avg``, ``rho_final``,\n",
    "    ``inside_interval`` and ``point`` (1 = inside, 0 = outside).\n",
    "    \"\"\"\n",
    "    # Reference set (its IDs are loaded but not used) and test set\n",
    "    A, _ = load_theta_and_ids(ref_theta_path, ref_ids_path)\n",
    "    B, test_ids = load_theta_and_ids(test_theta_path, test_ids_path)\n",
    "    if verbose:\n",
    "        print(f\"Reference shape: {A.shape}, Test shape: {B.shape}\")\n",
    "\n",
    "    # Mean similarity of every test row to all reference rows, then shifted\n",
    "    rho_avg = cosine_similarity_matrix(A, B).mean(axis=1)\n",
    "    rho_final = rho_avg - reference_avg\n",
    "\n",
    "    # Closed-interval membership test\n",
    "    min_bound, max_bound = conf_interval\n",
    "    inside_mask = (min_bound <= rho_final) & (max_bound >= rho_final)\n",
    "    percent_inside = 100.0 * inside_mask.sum() / len(rho_final)\n",
    "\n",
    "    columns = {\n",
    "        \"image_id\": test_ids,\n",
    "        \"rho_avg\": rho_avg,\n",
    "        \"rho_final\": rho_final,\n",
    "        \"inside_interval\": inside_mask,\n",
    "        \"point\": inside_mask.astype(int),  # 1 = inside, 0 = outside\n",
    "    }\n",
    "    results_df = pd.DataFrame(columns)\n",
    "\n",
    "    if verbose:\n",
    "        print(results_df.head())\n",
    "        print(f\"\\nInterval: [{min_bound:.8f}, {max_bound:.8f}] | Inside: {inside_mask.sum()} / {len(rho_final)} ({percent_inside:.2f}%)\")\n",
    "    if out_csv is None:\n",
    "        return results_df\n",
    "    results_df.to_csv(out_csv, index=False)\n",
    "    # This confirmation is printed even when verbose is false\n",
    "    print(f\"Results saved to {out_csv}\")\n",
    "    return results_df\n",
    "\n",
    "# -------------------- USAGE: EyePACS reference vs. Messidor-2 test set --------------------\n",
    "if __name__ == \"__main__\":\n",
    "    # Messidor-2 run: thetas from M2-output/ are scored against the EyePACS\n",
    "    # reference thetas and the per-image table is written to out_csv.\n",
    "    sgd_test_with_ids(\n",
    "        ref_theta_path=\"/drive2/Kuntal/Pysindy-experiment/eyepacs_theta_data/eyepacs_all_thetas.npy\",\n",
    "        ref_ids_path=\"/drive2/Kuntal/Pysindy-experiment/eyepacs_theta_data/eyepacs_theta_ids.npy\",\n",
    "        test_theta_path=\"/drive2/Kuntal/Pysindy-experiment/M2-output/messidor2_all_thetas.npy\",     # <-- test domain\n",
    "        test_ids_path=\"/drive2/Kuntal/Pysindy-experiment/M2-output/messidor2_theta_ids.npy\",        # <-- test ids\n",
    "        # NOTE(review): presumably the mean similarity measured on the reference set itself - confirm where this value comes from\n",
    "        reference_avg=0.01288287374567551,\n",
    "        # Bounds are symmetric around zero; NOTE(review): their derivation is not shown in this cell\n",
    "        conf_interval=(-0.04568944923551152, 0.04568944923551152),\n",
    "        # NOTE(review): this file name says \"sgd_eyepack-\" while the Aptos and M1 cells write \"sgd_eyepac-\" - confirm which spelling is intended\n",
    "        out_csv=\"/drive2/Kuntal/Pysindy-experiment/pysindy/SDG/eyepack/sgd_eyepack-on-messidor-2_results.csv\",\n",
    "    )\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
