{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Eyepack\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reference shape: (3662, 60), Test shape: (35122, 60)\n",
      "   image_id   rho_avg  rho_final  inside_interval  point\n",
      "0   10_left -0.052322  -0.071301            False      0\n",
      "1  10_right -0.017697  -0.036676             True      1\n",
      "2   13_left -0.032330  -0.051308             True      1\n",
      "3  13_right  0.089886   0.070908            False      0\n",
      "4   15_left  0.025859   0.006881             True      1\n",
      "\n",
      "Interval: [-0.05960987, 0.05960987] | Inside: 25356 / 35122 (72.19%)\n",
      "Results saved to /drive2/Kuntal/Pysindy-experiment/pysindy/SDG/aptos/sgd_aptos-on-eyepacks_results.csv\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "def load_theta_and_ids(theta_path, ids_path):\n",
    "    \"\"\"Loads theta vectors and corresponding IDs, flattening each theta to 1D.\"\"\"\n",
    "    thetas = np.load(theta_path)\n",
    "    ids = np.load(ids_path)\n",
    "    # Flatten each theta (N, 3, 20) -> (N, 60)\n",
    "    thetas = thetas.reshape(thetas.shape[0], -1)\n",
    "    # Convert to flat string if IDs are bytes\n",
    "    if ids.dtype.type is np.bytes_:\n",
    "        ids = np.array([x.decode() for x in ids])\n",
    "    return thetas, ids\n",
    "\n",
    "\n",
    "def cosine_similarity_matrix(A, B):\n",
    "    \"\"\"\n",
    "    Compute cosine similarities between all vectors in A (N, D) and B (M, D).\n",
    "    Returns (M, N) matrix where entry (i, j) = cos_sim(B[i], A[j])\n",
    "    \"\"\"\n",
    "    # Compute dot products for all pairs\n",
    "    dot_products = np.dot(B, A.T)  # (M, N)\n",
    "    # Compute norms\n",
    "    B_norms = np.linalg.norm(B, axis=1, keepdims=True)    # (M, 1)\n",
    "    A_norms = np.linalg.norm(A, axis=1, keepdims=True)    # (N, 1)\n",
    "    # Outer product of norms: (M, 1) * (1, N) -> (M, N)\n",
    "    norm_matrix = B_norms @ A_norms.T + 1e-8\n",
    "    # Elementwise division\n",
    "    return dot_products / norm_matrix\n",
    "\n",
    "\n",
    "def sgd_test_with_ids(\n",
    "    ref_theta_path, ref_ids_path,\n",
    "    test_theta_path, test_ids_path,\n",
    "    reference_avg,\n",
    "    conf_interval,\n",
    "    out_csv=None,\n",
    "    verbose=True\n",
    "):\n",
    "    # 1. Load data\n",
    "    A, A_ids = load_theta_and_ids(ref_theta_path, ref_ids_path)\n",
    "    B, B_ids = load_theta_and_ids(test_theta_path, test_ids_path)\n",
    "    if verbose:\n",
    "        print(f\"Reference shape: {A.shape}, Test shape: {B.shape}\")\n",
    "\n",
    "    # 2. Cosine similarity matrix\n",
    "    sim_matrix = cosine_similarity_matrix(A, B)\n",
    "    # 3. Row-wise average for each test image\n",
    "    rho_avg = np.mean(sim_matrix, axis=1)\n",
    "    \n",
    "    # 4. Subtract reference average\n",
    "    rho_final = rho_avg - reference_avg\n",
    "    # 5. Interval check\n",
    "    min_bound, max_bound = conf_interval\n",
    "    inside_mask = (rho_final >= min_bound) & (rho_final <= max_bound)\n",
    "    percent_inside = 100.0 * inside_mask.sum() / len(rho_final)\n",
    "    # 6. Assign points (1 = inside, 0 = outside)\n",
    "    points = inside_mask.astype(int)\n",
    "\n",
    "    # 7. Tabulate results with IDs and points\n",
    "    results_df = pd.DataFrame({\n",
    "        \"image_id\": B_ids,\n",
    "        \"rho_avg\": rho_avg,\n",
    "        \"rho_final\": rho_final,\n",
    "        \"inside_interval\": inside_mask,\n",
    "        \"point\": points\n",
    "    })\n",
    "    if verbose:\n",
    "        print(results_df.head())\n",
    "        print(f\"\\nInterval: [{min_bound:.8f}, {max_bound:.8f}] | Inside: {inside_mask.sum()} / {len(rho_final)} ({percent_inside:.2f}%)\")\n",
    "    if out_csv is not None:\n",
    "        results_df.to_csv(out_csv, index=False)\n",
    "        print(f\"Results saved to {out_csv}\")\n",
    "    return results_df\n",
    "\n",
    "# -------------------- USAGE EXAMPLE --------------------\n",
    "if __name__ == \"__main__\":\n",
    "    # Messidor 1 example\n",
    "    sgd_test_with_ids(\n",
    "        ref_theta_path=\"/drive2/Kuntal/Pysindy-experiment/aptos_theta_data/aptos_all_thetas.npy\",\n",
    "        ref_ids_path=\"/drive2/Kuntal/Pysindy-experiment/aptos_theta_data/aptos_theta_ids.npy\",\n",
    "        test_theta_path=\"/drive2/Kuntal/Pysindy-experiment/eyepacs_theta_data/eyepacs_all_thetas.npy\",     # <-- test domain\n",
    "        test_ids_path=\"/drive2/Kuntal/Pysindy-experiment/eyepacs_theta_data/eyepacs_theta_ids.npy\",      # <-- test ids\n",
    "        reference_avg=0.018978290495836176,\n",
    "        conf_interval=(-0.0596098734708454, 0.0596098734708454),\n",
    "        out_csv=\"/drive2/Kuntal/Pysindy-experiment/pysindy/SDG/aptos/sgd_aptos-on-eyepacks_results.csv\",\n",
    "    )\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# M1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reference shape: (3662, 60), Test shape: (1200, 60)\n",
      "                     image_id   rho_avg  rho_final  inside_interval  point\n",
      "0  20051019_38557_0100_PP.tif  0.080112   0.061134            False      0\n",
      "1  20051020_43808_0100_PP.tif -0.112445  -0.131423            False      0\n",
      "2  20051020_43832_0100_PP.tif -0.052179  -0.071157            False      0\n",
      "3  20051020_43882_0100_PP.tif  0.114782   0.095804            False      0\n",
      "4  20051020_43906_0100_PP.tif -0.033198  -0.052176             True      1\n",
      "\n",
      "Interval: [-0.05960987, 0.05960987] | Inside: 703 / 1200 (58.58%)\n",
      "Results saved to /drive2/Kuntal/Pysindy-experiment/pysindy/SDG/aptos/sgd_aptos-on-M1_results.csv\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "def load_theta_and_ids(theta_path, ids_path):\n",
    "    \"\"\"Loads theta vectors and corresponding IDs, flattening each theta to 1D.\"\"\"\n",
    "    thetas = np.load(theta_path)\n",
    "    ids = np.load(ids_path)\n",
    "    # Flatten each theta (N, 3, 20) -> (N, 60)\n",
    "    thetas = thetas.reshape(thetas.shape[0], -1)\n",
    "    # Convert to flat string if IDs are bytes\n",
    "    if ids.dtype.type is np.bytes_:\n",
    "        ids = np.array([x.decode() for x in ids])\n",
    "    return thetas, ids\n",
    "\n",
    "\n",
    "def cosine_similarity_matrix(A, B):\n",
    "    \"\"\"\n",
    "    Compute cosine similarities between all vectors in A (N, D) and B (M, D).\n",
    "    Returns (M, N) matrix where entry (i, j) = cos_sim(B[i], A[j])\n",
    "    \"\"\"\n",
    "    # Compute dot products for all pairs\n",
    "    dot_products = np.dot(B, A.T)  # (M, N)\n",
    "    # Compute norms\n",
    "    B_norms = np.linalg.norm(B, axis=1, keepdims=True)    # (M, 1)\n",
    "    A_norms = np.linalg.norm(A, axis=1, keepdims=True)    # (N, 1)\n",
    "    # Outer product of norms: (M, 1) * (1, N) -> (M, N)\n",
    "    norm_matrix = B_norms @ A_norms.T + 1e-8\n",
    "    # Elementwise division\n",
    "    return dot_products / norm_matrix\n",
    "\n",
    "\n",
    "def sgd_test_with_ids(\n",
    "    ref_theta_path, ref_ids_path,\n",
    "    test_theta_path, test_ids_path,\n",
    "    reference_avg,\n",
    "    conf_interval,\n",
    "    out_csv=None,\n",
    "    verbose=True\n",
    "):\n",
    "    # 1. Load data\n",
    "    A, A_ids = load_theta_and_ids(ref_theta_path, ref_ids_path)\n",
    "    B, B_ids = load_theta_and_ids(test_theta_path, test_ids_path)\n",
    "    if verbose:\n",
    "        print(f\"Reference shape: {A.shape}, Test shape: {B.shape}\")\n",
    "\n",
    "    # 2. Cosine similarity matrix\n",
    "    sim_matrix = cosine_similarity_matrix(A, B)\n",
    "    # 3. Row-wise average for each test image\n",
    "    rho_avg = np.mean(sim_matrix, axis=1)\n",
    "    \n",
    "    # 4. Subtract reference average\n",
    "    rho_final = rho_avg - reference_avg\n",
    "    # 5. Interval check\n",
    "    min_bound, max_bound = conf_interval\n",
    "    inside_mask = (rho_final >= min_bound) & (rho_final <= max_bound)\n",
    "    percent_inside = 100.0 * inside_mask.sum() / len(rho_final)\n",
    "    # 6. Assign points (1 = inside, 0 = outside)\n",
    "    points = inside_mask.astype(int)\n",
    "\n",
    "    # 7. Tabulate results with IDs and points\n",
    "    results_df = pd.DataFrame({\n",
    "        \"image_id\": B_ids,\n",
    "        \"rho_avg\": rho_avg,\n",
    "        \"rho_final\": rho_final,\n",
    "        \"inside_interval\": inside_mask,\n",
    "        \"point\": points\n",
    "    })\n",
    "    if verbose:\n",
    "        print(results_df.head())\n",
    "        print(f\"\\nInterval: [{min_bound:.8f}, {max_bound:.8f}] | Inside: {inside_mask.sum()} / {len(rho_final)} ({percent_inside:.2f}%)\")\n",
    "    if out_csv is not None:\n",
    "        results_df.to_csv(out_csv, index=False)\n",
    "        print(f\"Results saved to {out_csv}\")\n",
    "    return results_df\n",
    "\n",
    "# -------------------- USAGE EXAMPLE --------------------\n",
    "if __name__ == \"__main__\":\n",
    "    # Messidor 1 example\n",
    "    sgd_test_with_ids(\n",
    "        ref_theta_path=\"/drive2/Kuntal/Pysindy-experiment/aptos_theta_data/aptos_all_thetas.npy\",\n",
    "        ref_ids_path=\"/drive2/Kuntal/Pysindy-experiment/aptos_theta_data/aptos_theta_ids.npy\",\n",
    "        test_theta_path=\"/drive2/Kuntal/Pysindy-experiment/M1-output/messidor_all_thetas.npy\",     # <-- test domain\n",
    "        test_ids_path=\"/drive2/Kuntal/Pysindy-experiment/M1-output/messidor_theta_ids.npy\",        # <-- test ids\n",
    "        reference_avg=0.018978290495836176,\n",
    "        conf_interval=(-0.0596098734708454, 0.0596098734708454),\n",
    "        out_csv=\"/drive2/Kuntal/Pysindy-experiment/pysindy/SDG/aptos/sgd_aptos-on-M1_results.csv\",\n",
    "    )\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# M2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reference shape: (3662, 60), Test shape: (1057, 60)\n",
      "                     image_id   rho_avg  rho_final  inside_interval  point\n",
      "0  20051020_43808_0100_PP.png -0.112445  -0.131423            False      0\n",
      "1  20051020_43832_0100_PP.png -0.040123  -0.059102             True      1\n",
      "2  20051020_43882_0100_PP.png  0.114782   0.095804            False      0\n",
      "3  20051020_43906_0100_PP.png -0.094630  -0.113609            False      0\n",
      "4  20051020_44261_0100_PP.png  0.004205  -0.014774             True      1\n",
      "\n",
      "Interval: [-0.05960987, 0.05960987] | Inside: 650 / 1057 (61.49%)\n",
      "Results saved to /drive2/Kuntal/Pysindy-experiment/pysindy/SDG/aptos/sgd_aptos-on-M2_results.csv\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "def load_theta_and_ids(theta_path, ids_path):\n",
    "    \"\"\"Loads theta vectors and corresponding IDs, flattening each theta to 1D.\"\"\"\n",
    "    thetas = np.load(theta_path)\n",
    "    ids = np.load(ids_path)\n",
    "    # Flatten each theta (N, 3, 20) -> (N, 60)\n",
    "    thetas = thetas.reshape(thetas.shape[0], -1)\n",
    "    # Convert to flat string if IDs are bytes\n",
    "    if ids.dtype.type is np.bytes_:\n",
    "        ids = np.array([x.decode() for x in ids])\n",
    "    return thetas, ids\n",
    "\n",
    "\n",
    "def cosine_similarity_matrix(A, B):\n",
    "    \"\"\"\n",
    "    Compute cosine similarities between all vectors in A (N, D) and B (M, D).\n",
    "    Returns (M, N) matrix where entry (i, j) = cos_sim(B[i], A[j])\n",
    "    \"\"\"\n",
    "    # Compute dot products for all pairs\n",
    "    dot_products = np.dot(B, A.T)  # (M, N)\n",
    "    # Compute norms\n",
    "    B_norms = np.linalg.norm(B, axis=1, keepdims=True)    # (M, 1)\n",
    "    A_norms = np.linalg.norm(A, axis=1, keepdims=True)    # (N, 1)\n",
    "    # Outer product of norms: (M, 1) * (1, N) -> (M, N)\n",
    "    norm_matrix = B_norms @ A_norms.T + 1e-8\n",
    "    # Elementwise division\n",
    "    return dot_products / norm_matrix\n",
    "\n",
    "\n",
    "def sgd_test_with_ids(\n",
    "    ref_theta_path, ref_ids_path,\n",
    "    test_theta_path, test_ids_path,\n",
    "    reference_avg,\n",
    "    conf_interval,\n",
    "    out_csv=None,\n",
    "    verbose=True\n",
    "):\n",
    "    # 1. Load data\n",
    "    A, A_ids = load_theta_and_ids(ref_theta_path, ref_ids_path)\n",
    "    B, B_ids = load_theta_and_ids(test_theta_path, test_ids_path)\n",
    "    if verbose:\n",
    "        print(f\"Reference shape: {A.shape}, Test shape: {B.shape}\")\n",
    "\n",
    "    # 2. Cosine similarity matrix\n",
    "    sim_matrix = cosine_similarity_matrix(A, B)\n",
    "    # 3. Row-wise average for each test image\n",
    "    rho_avg = np.mean(sim_matrix, axis=1)\n",
    "    \n",
    "    # 4. Subtract reference average\n",
    "    rho_final = rho_avg - reference_avg\n",
    "    # 5. Interval check\n",
    "    min_bound, max_bound = conf_interval\n",
    "    inside_mask = (rho_final >= min_bound) & (rho_final <= max_bound)\n",
    "    percent_inside = 100.0 * inside_mask.sum() / len(rho_final)\n",
    "    # 6. Assign points (1 = inside, 0 = outside)\n",
    "    points = inside_mask.astype(int)\n",
    "\n",
    "    # 7. Tabulate results with IDs and points\n",
    "    results_df = pd.DataFrame({\n",
    "        \"image_id\": B_ids,\n",
    "        \"rho_avg\": rho_avg,\n",
    "        \"rho_final\": rho_final,\n",
    "        \"inside_interval\": inside_mask,\n",
    "        \"point\": points\n",
    "    })\n",
    "    if verbose:\n",
    "        print(results_df.head())\n",
    "        print(f\"\\nInterval: [{min_bound:.8f}, {max_bound:.8f}] | Inside: {inside_mask.sum()} / {len(rho_final)} ({percent_inside:.2f}%)\")\n",
    "    if out_csv is not None:\n",
    "        results_df.to_csv(out_csv, index=False)\n",
    "        print(f\"Results saved to {out_csv}\")\n",
    "    return results_df\n",
    "\n",
    "# -------------------- USAGE EXAMPLE --------------------\n",
    "if __name__ == \"__main__\":\n",
    "    # Messidor 1 example\n",
    "    sgd_test_with_ids(\n",
    "        ref_theta_path=\"/drive2/Kuntal/Pysindy-experiment/aptos_theta_data/aptos_all_thetas.npy\",\n",
    "        ref_ids_path=\"/drive2/Kuntal/Pysindy-experiment/aptos_theta_data/aptos_theta_ids.npy\",\n",
    "        test_theta_path=\"/drive2/Kuntal/Pysindy-experiment/M2-output/messidor2_all_thetas.npy\",     # <-- test domain\n",
    "        test_ids_path=\"/drive2/Kuntal/Pysindy-experiment/M2-output/messidor2_theta_ids.npy\",        # <-- test ids\n",
    "        reference_avg=0.018978290495836176,\n",
    "        conf_interval=(-0.0596098734708454, 0.0596098734708454),\n",
    "        out_csv=\"/drive2/Kuntal/Pysindy-experiment/pysindy/SDG/aptos/sgd_aptos-on-M2_results.csv\",\n",
    "    )\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
