{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Messidor - 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reference shape: (1057, 60), Test shape: (1200, 60)\n",
      "                     image_id   rho_avg  rho_final  inside_interval  point\n",
      "0  20051019_38557_0100_PP.tif  0.072927   0.031209             True      1\n",
      "1  20051020_43808_0100_PP.tif -0.081620  -0.123337            False      0\n",
      "2  20051020_43832_0100_PP.tif -0.006478  -0.048196            False      0\n",
      "3  20051020_43882_0100_PP.tif  0.115378   0.073661            False      0\n",
      "4  20051020_43906_0100_PP.tif -0.030436  -0.072154            False      0\n",
      "\n",
      "Interval: [-0.04399879, 0.04399879] | Inside: 504 / 1200 (42.00%)\n",
      "Results saved to /drive2/Kuntal/Pysindy-experiment/pysindy/SDG/M2/sgd_m2-on-messidor-1_results.csv\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "def load_theta_and_ids(theta_path, ids_path):\n",
    "    \"\"\"Loads theta vectors and corresponding IDs, flattening each theta to 1D.\"\"\"\n",
    "    thetas = np.load(theta_path)\n",
    "    ids = np.load(ids_path)\n",
    "    # Flatten each theta (N, 3, 20) -> (N, 60)\n",
    "    thetas = thetas.reshape(thetas.shape[0], -1)\n",
    "    # Convert to flat string if IDs are bytes\n",
    "    if ids.dtype.type is np.bytes_:\n",
    "        ids = np.array([x.decode() for x in ids])\n",
    "    return thetas, ids\n",
    "\n",
    "\n",
    "def cosine_similarity_matrix(A, B):\n",
    "    \"\"\"\n",
    "    Compute cosine similarities between all vectors in A (N, D) and B (M, D).\n",
    "    Returns (M, N) matrix where entry (i, j) = cos_sim(B[i], A[j])\n",
    "    \"\"\"\n",
    "    # Compute dot products for all pairs\n",
    "    dot_products = np.dot(B, A.T)  # (M, N)\n",
    "    # Compute norms\n",
    "    B_norms = np.linalg.norm(B, axis=1, keepdims=True)    # (M, 1)\n",
    "    A_norms = np.linalg.norm(A, axis=1, keepdims=True)    # (N, 1)\n",
    "    # Outer product of norms: (M, 1) * (1, N) -> (M, N)\n",
    "    norm_matrix = B_norms @ A_norms.T + 1e-8\n",
    "    # Elementwise division\n",
    "    return dot_products / norm_matrix\n",
    "\n",
    "\n",
    "def sgd_test_with_ids(\n",
    "    ref_theta_path, ref_ids_path,\n",
    "    test_theta_path, test_ids_path,\n",
    "    reference_avg,\n",
    "    conf_interval,\n",
    "    out_csv=None,\n",
    "    verbose=True\n",
    "):\n",
    "    # 1. Load data\n",
    "    A, A_ids = load_theta_and_ids(ref_theta_path, ref_ids_path)\n",
    "    B, B_ids = load_theta_and_ids(test_theta_path, test_ids_path)\n",
    "    if verbose:\n",
    "        print(f\"Reference shape: {A.shape}, Test shape: {B.shape}\")\n",
    "\n",
    "    # 2. Cosine similarity matrix\n",
    "    sim_matrix = cosine_similarity_matrix(A, B)\n",
    "    # 3. Row-wise average for each test image\n",
    "    rho_avg = np.mean(sim_matrix, axis=1)\n",
    "    \n",
    "    # 4. Subtract reference average\n",
    "    rho_final = rho_avg - reference_avg\n",
    "    # 5. Interval check\n",
    "    min_bound, max_bound = conf_interval\n",
    "    inside_mask = (rho_final >= min_bound) & (rho_final <= max_bound)\n",
    "    percent_inside = 100.0 * inside_mask.sum() / len(rho_final)\n",
    "    # 6. Assign points (1 = inside, 0 = outside)\n",
    "    points = inside_mask.astype(int)\n",
    "\n",
    "    # 7. Tabulate results with IDs and points\n",
    "    results_df = pd.DataFrame({\n",
    "        \"image_id\": B_ids,\n",
    "        \"rho_avg\": rho_avg,\n",
    "        \"rho_final\": rho_final,\n",
    "        \"inside_interval\": inside_mask,\n",
    "        \"point\": points\n",
    "    })\n",
    "    if verbose:\n",
    "        print(results_df.head())\n",
    "        print(f\"\\nInterval: [{min_bound:.8f}, {max_bound:.8f}] | Inside: {inside_mask.sum()} / {len(rho_final)} ({percent_inside:.2f}%)\")\n",
    "    if out_csv is not None:\n",
    "        results_df.to_csv(out_csv, index=False)\n",
    "        print(f\"Results saved to {out_csv}\")\n",
    "    return results_df\n",
    "\n",
    "# -------------------- USAGE EXAMPLE --------------------\n",
    "if __name__ == \"__main__\":\n",
    "    # Messidor 1 example\n",
    "    sgd_test_with_ids(\n",
    "        ref_theta_path=\"/drive2/Kuntal/Pysindy-experiment/M2-output/messidor2_all_thetas.npy\",\n",
    "        ref_ids_path=\"/drive2/Kuntal/Pysindy-experiment/M2-output/messidor2_theta_ids.npy\",\n",
    "        test_theta_path=\"/drive2/Kuntal/Pysindy-experiment/M1-output/messidor_all_thetas.npy\",     # <-- test domain\n",
    "        test_ids_path=\"/drive2/Kuntal/Pysindy-experiment/M1-output/messidor_theta_ids.npy\",        # <-- test ids\n",
    "        reference_avg=0.041717564380119414,\n",
    "        conf_interval=(-0.043998791641485506, 0.043998791641485506),\n",
    "        out_csv=\"/drive2/Kuntal/Pysindy-experiment/pysindy/SDG/M2/sgd_m2-on-messidor-1_results.csv\",\n",
    "    )\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Aptos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reference shape: (1057, 60), Test shape: (3662, 60)\n",
      "       image_id   rho_avg  rho_final  inside_interval  point\n",
      "0  000c1434d8d7  0.034137  -0.007580             True      1\n",
      "1  001639a390f0  0.094633   0.052916            False      0\n",
      "2  0024cdab0c1e -0.119469  -0.161187            False      0\n",
      "3  002c21358ce6 -0.041237  -0.082955            False      0\n",
      "4  005b95c28852  0.013128  -0.028589             True      1\n",
      "\n",
      "Interval: [-0.04399879, 0.04399879] | Inside: 1619 / 3662 (44.21%)\n",
      "Results saved to /drive2/Kuntal/Pysindy-experiment/pysindy/SDG/M2/sgd_M2-on-aptos_results.csv\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "def load_theta_and_ids(theta_path, ids_path):\n",
    "    \"\"\"Loads theta vectors and corresponding IDs, flattening each theta to 1D.\"\"\"\n",
    "    thetas = np.load(theta_path)\n",
    "    ids = np.load(ids_path)\n",
    "    # Flatten each theta (N, 3, 20) -> (N, 60)\n",
    "    thetas = thetas.reshape(thetas.shape[0], -1)\n",
    "    # Convert to flat string if IDs are bytes\n",
    "    if ids.dtype.type is np.bytes_:\n",
    "        ids = np.array([x.decode() for x in ids])\n",
    "    return thetas, ids\n",
    "\n",
    "\n",
    "def cosine_similarity_matrix(A, B):\n",
    "    \"\"\"\n",
    "    Compute cosine similarities between all vectors in A (N, D) and B (M, D).\n",
    "    Returns (M, N) matrix where entry (i, j) = cos_sim(B[i], A[j])\n",
    "    \"\"\"\n",
    "    # Compute dot products for all pairs\n",
    "    dot_products = np.dot(B, A.T)  # (M, N)\n",
    "    # Compute norms\n",
    "    B_norms = np.linalg.norm(B, axis=1, keepdims=True)    # (M, 1)\n",
    "    A_norms = np.linalg.norm(A, axis=1, keepdims=True)    # (N, 1)\n",
    "    # Outer product of norms: (M, 1) * (1, N) -> (M, N)\n",
    "    norm_matrix = B_norms @ A_norms.T + 1e-8\n",
    "    # Elementwise division\n",
    "    return dot_products / norm_matrix\n",
    "\n",
    "\n",
    "def sgd_test_with_ids(\n",
    "    ref_theta_path, ref_ids_path,\n",
    "    test_theta_path, test_ids_path,\n",
    "    reference_avg,\n",
    "    conf_interval,\n",
    "    out_csv=None,\n",
    "    verbose=True\n",
    "):\n",
    "    # 1. Load data\n",
    "    A, A_ids = load_theta_and_ids(ref_theta_path, ref_ids_path)\n",
    "    B, B_ids = load_theta_and_ids(test_theta_path, test_ids_path)\n",
    "    if verbose:\n",
    "        print(f\"Reference shape: {A.shape}, Test shape: {B.shape}\")\n",
    "\n",
    "    # 2. Cosine similarity matrix\n",
    "    sim_matrix = cosine_similarity_matrix(A, B)\n",
    "    # 3. Row-wise average for each test image\n",
    "    rho_avg = np.mean(sim_matrix, axis=1)\n",
    "    \n",
    "    # 4. Subtract reference average\n",
    "    rho_final = rho_avg - reference_avg\n",
    "    # 5. Interval check\n",
    "    min_bound, max_bound = conf_interval\n",
    "    inside_mask = (rho_final >= min_bound) & (rho_final <= max_bound)\n",
    "    percent_inside = 100.0 * inside_mask.sum() / len(rho_final)\n",
    "    # 6. Assign points (1 = inside, 0 = outside)\n",
    "    points = inside_mask.astype(int)\n",
    "\n",
    "    # 7. Tabulate results with IDs and points\n",
    "    results_df = pd.DataFrame({\n",
    "        \"image_id\": B_ids,\n",
    "        \"rho_avg\": rho_avg,\n",
    "        \"rho_final\": rho_final,\n",
    "        \"inside_interval\": inside_mask,\n",
    "        \"point\": points\n",
    "    })\n",
    "    if verbose:\n",
    "        print(results_df.head())\n",
    "        print(f\"\\nInterval: [{min_bound:.8f}, {max_bound:.8f}] | Inside: {inside_mask.sum()} / {len(rho_final)} ({percent_inside:.2f}%)\")\n",
    "    if out_csv is not None:\n",
    "        results_df.to_csv(out_csv, index=False)\n",
    "        print(f\"Results saved to {out_csv}\")\n",
    "    return results_df\n",
    "\n",
    "# -------------------- USAGE EXAMPLE --------------------\n",
    "if __name__ == \"__main__\":\n",
    "    # Messidor 1 example\n",
    "    sgd_test_with_ids(\n",
    "        ref_theta_path=\"/drive2/Kuntal/Pysindy-experiment/M2-output/messidor2_all_thetas.npy\",\n",
    "        ref_ids_path=\"/drive2/Kuntal/Pysindy-experiment/M2-output/messidor2_theta_ids.npy\",\n",
    "        test_theta_path=\"/drive2/Kuntal/Pysindy-experiment/aptos_theta_data/aptos_all_thetas.npy\",     # <-- test domain\n",
    "        test_ids_path=\"/drive2/Kuntal/Pysindy-experiment/aptos_theta_data/aptos_theta_ids.npy\",        # <-- test ids\n",
    "        reference_avg=0.041717564380119414,\n",
    "        conf_interval=(-0.043998791641485506, 0.043998791641485506),\n",
    "        out_csv=\"/drive2/Kuntal/Pysindy-experiment/pysindy/SDG/M2/sgd_M2-on-aptos_results.csv\",\n",
    "        verbose=True\n",
    "    )\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Eyepacks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Reference shape: (1057, 60), Test shape: (35122, 60)\n",
      "   image_id   rho_avg  rho_final  inside_interval  point\n",
      "0   10_left -0.017071  -0.058788            False      0\n",
      "1  10_right  0.017312  -0.024405             True      1\n",
      "2   13_left -0.041169  -0.082887            False      0\n",
      "3  13_right  0.074569   0.032852             True      1\n",
      "4   15_left  0.009537  -0.032180             True      1\n",
      "\n",
      "Interval: [-0.04399879, 0.04399879] | Inside: 15248 / 35122 (43.41%)\n",
      "Results saved to /drive2/Kuntal/Pysindy-experiment/pysindy/SDG/M2/sgd_M2-on-eyepack_results.csv\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "def load_theta_and_ids(theta_path, ids_path):\n",
    "    \"\"\"Loads theta vectors and corresponding IDs, flattening each theta to 1D.\"\"\"\n",
    "    thetas = np.load(theta_path)\n",
    "    ids = np.load(ids_path)\n",
    "    # Flatten each theta (N, 3, 20) -> (N, 60)\n",
    "    thetas = thetas.reshape(thetas.shape[0], -1)\n",
    "    # Convert to flat string if IDs are bytes\n",
    "    if ids.dtype.type is np.bytes_:\n",
    "        ids = np.array([x.decode() for x in ids])\n",
    "    return thetas, ids\n",
    "\n",
    "\n",
    "def cosine_similarity_matrix(A, B):\n",
    "    \"\"\"\n",
    "    Compute cosine similarities between all vectors in A (N, D) and B (M, D).\n",
    "    Returns (M, N) matrix where entry (i, j) = cos_sim(B[i], A[j])\n",
    "    \"\"\"\n",
    "    # Compute dot products for all pairs\n",
    "    dot_products = np.dot(B, A.T)  # (M, N)\n",
    "    # Compute norms\n",
    "    B_norms = np.linalg.norm(B, axis=1, keepdims=True)    # (M, 1)\n",
    "    A_norms = np.linalg.norm(A, axis=1, keepdims=True)    # (N, 1)\n",
    "    # Outer product of norms: (M, 1) * (1, N) -> (M, N)\n",
    "    norm_matrix = B_norms @ A_norms.T + 1e-8\n",
    "    # Elementwise division\n",
    "    return dot_products / norm_matrix\n",
    "\n",
    "\n",
    "def sgd_test_with_ids(\n",
    "    ref_theta_path, ref_ids_path,\n",
    "    test_theta_path, test_ids_path,\n",
    "    reference_avg,\n",
    "    conf_interval,\n",
    "    out_csv=None,\n",
    "    verbose=True\n",
    "):\n",
    "    # 1. Load data\n",
    "    A, A_ids = load_theta_and_ids(ref_theta_path, ref_ids_path)\n",
    "    B, B_ids = load_theta_and_ids(test_theta_path, test_ids_path)\n",
    "    if verbose:\n",
    "        print(f\"Reference shape: {A.shape}, Test shape: {B.shape}\")\n",
    "\n",
    "    # 2. Cosine similarity matrix\n",
    "    sim_matrix = cosine_similarity_matrix(A, B)\n",
    "    # 3. Row-wise average for each test image\n",
    "    rho_avg = np.mean(sim_matrix, axis=1)\n",
    "    \n",
    "    # 4. Subtract reference average\n",
    "    rho_final = rho_avg - reference_avg\n",
    "    # 5. Interval check\n",
    "    min_bound, max_bound = conf_interval\n",
    "    inside_mask = (rho_final >= min_bound) & (rho_final <= max_bound)\n",
    "    percent_inside = 100.0 * inside_mask.sum() / len(rho_final)\n",
    "    # 6. Assign points (1 = inside, 0 = outside)\n",
    "    points = inside_mask.astype(int)\n",
    "\n",
    "    # 7. Tabulate results with IDs and points\n",
    "    results_df = pd.DataFrame({\n",
    "        \"image_id\": B_ids,\n",
    "        \"rho_avg\": rho_avg,\n",
    "        \"rho_final\": rho_final,\n",
    "        \"inside_interval\": inside_mask,\n",
    "        \"point\": points\n",
    "    })\n",
    "    if verbose:\n",
    "        print(results_df.head())\n",
    "        print(f\"\\nInterval: [{min_bound:.8f}, {max_bound:.8f}] | Inside: {inside_mask.sum()} / {len(rho_final)} ({percent_inside:.2f}%)\")\n",
    "    if out_csv is not None:\n",
    "        results_df.to_csv(out_csv, index=False)\n",
    "        print(f\"Results saved to {out_csv}\")\n",
    "    return results_df\n",
    "\n",
    "# -------------------- USAGE EXAMPLE --------------------\n",
    "if __name__ == \"__main__\":\n",
    "    # Messidor 1 example\n",
    "    sgd_test_with_ids(\n",
    "        ref_theta_path=\"/drive2/Kuntal/Pysindy-experiment/M2-output/messidor2_all_thetas.npy\",\n",
    "        ref_ids_path=\"/drive2/Kuntal/Pysindy-experiment/M2-output/messidor2_theta_ids.npy\",\n",
    "        test_theta_path=\"/drive2/Kuntal/Pysindy-experiment/eyepacs_theta_data/eyepacs_all_thetas.npy\",     # <-- test domain\n",
    "        test_ids_path=\"/drive2/Kuntal/Pysindy-experiment/eyepacs_theta_data/eyepacs_theta_ids.npy\",        # <-- test ids\n",
    "        reference_avg=0.041717564380119414,\n",
    "        conf_interval=(-0.043998791641485506, 0.043998791641485506),\n",
    "        out_csv=\"/drive2/Kuntal/Pysindy-experiment/pysindy/SDG/M2/sgd_M2-on-eyepack_results.csv\",\n",
    "        verbose=True\n",
    "    )\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
