{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import time\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from util_results import Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from te_datasim.jointprocess import MVJointProcessSimulator\n",
    "from te_datasim.lineargaussian import MVLinearGaussianSimulator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from reference_cmigan import TE_cmigan\n",
    "\n",
    "# Training settings passed to every TE_cmigan call in this notebook\n",
    "EPOCHS = 2_500\n",
    "BATCH_SIZE = 1_000"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Basic Validity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of repetitions per configuration; the replicate index is also used as the seed\n",
    "REPLICATES = 5\n",
    "# Value passed as `time=` to the simulators in the basic-validity experiments\n",
    "SAMPLE_SIZE = 10_000"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Linear Gaussian"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Coupling values to sweep: 9 evenly spaced points on [0, 1]\n",
    "lg_lambda_range = list(np.linspace(0, 1, num=9, endpoint=True))\n",
    "\n",
    "# One linear Gaussian simulator per coupling value\n",
    "lg_generator_lst = [\n",
    "    MVLinearGaussianSimulator(n_dim=1, coupling=coupling)\n",
    "    for coupling in lg_lambda_range\n",
    "]\n",
    "\n",
    "# Analytic transfer entropy of every simulator, in both directions\n",
    "lg_TE_X2Y_ref_lst = [sim.analytic_transfer_entropy('X', 'Y') for sim in lg_generator_lst]\n",
    "lg_TE_Y2X_ref_lst = [sim.analytic_transfer_entropy('Y', 'X') for sim in lg_generator_lst]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Basic validity, linear Gaussian: for each replicate and each coupling value,\n",
    "# simulate a series and estimate transfer entropy in both directions.\n",
    "\n",
    "# DataFrame.to_csv does not create missing parent directories. Create the output\n",
    "# folder before the loop so the run cannot fail at the final save after all\n",
    "# estimates have already been computed.\n",
    "os.makedirs('results/cmigan', exist_ok=True)\n",
    "\n",
    "lg_results_TE_X2Y = Results(columns=['method', 'coupling'])\n",
    "lg_results_TE_Y2X = Results(columns=['method', 'coupling'])\n",
    "\n",
    "for r in range(REPLICATES):\n",
    "    print(f\"\\n### REPLICATE {r+1}/{REPLICATES} ###\\n\")\n",
    "    for lam, generator in zip(lg_lambda_range, lg_generator_lst):\n",
    "        print(\"# Coupling = \", lam, \"#\")\n",
    "        # Simulate data; the replicate index doubles as the seed\n",
    "        X, Y = generator.simulate(time=SAMPLE_SIZE, seed=r)\n",
    "        # Estimate X -> Y\n",
    "        TE_X2Y = TE_cmigan(X, Y, epochs=EPOCHS, batch_size=BATCH_SIZE)\n",
    "        lg_results_TE_X2Y.write(method='cmigan', coupling=lam, value=TE_X2Y)\n",
    "        # Estimate Y -> X\n",
    "        TE_Y2X = TE_cmigan(Y, X, epochs=EPOCHS, batch_size=BATCH_SIZE)\n",
    "        lg_results_TE_Y2X.write(method='cmigan', coupling=lam, value=TE_Y2X)\n",
    "\n",
    "# Persist the collected estimates, one file per direction\n",
    "lg_results_TE_X2Y.df.to_csv('results/cmigan/lg_results_TE_X2Y_bv.csv', index=False)\n",
    "lg_results_TE_Y2X.df.to_csv('results/cmigan/lg_results_TE_Y2X_bv.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Joint Process"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Lambda values to sweep: 9 evenly spaced points on [-3, 3]\n",
    "jp_lambda_range = list(np.linspace(-3, 3, num=9, endpoint=True))\n",
    "\n",
    "# One joint-process simulator per lambda value\n",
    "jp_generator_lst = [\n",
    "    MVJointProcessSimulator(n_dim=1, lam=lam_value)\n",
    "    for lam_value in jp_lambda_range\n",
    "]\n",
    "\n",
    "# Analytic transfer entropy of every simulator, in both directions\n",
    "jp_TE_X2Y_ref_lst = [sim.analytic_transfer_entropy('X', 'Y') for sim in jp_generator_lst]\n",
    "jp_TE_Y2X_ref_lst = [sim.analytic_transfer_entropy('Y', 'X') for sim in jp_generator_lst]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Basic validity, joint process: for each replicate and each lambda value,\n",
    "# simulate a series and estimate transfer entropy in both directions.\n",
    "\n",
    "# DataFrame.to_csv does not create missing parent directories. Create the output\n",
    "# folder before the loop so the run cannot fail at the final save after all\n",
    "# estimates have already been computed.\n",
    "os.makedirs('results/cmigan', exist_ok=True)\n",
    "\n",
    "jp_results_TE_X2Y = Results(columns=['method', 'coupling'])\n",
    "jp_results_TE_Y2X = Results(columns=['method', 'coupling'])\n",
    "\n",
    "for r in range(REPLICATES):\n",
    "    print(f\"\\n### REPLICATE {r+1}/{REPLICATES} ###\\n\")\n",
    "    for lam, generator in zip(jp_lambda_range, jp_generator_lst):\n",
    "        print(\"# Coupling = \", lam, \"#\")\n",
    "        # Simulate data; the replicate index doubles as the seed\n",
    "        X, Y = generator.simulate(time=SAMPLE_SIZE, seed=r)\n",
    "        # Estimate X -> Y\n",
    "        TE_X2Y = TE_cmigan(X, Y, epochs=EPOCHS, batch_size=BATCH_SIZE)\n",
    "        jp_results_TE_X2Y.write(method='cmigan', coupling=lam, value=TE_X2Y)\n",
    "        # Estimate Y -> X\n",
    "        TE_Y2X = TE_cmigan(Y, X, epochs=EPOCHS, batch_size=BATCH_SIZE)\n",
    "        jp_results_TE_Y2X.write(method='cmigan', coupling=lam, value=TE_Y2X)\n",
    "\n",
    "# Persist the collected estimates, one file per direction\n",
    "jp_results_TE_X2Y.df.to_csv('results/cmigan/jp_results_TE_X2Y_bv.csv', index=False)\n",
    "jp_results_TE_Y2X.df.to_csv('results/cmigan/jp_results_TE_Y2X_bv.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Sample Size Scaling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fixed one-dimensional simulators used for the sample-size sweep\n",
    "lg_generator = MVLinearGaussianSimulator(coupling=0.5, n_dim=1)\n",
    "jp_generator = MVJointProcessSimulator(lam=0.0, n_dim=1)\n",
    "\n",
    "# Values passed as `time=` to the simulators\n",
    "sample_sizes = [500, 1_000, 5_000, 10_000, 50_000, 100_000]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample-size scaling, linear Gaussian: for each replicate and each sample size,\n",
    "# simulate a series and estimate transfer entropy in both directions.\n",
    "# NOTE(review): the smallest sample size (500) is below BATCH_SIZE (1000);\n",
    "# confirm that TE_cmigan handles a batch size larger than the data.\n",
    "\n",
    "# DataFrame.to_csv does not create missing parent directories. Create the output\n",
    "# folder before the loop so the run cannot fail at the final save after all\n",
    "# estimates have already been computed.\n",
    "os.makedirs('results/cmigan', exist_ok=True)\n",
    "\n",
    "lg_results_TE_X2Y = Results(columns=['method', 'sample_size'])\n",
    "lg_results_TE_Y2X = Results(columns=['method', 'sample_size'])\n",
    "\n",
    "for r in range(REPLICATES):\n",
    "    print(f\"\\n### REPLICATE {r+1}/{REPLICATES} ###\\n\")\n",
    "    for samples in sample_sizes:\n",
    "        print(\"# Samples = \", samples, \"#\")\n",
    "        # Simulate data; the replicate index doubles as the seed\n",
    "        X, Y = lg_generator.simulate(time=samples, seed=r)\n",
    "        # Estimate X -> Y\n",
    "        TE_X2Y = TE_cmigan(X, Y, epochs=EPOCHS, batch_size=BATCH_SIZE)\n",
    "        lg_results_TE_X2Y.write(method='cmigan', sample_size=samples, value=TE_X2Y)\n",
    "        # Estimate Y -> X\n",
    "        TE_Y2X = TE_cmigan(Y, X, epochs=EPOCHS, batch_size=BATCH_SIZE)\n",
    "        lg_results_TE_Y2X.write(method='cmigan', sample_size=samples, value=TE_Y2X)\n",
    "\n",
    "# Persist the collected estimates, one file per direction\n",
    "lg_results_TE_X2Y.df.to_csv('results/cmigan/lg_results_TE_X2Y_ss.csv', index=False)\n",
    "lg_results_TE_Y2X.df.to_csv('results/cmigan/lg_results_TE_Y2X_ss.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample-size scaling, joint process: for each replicate and each sample size,\n",
    "# simulate a series and estimate transfer entropy in both directions.\n",
    "# NOTE(review): the smallest sample size (500) is below BATCH_SIZE (1000);\n",
    "# confirm that TE_cmigan handles a batch size larger than the data.\n",
    "\n",
    "# DataFrame.to_csv does not create missing parent directories. Create the output\n",
    "# folder before the loop so the run cannot fail at the final save after all\n",
    "# estimates have already been computed.\n",
    "os.makedirs('results/cmigan', exist_ok=True)\n",
    "\n",
    "jp_results_TE_X2Y = Results(columns=['method', 'sample_size'])\n",
    "jp_results_TE_Y2X = Results(columns=['method', 'sample_size'])\n",
    "\n",
    "for r in range(REPLICATES):\n",
    "    print(f\"\\n### REPLICATE {r+1}/{REPLICATES} ###\\n\")\n",
    "    for samples in sample_sizes:\n",
    "        print(\"# Samples = \", samples, \"#\")\n",
    "        # Simulate data; the replicate index doubles as the seed\n",
    "        X, Y = jp_generator.simulate(time=samples, seed=r)\n",
    "        # Estimate X -> Y\n",
    "        TE_X2Y = TE_cmigan(X, Y, epochs=EPOCHS, batch_size=BATCH_SIZE)\n",
    "        jp_results_TE_X2Y.write(method='cmigan', sample_size=samples, value=TE_X2Y)\n",
    "        # Estimate Y -> X\n",
    "        TE_Y2X = TE_cmigan(Y, X, epochs=EPOCHS, batch_size=BATCH_SIZE)\n",
    "        jp_results_TE_Y2X.write(method='cmigan', sample_size=samples, value=TE_Y2X)\n",
    "\n",
    "# Persist the collected estimates, one file per direction\n",
    "jp_results_TE_X2Y.df.to_csv('results/cmigan/jp_results_TE_X2Y_ss.csv', index=False)\n",
    "jp_results_TE_Y2X.df.to_csv('results/cmigan/jp_results_TE_Y2X_ss.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dimensionality Scaling with redundant dimensions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dimensionalities 1..10 to sweep\n",
    "dim_range = list(range(1, 11))\n",
    "# Values passed as `time=` to the simulators\n",
    "sample_sizes = [10_000, 100_000]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Linear Gaussian"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One linear Gaussian simulator per dimensionality, with n_redundant_dim = n_dim - 1\n",
    "lg_generator_lst = [\n",
    "    MVLinearGaussianSimulator(n_dim=n, n_redundant_dim=n-1)\n",
    "    for n in dim_range\n",
    "]\n",
    "\n",
    "# Analytic transfer entropy of every simulator, in both directions\n",
    "lg_TE_X2Y_ref_lst = [sim.analytic_transfer_entropy('X', 'Y') for sim in lg_generator_lst]\n",
    "lg_TE_Y2X_ref_lst = [sim.analytic_transfer_entropy('Y', 'X') for sim in lg_generator_lst]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dimensionality scaling with redundant dimensions, linear Gaussian: for each\n",
    "# replicate, dimensionality and sample size, simulate a series and estimate\n",
    "# transfer entropy in both directions.\n",
    "\n",
    "# DataFrame.to_csv does not create missing parent directories. Create the output\n",
    "# folder before the loop so the run cannot fail at the final save after all\n",
    "# estimates have already been computed.\n",
    "os.makedirs('results/cmigan', exist_ok=True)\n",
    "\n",
    "lg_results_TE_X2Y = Results(columns=['method', 'n_dim', 'sample_size'])\n",
    "lg_results_TE_Y2X = Results(columns=['method', 'n_dim', 'sample_size'])\n",
    "\n",
    "for r in range(REPLICATES):\n",
    "    print(f\"\\n### REPLICATE {r+1}/{REPLICATES} ###\\n\")\n",
    "    for dim, generator in zip(dim_range, lg_generator_lst):\n",
    "        print(\"## Dim = \", dim, \"#\")\n",
    "        for samples in sample_sizes:\n",
    "            print(\"# Sample size = \", samples, \"#\")\n",
    "            # Simulate data; the replicate index doubles as the seed\n",
    "            X, Y = generator.simulate(time=samples, seed=r)\n",
    "            # Estimate X -> Y\n",
    "            TE_X2Y = TE_cmigan(X, Y, epochs=EPOCHS, batch_size=BATCH_SIZE)\n",
    "            lg_results_TE_X2Y.write(method='cmigan', n_dim=dim, sample_size=samples, value=TE_X2Y)\n",
    "            # Estimate Y -> X\n",
    "            TE_Y2X = TE_cmigan(Y, X, epochs=EPOCHS, batch_size=BATCH_SIZE)\n",
    "            lg_results_TE_Y2X.write(method='cmigan', n_dim=dim, sample_size=samples, value=TE_Y2X)\n",
    "\n",
    "# Persist the collected estimates, one file per direction\n",
    "lg_results_TE_X2Y.df.to_csv('results/cmigan/lg_results_TE_X2Y_dimred.csv', index=False)\n",
    "lg_results_TE_Y2X.df.to_csv('results/cmigan/lg_results_TE_Y2X_dimred.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Joint Process"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One joint-process simulator per dimensionality, with n_redundant_dim = n_dim - 1\n",
    "jp_generator_lst = [\n",
    "    MVJointProcessSimulator(n_dim=n, n_redundant_dim=n-1, lam=0.0)\n",
    "    for n in dim_range\n",
    "]\n",
    "\n",
    "# Analytic transfer entropy of every simulator, in both directions\n",
    "jp_TE_X2Y_ref_lst = [sim.analytic_transfer_entropy('X', 'Y') for sim in jp_generator_lst]\n",
    "jp_TE_Y2X_ref_lst = [sim.analytic_transfer_entropy('Y', 'X') for sim in jp_generator_lst]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dimensionality scaling with redundant dimensions, joint process: for each\n",
    "# replicate, dimensionality and sample size, simulate a series and estimate\n",
    "# transfer entropy in both directions.\n",
    "\n",
    "# DataFrame.to_csv does not create missing parent directories. Create the output\n",
    "# folder before the loop so the run cannot fail at the final save after all\n",
    "# estimates have already been computed.\n",
    "os.makedirs('results/cmigan', exist_ok=True)\n",
    "\n",
    "jp_results_TE_X2Y = Results(columns=['method', 'n_dim', 'sample_size'])\n",
    "jp_results_TE_Y2X = Results(columns=['method', 'n_dim', 'sample_size'])\n",
    "\n",
    "for r in range(REPLICATES):\n",
    "    print(f\"\\n### REPLICATE {r+1}/{REPLICATES} ###\\n\")\n",
    "    for dim, generator in zip(dim_range, jp_generator_lst):\n",
    "        print(\"## Dim = \", dim, \"#\")\n",
    "        for samples in sample_sizes:\n",
    "            print(\"# Sample size = \", samples, \"#\")\n",
    "            # Simulate data; the replicate index doubles as the seed\n",
    "            X, Y = generator.simulate(time=samples, seed=r)\n",
    "            # Estimate X -> Y\n",
    "            TE_X2Y = TE_cmigan(X, Y, epochs=EPOCHS, batch_size=BATCH_SIZE)\n",
    "            jp_results_TE_X2Y.write(method='cmigan', n_dim=dim, sample_size=samples, value=TE_X2Y)\n",
    "            # Estimate Y -> X\n",
    "            TE_Y2X = TE_cmigan(Y, X, epochs=EPOCHS, batch_size=BATCH_SIZE)\n",
    "            jp_results_TE_Y2X.write(method='cmigan', n_dim=dim, sample_size=samples, value=TE_Y2X)\n",
    "\n",
    "# Persist the collected estimates, one file per direction\n",
    "jp_results_TE_X2Y.df.to_csv('results/cmigan/jp_results_TE_X2Y_dimred.csv', index=False)\n",
    "jp_results_TE_Y2X.df.to_csv('results/cmigan/jp_results_TE_Y2X_dimred.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dimensionality Scaling without redundant dimensions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dimensionalities 1..10 to sweep\n",
    "dim_range = list(range(1, 11))\n",
    "# Values passed as `time=` to the simulators\n",
    "sample_sizes = [10_000, 100_000]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Linear Gaussian"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One linear Gaussian simulator per dimensionality\n",
    "lg_generator_lst = [\n",
    "    MVLinearGaussianSimulator(n_dim=n)\n",
    "    for n in dim_range\n",
    "]\n",
    "\n",
    "# Analytic transfer entropy of every simulator, in both directions\n",
    "lg_TE_X2Y_ref_lst = [sim.analytic_transfer_entropy('X', 'Y') for sim in lg_generator_lst]\n",
    "lg_TE_Y2X_ref_lst = [sim.analytic_transfer_entropy('Y', 'X') for sim in lg_generator_lst]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dimensionality scaling without redundant dimensions, linear Gaussian: for each\n",
    "# replicate, dimensionality and sample size, simulate a series and estimate\n",
    "# transfer entropy in both directions.\n",
    "\n",
    "# DataFrame.to_csv does not create missing parent directories. Create the output\n",
    "# folder before the loop so the run cannot fail at the final save after all\n",
    "# estimates have already been computed.\n",
    "os.makedirs('results/cmigan', exist_ok=True)\n",
    "\n",
    "lg_results_TE_X2Y = Results(columns=['method', 'n_dim', 'sample_size'])\n",
    "lg_results_TE_Y2X = Results(columns=['method', 'n_dim', 'sample_size'])\n",
    "\n",
    "for r in range(REPLICATES):\n",
    "    print(f\"\\n### REPLICATE {r+1}/{REPLICATES} ###\\n\")\n",
    "    for dim, generator in zip(dim_range, lg_generator_lst):\n",
    "        print(\"## Dim = \", dim, \"#\")\n",
    "        for samples in sample_sizes:\n",
    "            print(\"# Sample size = \", samples, \"#\")\n",
    "            # Simulate data; the replicate index doubles as the seed\n",
    "            X, Y = generator.simulate(time=samples, seed=r)\n",
    "            # Estimate X -> Y\n",
    "            TE_X2Y = TE_cmigan(X, Y, epochs=EPOCHS, batch_size=BATCH_SIZE)\n",
    "            lg_results_TE_X2Y.write(method='cmigan', n_dim=dim, sample_size=samples, value=TE_X2Y)\n",
    "            # Estimate Y -> X\n",
    "            TE_Y2X = TE_cmigan(Y, X, epochs=EPOCHS, batch_size=BATCH_SIZE)\n",
    "            lg_results_TE_Y2X.write(method='cmigan', n_dim=dim, sample_size=samples, value=TE_Y2X)\n",
    "\n",
    "# Persist the collected estimates, one file per direction\n",
    "lg_results_TE_X2Y.df.to_csv('results/cmigan/lg_results_TE_X2Y_dim.csv', index=False)\n",
    "lg_results_TE_Y2X.df.to_csv('results/cmigan/lg_results_TE_Y2X_dim.csv', index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Joint Process"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One joint-process simulator per dimensionality\n",
    "jp_generator_lst = [\n",
    "    MVJointProcessSimulator(n_dim=n, lam=0.0)\n",
    "    for n in dim_range\n",
    "]\n",
    "\n",
    "# Analytic transfer entropy of every simulator, in both directions\n",
    "jp_TE_X2Y_ref_lst = [sim.analytic_transfer_entropy('X', 'Y') for sim in jp_generator_lst]\n",
    "jp_TE_Y2X_ref_lst = [sim.analytic_transfer_entropy('Y', 'X') for sim in jp_generator_lst]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dimensionality scaling without redundant dimensions, joint process: for each\n",
    "# replicate, dimensionality and sample size, simulate a series and estimate\n",
    "# transfer entropy in both directions.\n",
    "\n",
    "# DataFrame.to_csv does not create missing parent directories. Create the output\n",
    "# folder before the loop so the run cannot fail at the final save after all\n",
    "# estimates have already been computed.\n",
    "os.makedirs('results/cmigan', exist_ok=True)\n",
    "\n",
    "jp_results_TE_X2Y = Results(columns=['method', 'n_dim', 'sample_size'])\n",
    "jp_results_TE_Y2X = Results(columns=['method', 'n_dim', 'sample_size'])\n",
    "\n",
    "for r in range(REPLICATES):\n",
    "    print(f\"\\n### REPLICATE {r+1}/{REPLICATES} ###\\n\")\n",
    "    for dim, generator in zip(dim_range, jp_generator_lst):\n",
    "        print(\"## Dim = \", dim, \"#\")\n",
    "        for samples in sample_sizes:\n",
    "            print(\"# Sample size = \", samples, \"#\")\n",
    "            # Simulate data; the replicate index doubles as the seed\n",
    "            X, Y = generator.simulate(time=samples, seed=r)\n",
    "            # Estimate X -> Y\n",
    "            TE_X2Y = TE_cmigan(X, Y, epochs=EPOCHS, batch_size=BATCH_SIZE)\n",
    "            jp_results_TE_X2Y.write(method='cmigan', n_dim=dim, sample_size=samples, value=TE_X2Y)\n",
    "            # Estimate Y -> X\n",
    "            TE_Y2X = TE_cmigan(Y, X, epochs=EPOCHS, batch_size=BATCH_SIZE)\n",
    "            jp_results_TE_Y2X.write(method='cmigan', n_dim=dim, sample_size=samples, value=TE_Y2X)\n",
    "\n",
    "# Persist the collected estimates, one file per direction\n",
    "jp_results_TE_X2Y.df.to_csv('results/cmigan/jp_results_TE_X2Y_dim.csv', index=False)\n",
    "jp_results_TE_Y2X.df.to_csv('results/cmigan/jp_results_TE_Y2X_dim.csv', index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
