{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Source Code for Appendix C.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from numpy.linalg import eigvals, norm, inv\n",
    "import matplotlib.pyplot as plt\n",
    "np.random.seed(1)\n",
    "\n",
    "# Set sample size and parameters\n",
    "N = 50\n",
    "p = 5\n",
    "q = 10\n",
    "noise = 1\n",
    "\n",
    "# Initialize lists to store the computed values\n",
    "min_eigenvalues_Z = []\n",
    "min_eigenvalues_X = []\n",
    "spectral_norms = []\n",
    "\n",
    "# Number of simulations\n",
    "num_simulations = 10000\n",
    "\n",
    "for _ in range(num_simulations):\n",
    "    # Step 1: Generate Instrument Z\n",
    "    # Z is an (N x q) matrix\n",
    "    Z = np.random.multivariate_normal(mean=np.zeros(q), cov=np.eye(q), size=N)\n",
    "    \n",
    "    # Theta is a (q x p) matrix\n",
    "    Theta = np.random.multivariate_normal(mean=np.zeros(p), cov=np.eye(p), size=q)\n",
    "    \n",
    "    # beta is a (p,) vector\n",
    "    beta = np.random.normal(0, 1, p)\n",
    "    \n",
    "    # Phi is a (p x p) matrix\n",
    "    Phi = np.random.multivariate_normal(mean=np.zeros(p), cov=np.eye(p), size=p)\n",
    "    \n",
    "    # phi is a (p,) vector\n",
    "    phi = np.random.normal(0, 1, p)\n",
    "    \n",
    "    # Step 2: Generate Endogenous X\n",
    "    # U is an (N x p) matrix\n",
    "    U = np.random.multivariate_normal(mean=np.zeros(p), cov=np.eye(p), size=N)  # Unobserved variable causing endogeneity\n",
    "    \n",
    "    # E2 is an (N x p) matrix\n",
    "    E2 = noise * (U @ Phi) + np.random.multivariate_normal(mean=np.zeros(p), cov=np.eye(p), size=N)\n",
    "    \n",
    "    # X is influenced by Z and U; it's an (N x p) matrix\n",
    "    X = (Z @ Theta) + E2\n",
    "    \n",
    "    # E1 is an (N,) vector\n",
    "    E1 = noise * (U @ phi) + np.random.normal(0, 1, N)\n",
    "    \n",
    "    # Y is influenced by X and U directly; it's an (N,) vector\n",
    "    Y = (X @ beta) + E1\n",
    "    \n",
    "    # Compute eigenvalues of (Z^T Z) / N\n",
    "    eigenvalues_Z = eigvals((Z.T @ Z) / N)\n",
    "    min_eigen_Z = np.min(eigenvalues_Z)\n",
    "    min_eigenvalues_Z.append(min_eigen_Z)\n",
    "    \n",
    "    # Compute projection matrix P onto the column space of Z\n",
    "    ZTZ_inv = inv(Z.T @ Z)\n",
    "    P = Z @ ZTZ_inv @ Z.T\n",
    "    \n",
    "    # Compute eigenvalues of (X^T P X) / N\n",
    "    eigenvalues_X = eigvals((X.T @ P @ X) / N)\n",
    "    min_eigen_X = np.min(eigenvalues_X)\n",
    "    min_eigenvalues_X.append(min_eigen_X)\n",
    "    \n",
    "    # Compute the spectral norm of (Z^T E2) / N\n",
    "    spectral_norm = norm((Z.T @ E2) / N, ord=2)\n",
    "    spectral_norms.append(spectral_norm)\n",
    "\n",
    "# Calculate the 1% cutoff points\n",
    "cutoff_min_eigen_Z = np.percentile(min_eigenvalues_Z, 0.33)\n",
    "cutoff_min_eigen_X = np.percentile(min_eigenvalues_X, 0.33)\n",
    "cutoff_spectral_norm = np.percentile(spectral_norms, 99.67)\n",
    "\n",
    "# Plot histograms of the computed values with 1% cutoff lines\n",
    "# Histogram of minimum eigenvalues of (Z^T Z) / N\n",
    "plt.figure(figsize=(10, 6))\n",
    "plt.hist(min_eigenvalues_Z, bins=30, color='skyblue', edgecolor='black')\n",
    "plt.axvline(cutoff_min_eigen_Z, color='red', linestyle='dashed', linewidth=2, label=f'0.33% Quantile: {cutoff_min_eigen_Z:.4f}')\n",
    "plt.title(r'$\\lambda_\\min(\\frac{Z^T Z}{N})$', fontsize=24)\n",
    "plt.xlabel('Minimum Eigenvalue', fontsize=24)\n",
    "plt.ylabel('Frequency', fontsize=24)\n",
    "plt.legend(fontsize=18)\n",
    "plt.tick_params(axis='both', which='major', labelsize=18)\n",
    "plt.grid(True)\n",
    "plt.savefig(\"figures/sampcond1.png\", dpi=300, bbox_inches='tight')\n",
    "plt.show()\n",
    "\n",
    "# Histogram of minimum eigenvalues of (X^T P X) / N\n",
    "plt.figure(figsize=(10, 6))\n",
    "plt.hist(min_eigenvalues_X, bins=30, color='lightgreen', edgecolor='black')\n",
    "plt.axvline(cutoff_min_eigen_X, color='red', linestyle='dashed', linewidth=2, label=f'0.33% Quantile: {cutoff_min_eigen_X:.4f}')\n",
    "plt.title(r'$\\lambda_\\min(\\frac{X^T P X)}{N})$', fontsize=24)\n",
    "plt.xlabel('Minimum Eigenvalue', fontsize=24)\n",
    "plt.ylabel('Frequency', fontsize=24)\n",
    "plt.legend(fontsize=18)\n",
    "plt.tick_params(axis='both', which='major', labelsize=18)\n",
    "plt.grid(True)\n",
    "plt.savefig(\"figures/sampcond2.png\", dpi=300, bbox_inches='tight')\n",
    "plt.show()\n",
    "\n",
    "# Histogram of spectral norms of (Z^T E2) / N\n",
    "plt.figure(figsize=(10, 6))\n",
    "plt.hist(spectral_norms, bins=30, color='salmon', edgecolor='black')\n",
    "plt.axvline(cutoff_spectral_norm, color='red', linestyle='dashed', linewidth=2, label=f'99.67% Quantile: {cutoff_spectral_norm:.4f}')\n",
    "plt.title(r'$\\|\\frac{Z^T \\mathcal{E}_2}{N}\\|$', fontsize=24)\n",
    "plt.xlabel('Spectral Norm', fontsize=24)\n",
    "plt.ylabel('Frequency', fontsize=24)\n",
    "plt.legend(fontsize=18)\n",
    "plt.tick_params(axis='both', which='major', labelsize=18)\n",
    "plt.grid(True)\n",
    "plt.savefig(\"figures/sampcond3.png\", dpi=300, bbox_inches='tight')\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Source Code for Appendix C.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy.stats import norm\n",
    "from scipy.stats import multivariate_normal as mvn\n",
    "from statsmodels.api import OLS\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Set seed for reproducibility\n",
    "np.random.seed(1)\n",
    "\n",
    "# Sample size and dimensions\n",
    "N = 50\n",
    "p = 5\n",
    "q = 10\n",
    "noise = 1\n",
    "iterations = 100\n",
    "\n",
    "# Step 1: Generate Instrument Z\n",
    "Z = mvn.rvs(mean=np.zeros(q), cov=np.eye(q), size=N)\n",
    "Theta = mvn.rvs(mean=np.zeros(p), cov=np.eye(p), size=q)\n",
    "beta = norm.rvs(0, 1, size=p)\n",
    "Phi = mvn.rvs(mean=np.zeros(p), cov=np.eye(p), size=p)\n",
    "phi = norm.rvs(0, 1, size=p)\n",
    "\n",
    "# Step 2: Generate Endogenous X\n",
    "U = mvn.rvs(mean=np.zeros(p), cov=np.eye(p), size=N)\n",
    "X = Z @ Theta + noise * U @ Phi + mvn.rvs(mean=np.zeros(p), cov=np.eye(p), size=N)\n",
    "\n",
    "# Generate test data\n",
    "N_test = 100\n",
    "Z_test = mvn.rvs(mean=np.zeros(q), cov=np.eye(q), size=N_test)\n",
    "U_test = mvn.rvs(mean=np.zeros(p), cov=np.eye(p), size=N_test)\n",
    "X_test = Z_test @ Theta + mvn.rvs(mean=np.zeros(p), cov=np.eye(p), size=N_test)\n",
    "\n",
    "# Step 3: Generate Outcome Y\n",
    "Y = X @ beta + noise * U @ phi + norm.rvs(0, 1, size=N)\n",
    "Y_test = X_test @ beta + norm.rvs(0, 1, size=N_test)\n",
    "\n",
    "# Prepare the data for IV regression\n",
    "data = pd.DataFrame(np.hstack([Y.reshape(-1, 1), X, Z]), columns=['Y'] + [f'X{i}' for i in range(p)] + [f'Z{i}' for i in range(q)])\n",
    "data_test = pd.DataFrame(np.hstack([Y_test.reshape(-1, 1), X_test, Z_test]), columns=['Y'] + [f'X{i}' for i in range(p)] + [f'Z{i}' for i in range(q)])\n",
    "\n",
    "# Perform IV regression: Y ~ X, instrumented by Z\n",
    "inner_fit = OLS(data[[f'X{i}' for i in range(p)]], data[[f'Z{i}' for i in range(q)]]).fit()\n",
    "X_hat = Z @ inner_fit.params\n",
    "for i in range(p):\n",
    "    data[f'X_hat{i}'] = X_hat.iloc[:, i]\n",
    "\n",
    "outer_fit = OLS(data['Y'], data[[f'X_hat{i}' for i in range(p)]]).fit()\n",
    "beta_2sls = outer_fit.params.values\n",
    "print(f\"OLS_IV beta MSE: {np.sqrt(np.sum((beta_2sls - beta) ** 2))}\")\n",
    "\n",
    "# Additional code to compute gamma and kappa and add to the labels\n",
    "def compute_lambda(alpha, eta, Theta_hat_true, Z, q, p):\n",
    "    matrix_gamma = np.eye(p) - alpha * Theta_hat_true.T @ Z.T @ Z @ Theta_hat_true\n",
    "    eigenvalues_gamma = np.linalg.eigvals(matrix_gamma)\n",
    "    gamma = max(abs(eigenvalues_gamma))\n",
    "    \n",
    "    matrix_kappa = np.eye(q) - eta * Z.T @ Z\n",
    "    eigenvalues_kappa = np.linalg.eigvals(matrix_kappa)\n",
    "    kappa = max(abs(eigenvalues_kappa))\n",
    "    \n",
    "    Lambda = max(gamma, kappa)\n",
    "    return Lambda, gamma, kappa\n",
    "\n",
    "Theta_hat_true = np.array(inner_fit.params)\n",
    "\n",
    "# Gradient method with different learning rates for first figure\n",
    "alpha_1 = 0.0012\n",
    "diffs_dict_1 = {}\n",
    "learning_rates_1 = [0.002, 0.01, 0.02, 0.022]\n",
    "for eta in learning_rates_1:\n",
    "    beta_hat = np.zeros(p)\n",
    "    Theta_hat = np.zeros((q, p))\n",
    "    diffs = []\n",
    "    for _ in range(iterations):\n",
    "        beta_hat -= alpha_1 * Theta_hat.T @ Z.T @ (Z @ Theta_hat @ beta_hat - Y)\n",
    "        Theta_hat -= eta * Z.T @ (Z @ Theta_hat - X)\n",
    "        diffs.append(np.sqrt(np.sum((beta_hat - beta_2sls) ** 2)))\n",
    "    Lambda, gamma, kappa = compute_lambda(alpha_1, eta, Theta_hat_true, Z, q, p)\n",
    "    label = f'$\\eta$={eta}, $\\Lambda$=max({gamma:.2f}, {kappa:.2f})={Lambda:.2f}'\n",
    "    diffs_dict_1[label] = diffs\n",
    "\n",
    "# Gradient method with different learning rates for second figure\n",
    "eta_2 = 0.01\n",
    "diffs_dict_2 = {}\n",
    "learning_rates_2 = [0.0005, 0.001, 0.0015, 0.002]\n",
    "for alpha in learning_rates_2:\n",
    "    beta_hat = np.zeros(p)\n",
    "    Theta_hat = np.zeros((q, p))\n",
    "    diffs = []\n",
    "    for _ in range(iterations):\n",
    "        beta_hat -= alpha * Theta_hat.T @ Z.T @ (Z @ Theta_hat @ beta_hat - Y)\n",
    "        Theta_hat -= eta_2 * Z.T @ (Z @ Theta_hat - X)\n",
    "        diffs.append(np.sqrt(np.sum((beta_hat - beta_2sls) ** 2)))\n",
    "    Lambda, gamma, kappa = compute_lambda(alpha, eta_2, Theta_hat_true, Z, q, p)\n",
    "    label = f'$\\\\alpha$={alpha}, $\\\\Lambda$=max({gamma:.2f}, {kappa:.2f})={Lambda:.2f}'\n",
    "    diffs_dict_2[label] = diffs\n",
    "\n",
    "# Plot the differences over iterations for different learning rates (first figure)\n",
    "fig1, ax1 = plt.subplots(figsize=(14, 10))\n",
    "\n",
    "for label, diffs in diffs_dict_1.items():\n",
    "    ax1.plot(diffs, label=label, linewidth=3)\n",
    "ax1.set_xlabel('Iteration', fontsize=28)\n",
    "ax1.set_ylabel(r'$||\\hat{\\beta}^{(t)} - \\hat{\\beta}_{2sls}||$', fontsize=28)\n",
    "ax1.set_title(f'Convergence of IV Estimate with Gradient Method ($\\\\alpha$={alpha_1})', fontsize=28, pad=20)\n",
    "ax1.legend(loc='upper right', fontsize=24)\n",
    "ax1.grid(True)\n",
    "ax1.tick_params(axis='both', which='major', labelsize=22)\n",
    "plt.ylim(0,2.7)\n",
    "\n",
    "plt.savefig(\"figures/gradconv1.png\", dpi=300, bbox_inches='tight')\n",
    "plt.show()\n",
    "\n",
    "# Plot the differences over iterations for different learning rates (second figure)\n",
    "fig2, ax2 = plt.subplots(figsize=(14, 10))\n",
    "\n",
    "for label, diffs in diffs_dict_2.items():\n",
    "    ax2.plot(diffs, label=label, linewidth=3)\n",
    "ax2.set_xlabel('Iteration', fontsize=28)\n",
    "ax2.set_ylabel(r'$||\\hat{\\beta}^{(t)} - \\hat{\\beta}_{2sls}||$', fontsize=28)\n",
    "ax2.set_title(f'Convergence of IV Estimate with Gradient Method ($\\\\eta$={eta_2})', fontsize=28, pad=20)\n",
    "ax2.legend(loc='upper right', fontsize=24)\n",
    "ax2.grid(True)\n",
    "ax2.tick_params(axis='both', which='major', labelsize=22)\n",
    "plt.ylim(0,2.7)\n",
    "\n",
    "plt.savefig(\"figures/gradconv2.png\", dpi=300, bbox_inches='tight')\n",
    "plt.show()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy.stats import norm\n",
    "from scipy.stats import multivariate_normal as mvn\n",
    "from statsmodels.api import OLS\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Set seed for reproducibility\n",
    "np.random.seed(1)\n",
    "\n",
    "# Sample sizes to compare\n",
    "sample_sizes = [50, 100, 150]\n",
    "p = 5\n",
    "q = 10\n",
    "noise = 1\n",
    "# alpha = 0.00002\n",
    "# eta = 0.0008\n",
    "num_simulations = 100\n",
    "\n",
    "# Different iteration counts for GD estimator\n",
    "iterations_list = range(1,101,5)\n",
    "\n",
    "# Function to generate data\n",
    "def generate_data(N, q, p, noise):\n",
    "    Z = mvn.rvs(mean=np.zeros(q), cov=np.eye(q), size=N)\n",
    "    Theta = mvn.rvs(mean=np.zeros(p), cov=np.eye(p), size=q)\n",
    "    beta = norm.rvs(0, 1, size=p)\n",
    "    Phi = mvn.rvs(mean=np.zeros(p), cov=np.eye(p), size=p)\n",
    "    phi = norm.rvs(0, 1, size=p)\n",
    "    U = mvn.rvs(mean=np.zeros(p), cov=np.eye(p), size=N)\n",
    "    X = Z @ Theta + noise * U @ Phi + mvn.rvs(mean=np.zeros(p), cov=np.eye(p), size=N)\n",
    "    Y = X @ beta + noise * U @ phi + norm.rvs(0, 1, size=N)\n",
    "    return Y, X, Z, beta, Theta\n",
    "\n",
    "def compute_lambda(alpha, eta, Theta_hat_true, Z, q, p):\n",
    "    matrix_gamma = np.eye(p) - alpha * Theta_hat_true.T @ Z.T @ Z @ Theta_hat_true\n",
    "    eigenvalues_gamma = np.linalg.eigvals(matrix_gamma)\n",
    "    gamma = max(abs(eigenvalues_gamma))\n",
    "    \n",
    "    matrix_kappa = np.eye(q) - eta * Z.T @ Z\n",
    "    eigenvalues_kappa = np.linalg.eigvals(matrix_kappa)\n",
    "    kappa = max(abs(eigenvalues_kappa))\n",
    "    \n",
    "    Lambda = max(gamma, kappa)\n",
    "    return Lambda, gamma, kappa\n",
    "\n",
    "# Store the results\n",
    "gd_results = {N: {iterations: [] for iterations in iterations_list} for N in sample_sizes}\n",
    "ols_results = []\n",
    "iv_results = []\n",
    "\n",
    "# We will calculate OLS and IV for N=150\n",
    "ols_results_N150 = []\n",
    "iv_results_N150 = []\n",
    "\n",
    "for N in sample_sizes:\n",
    "    for iterations in iterations_list:\n",
    "        gd_final_diffs = []\n",
    "        ols_final_diffs = []\n",
    "        iv_final_diffs = []\n",
    "        for _ in range(num_simulations):\n",
    "            # Generate data\n",
    "            Y, X, Z, beta, Theta = generate_data(N, q, p, noise)\n",
    "            Theta_hat_true = np.array(OLS(X, Z).fit().params)\n",
    "            alpha = 1 / (np.linalg.svd(Z @ Theta_hat_true, compute_uv=False)[0] ** 2)\n",
    "            eta = 1 / (np.linalg.svd(Z, compute_uv=False)[0] ** 2)\n",
    "            # alpha = alpha * 1.9\n",
    "            # eta = eta * 0.1\n",
    "            # print(compute_lambda(alpha, eta, Theta_hat_true, Z, q, p))\n",
    "            # GD Estimator for different iteration counts\n",
    "            inner_fit = OLS(X, Z).fit()\n",
    "            X_hat = Z @ inner_fit.params\n",
    "            beta_gd_hat = np.zeros(p)\n",
    "            Theta_hat = np.zeros((q, p))\n",
    "\n",
    "            for _ in range(iterations):\n",
    "                beta_gd_hat -= alpha * Theta_hat.T @ Z.T @ (Z @ Theta_hat @ beta_gd_hat - Y)\n",
    "                Theta_hat -= eta * Z.T @ (Z @ Theta_hat - X)\n",
    "\n",
    "            gd_final_diff = np.sqrt(np.sum((beta_gd_hat - beta) ** 2))\n",
    "            gd_final_diffs.append(gd_final_diff)\n",
    "\n",
    "            if N == 150:\n",
    "                # OLS Estimator\n",
    "                ols_fit = OLS(Y, X).fit()\n",
    "                beta_ols_hat = ols_fit.params\n",
    "                ols_final_diff = np.sqrt(np.sum((beta_ols_hat - beta) ** 2))\n",
    "                ols_final_diffs.append(ols_final_diff)\n",
    "\n",
    "                # IV Estimator\n",
    "                inner_fit = OLS(X, Z).fit()\n",
    "                X_hat = Z @ inner_fit.params\n",
    "                iv_fit = OLS(Y, X_hat).fit()\n",
    "                beta_iv_hat = iv_fit.params\n",
    "                iv_final_diff = np.sqrt(np.sum((beta_iv_hat - beta) ** 2))\n",
    "                iv_final_diffs.append(iv_final_diff)\n",
    "\n",
    "        avg_gd_final_diff = np.mean(gd_final_diffs)\n",
    "        gd_results[N][iterations].append(avg_gd_final_diff)\n",
    "\n",
    "    if N == 150:\n",
    "        avg_ols_final_diff = np.mean(ols_final_diffs)\n",
    "        avg_iv_final_diff = np.mean(iv_final_diffs)\n",
    "        ols_results_N150 = avg_ols_final_diff\n",
    "        iv_results_N150 = avg_iv_final_diff\n",
    "\n",
    "# Plot the results\n",
    "fig, ax = plt.subplots(figsize=(14, 10))\n",
    "colors = ['b', 'g', 'r']\n",
    "for idx, N in enumerate(sample_sizes):\n",
    "    ax.plot(iterations_list, [np.mean(gd_results[N][it]) for it in iterations_list], marker='o', linestyle='-', linewidth=3, markersize=10,\n",
    "            label=f'GD Estimator (N={N})', color=colors[idx])\n",
    "\n",
    "# Replicate OLS and IV results across the iterations_list for N=150\n",
    "ax.plot(iterations_list, [ols_results_N150] * len(iterations_list), marker='s', linestyle='--', linewidth=3, markersize=10,\n",
    "        label='OLS Estimator (N=150)', color='c')\n",
    "ax.plot(iterations_list, [iv_results_N150] * len(iterations_list), marker='^', linestyle='-.', linewidth=3, markersize=10,\n",
    "        label='2SLS Estimator (N=150)', color='m')\n",
    "\n",
    "ax.set_xlabel('Iterations', fontsize=28)\n",
    "ax.set_ylabel(r'$||\\hat{\\beta}^{(t)} - \\beta||$', fontsize=28)\n",
    "ax.set_title('Bias of Estimators across Iterations', fontsize=28, pad=20)\n",
    "ax.legend(loc='upper right', fontsize=24)\n",
    "ax.grid(True)\n",
    "ax.tick_params(axis='both', which='major', labelsize=22)\n",
    "\n",
    "plt.savefig(\"figures/bias_vs_iteration.png\", dpi=300, bbox_inches='tight')\n",
    "plt.show()\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ivtransformer",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
