{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "11f329a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import os\n",
    "import pandas as pd\n",
    "import scipy\n",
    "from scipy.stats import beta\n",
    "from scipy.stats import binomtest\n",
    "\n",
    "# Path to the CSV that the cells below read; they use its Gender, Dept, Admit and Freq columns.\n",
    "# Placeholder value: replace it with the real location before running.\n",
    "path_to_data = 'path/to/data.csv'  #Input path to data\n",
    "data = pd.read_csv(path_to_data)\n",
    "\n",
    "\n",
    "def iv_test(prob_mat):\n",
    "    '''\n",
    "    Builds the two families of sums checked by the IV inequality.\n",
    "\n",
    "    prob_mat is indexed as prob_mat[g, j, a]; only g and a in {0, 1} are read.\n",
    "    Returns a float array of shape (2, n), n = prob_mat.shape[1], where\n",
    "    row 0 holds prob_mat[0, j, 0] + prob_mat[1, j, 1]  (0-same) and\n",
    "    row 1 holds prob_mat[1, j, 0] + prob_mat[0, j, 1]  (1-diff).\n",
    "    '''\n",
    "    num_cols = prob_mat.shape[1]\n",
    "    sums = np.zeros((2, num_cols))\n",
    "    # Whole-axis slices do the same additions as a per-column loop.\n",
    "    sums[0, :] = prob_mat[0, :, 0] + prob_mat[1, :, 1]\n",
    "    sums[1, :] = prob_mat[1, :, 0] + prob_mat[0, :, 1]\n",
    "    return sums\n",
    "\n",
    "def output_count(samples, dims=(2, -1, 2)):\n",
    "    '''\n",
    "    Returns the fraction of samples that satisfy the IV inequality.\n",
    "\n",
    "    samples is a 2-D array with one flattened joint distribution per row.\n",
    "    Each row is reshaped to dims (axes g, d, a), normalised over d and a so\n",
    "    that every g-slice sums to one, and passed to iv_test. A sample counts as\n",
    "    satisfying the inequality when none of the sums returned by iv_test\n",
    "    reaches 1.\n",
    "\n",
    "    dims defaults to (2, -1, 2): the size of the middle axis is inferred from\n",
    "    the row length instead of being fixed at 6, so rows of length 24 are\n",
    "    still reshaped to (2, 6, 2) while other department counts also work.\n",
    "    '''\n",
    "    satisfied = 0\n",
    "    for sample in samples:\n",
    "        jnt = np.reshape(sample, dims)\n",
    "        # gXdXa, summing over d and a to condition on the first axis\n",
    "        cnd = jnt/np.sum(np.sum(jnt,axis=2,keepdims=True),axis=1,keepdims=True)\n",
    "        iv_sum_res = iv_test(cnd)\n",
    "        if not np.any(iv_sum_res >= 1):  # satisfy IV\n",
    "            satisfied += 1\n",
    "    return satisfied/samples.shape[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fab115c0-f898-4d7c-a3f2-8d8d525d0d48",
   "metadata": {},
   "source": [
    "## The procedure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "46db5cbd-e931-434e-adba-2dd74ca63f28",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Sample from prior distribution\n",
    "sup_gen = (data[\"Gender\"].unique()).shape[0]\n",
    "sup_dom = (data[\"Dept\"].unique()).shape[0] #n\n",
    "sup_out = (data[\"Admit\"].unique()).shape[0]\n",
    "\n",
    "alpha = np.ones(sup_gen*sup_dom*sup_out) #Parameter for the Uniform Dirichlet prior\n",
    "prior_sample_size = int(1e6)\n",
    "posterior_sample_size = int(1e6)\n",
    "\n",
    "prior_samples = np.random.dirichlet(alpha, prior_sample_size) #Sampling from prior\n",
    "c = output_count(prior_samples) #fraction of prior samples that satisfy the IV inequality\n",
    "\n",
    "#sample counts\n",
    "#The way the data is entered, the resulting samples are of the shape (2,n,2) where the first axis corresponds to\n",
    "#gender, the second to the department and the third to the outcome.\n",
    "sample_counts = np.zeros((sup_gen,sup_dom,sup_out))\n",
    "for gender_counter, gender in enumerate(data[\"Gender\"].unique()):\n",
    "    for dept_counter, dept in enumerate(data[\"Dept\"].unique()):\n",
    "        in_cell = (data.Gender==gender)&(data.Dept==dept)\n",
    "        sample_counts[gender_counter,dept_counter,0] = data.loc[in_cell&(data.Admit=='Admitted')].Freq.iloc[0]\n",
    "        sample_counts[gender_counter,dept_counter,1] = data.loc[in_cell&(data.Admit=='Rejected')].Freq.iloc[0]\n",
    "\n",
    "#Updated sample counts that are used to update the posterior\n",
    "posterior_samples = np.random.dirichlet(alpha+np.reshape(sample_counts,(alpha.shape)),posterior_sample_size) #Sampling from posterior\n",
    "f = output_count(posterior_samples) #fraction of posterior samples that satisfy the IV inequality\n",
    "#round() instead of a bare int(): f*posterior_sample_size is a float product that can land just\n",
    "#below the true integer count, and truncation would then drop one success.\n",
    "num_success = int(round(f*posterior_sample_size))\n",
    "#Compute the interval once and reuse both endpoints\n",
    "ci = binomtest(num_success, posterior_sample_size).proportion_ci()\n",
    "print('The confidence interval of the posterior probability is [' + str(ci[0]) + ',' +str(ci[1])+']')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cd181834-6998-4b5c-b461-60bea9950b36",
   "metadata": {},
   "source": [
    "## Trying different priors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6cc0e81e-c661-437d-940d-ec91b8a58a08",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Repeat the posterior computation for symmetric Dirichlet priors of different concentration\n",
    "sup_gen = (data[\"Gender\"].unique()).shape[0]\n",
    "sup_dom = (data[\"Dept\"].unique()).shape[0] #n\n",
    "sup_out = (data[\"Admit\"].unique()).shape[0]\n",
    "alp=0.05 #NOTE(review): not read anywhere in this cell -- confirm whether it was meant as the interval's significance level\n",
    "\n",
    "prior_sample_size = int(1e6)\n",
    "posterior_sample_size = int(1e6)\n",
    "f_vec = []\n",
    "\n",
    "#sample counts: they do not depend on the prior scale, so they are built once before the loop\n",
    "#The way the data is entered, the resulting samples are of the shape (2,n,2) where the first axis corresponds to\n",
    "#gender, the second to the department and the third to the outcome.\n",
    "sample_counts = np.zeros((sup_gen,sup_dom,sup_out))\n",
    "for gender_counter, gender in enumerate(data[\"Gender\"].unique()):\n",
    "    for dept_counter, dept in enumerate(data[\"Dept\"].unique()):\n",
    "        in_cell = (data.Gender==gender)&(data.Dept==dept)\n",
    "        sample_counts[gender_counter,dept_counter,0] = data.loc[in_cell&(data.Admit=='Admitted')].Freq.iloc[0]\n",
    "        sample_counts[gender_counter,dept_counter,1] = data.loc[in_cell&(data.Admit=='Rejected')].Freq.iloc[0]\n",
    "\n",
    "scale_vec  = np.logspace(np.log10(0.01),5,20)\n",
    "for scale in scale_vec:\n",
    "    alpha = scale*np.ones(sup_gen*sup_dom*sup_out) #Parameter for the symmetric Dirichlet prior\n",
    "    #Sampling from prior\n",
    "    prior_samples = np.random.dirichlet(alpha, prior_sample_size)\n",
    "    c = output_count(prior_samples) #fraction of prior samples that satisfy the IV inequality\n",
    "    print('prior done')\n",
    "    #Updated sample counts that are used to update the posterior\n",
    "    posterior_samples = np.random.dirichlet(alpha+np.reshape(sample_counts,(alpha.shape)),posterior_sample_size) #Sampling from posterior\n",
    "    f = output_count(posterior_samples) #fraction of posterior samples that satisfy the IV inequality\n",
    "    #round() so a float product sitting just below the integer count is not truncated\n",
    "    num_success = int(round(f*posterior_sample_size))\n",
    "    f_vec.append(binomtest(num_success, posterior_sample_size).proportion_ci()[0])\n",
    "\n",
    "plt.semilogx(scale_vec,np.array(f_vec),marker='o')\n",
    "plt.xlabel('alpha')\n",
    "plt.ylabel('Lower limit of confidence interval')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "36c53003-541c-4387-ba80-8c8f8ffc1d99",
   "metadata": {},
   "source": [
    "## Frequentist test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1e35ffb6-df51-4cf4-8ad4-0531569ba9a4",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Implementing the IV inequality test that Wang, Robins, Richardson 2017 propose\n",
    "from scipy.stats import chisquare\n",
    "\n",
    "sup_gen = (data[\"Gender\"].unique()).shape[0]\n",
    "sup_dom = (data[\"Dept\"].unique()).shape[0] #n\n",
    "sup_out = (data[\"Admit\"].unique()).shape[0]\n",
    "\n",
    "#Construct variable (Q) from data\n",
    "#table_counts[d,a,:] holds, for department d and outcome a (0-Admitted, 1-Rejected):\n",
    "#  index 0: Q=0 for Z=0, index 1: Q=1 for Z=0, index 2: Q=0 for Z=1, index 3: Q=1 for Z=1\n",
    "sample_counts = np.zeros((sup_gen,sup_dom,sup_out))\n",
    "table_counts  = np.zeros((sup_dom,sup_out,4))\n",
    "for gender_counter, gender in enumerate(data[\"Gender\"].unique()):\n",
    "    is_gender = data.Gender==gender\n",
    "    gender_total = data.loc[is_gender].Freq.sum() #total count for this gender over all departments and outcomes\n",
    "    for dept_counter, dept in enumerate(data[\"Dept\"].unique()):\n",
    "        in_cell = is_gender&(data.Dept==dept)\n",
    "        #Look each count up once and reuse it below\n",
    "        admitted = data.loc[in_cell&(data.Admit=='Admitted')].Freq.iloc[0]\n",
    "        rejected = data.loc[in_cell&(data.Admit=='Rejected')].Freq.iloc[0]\n",
    "        sample_counts[gender_counter,dept_counter,0] = admitted\n",
    "        sample_counts[gender_counter,dept_counter,1] = rejected\n",
    "        if gender_counter == 0:\n",
    "            table_counts[dept_counter,0,0] = admitted #Q^d0=0 for Z=0\n",
    "            table_counts[dept_counter,0,1] = gender_total - admitted #Q^d0=1 for Z=0\n",
    "            table_counts[dept_counter,1,0] = rejected #Q^d1=0 for Z=0\n",
    "            table_counts[dept_counter,1,1] = gender_total - rejected #Q^d1=1 for Z=0\n",
    "        elif gender_counter == 1:\n",
    "            table_counts[dept_counter,0,3] = admitted #Q^d0=1 for Z=1\n",
    "            table_counts[dept_counter,0,2] = gender_total - admitted #Q^d0=0 for Z=1\n",
    "            table_counts[dept_counter,1,3] = rejected #Q^d1=1 for Z=1\n",
    "            table_counts[dept_counter,1,2] = gender_total - rejected #Q^d1=0 for Z=1\n",
    "\n",
    "for d in range(sup_dom):\n",
    "    for a in range(sup_out):\n",
    "        #2x2 table: first index is the Z group (0: Z=1, 1: Z=0), second is Q (0: Q=1, 1: Q=0)\n",
    "        c_00 = table_counts[d,a,3]\n",
    "        c_01 = table_counts[d,a,2]\n",
    "        c_10 = table_counts[d,a,1]\n",
    "        c_11 = table_counts[d,a,0]\n",
    "        N = c_00 + c_01 + c_10 + c_11\n",
    "        row_0 = c_00 + c_01\n",
    "        row_1 = c_10 + c_11\n",
    "        col_0 = c_00 + c_10\n",
    "        col_1 = c_01 + c_11\n",
    "        #Expected counts under independence: (row total)*(column total)/N\n",
    "        obs = [c_00,c_01,c_10,c_11]\n",
    "        exp = [row_0*col_0/N, row_0*col_1/N, row_1*col_0/N, row_1*col_1/N]\n",
    "        #Difference of the two row proportions, printed to show the direction of the association\n",
    "        print(c_00/row_0 - c_10/row_1)\n",
    "        #A 2x2 table with margins estimated from the data has 1 degree of freedom; chisquare\n",
    "        #defaults to k-1 = 3, so ddof=2 is needed for the p-value to use k-1-ddof = 1.\n",
    "        print(chisquare(obs,exp,ddof=2)[1])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
