{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d61ecc38-996c-4e17-9e37-2b039c24af1f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import jax.numpy as jnp\n",
    "from jax import jit\n",
    "from jax.scipy.stats import norm\n",
    "from jax.scipy.special import logsumexp\n",
    "\n",
    "from functools import partial \n",
    "import numpy as np\n",
    "import scipy as sp\n",
    "\n",
    "from sklearn.datasets import fetch_california_housing\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.utils.random import sample_without_replacement\n",
    "import time\n",
    "from tqdm import tqdm\n",
    "import pandas as pd\n",
    "\n",
    "import pymc as pm\n",
    "\n",
    "import arviz as az\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "import random\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fe7e62a7-96b1-4492-9221-bea7c9d94643",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the California housing data as a Bunch and print its basic layout.\n",
    "housing = fetch_california_housing()\n",
    "print(housing.data.shape, housing.target.shape)\n",
    "print(housing.feature_names[:6])\n",
    "print(housing.target_names)\n",
    "\n",
    "# Load it again as a plain (features, target) pair and print both shapes.\n",
    "x, y = fetch_california_housing(return_X_y=True)\n",
    "print(x.shape)\n",
    "print(y.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e2a079fc-401e-4042-91b4-2ebb116c58c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "## Regression ##\n",
    "\n",
    "n_origin = 150  # number of rows subsampled from the full dataset before splitting\n",
    "def load_traintest(train_frac, dataset,seed):\n",
    "    \"\"\"Subsample, standardize and split a regression dataset.\n",
    "\n",
    "    Draws n_origin rows without replacement (global NumPy RNG seeded with\n",
    "    `seed`), standardizes features and target on that subsample, then splits\n",
    "    the subsample into a train and a test part.\n",
    "\n",
    "    Args:\n",
    "        train_frac: fraction of the subsample used for training; the train\n",
    "            size is int(train_frac * n).\n",
    "        dataset: dataset name; only \"california\" is supported.\n",
    "        seed: seed used for both the subsampling and the train/test split.\n",
    "\n",
    "    Returns:\n",
    "        Tuple (x_train, y_train, x_test, y_test, y_plot, n, d), where y_plot\n",
    "        is a 100-point grid from min(y_train) - 2 to max(y_train) + 2, n is\n",
    "        the subsample size and d the number of features.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if `dataset` is not a supported name.\n",
    "    \"\"\"\n",
    "    #Load dataset\n",
    "    if dataset == \"california\":\n",
    "        x,y = fetch_california_housing(return_X_y = True)\n",
    "    else:\n",
    "        # Fail loudly: returning None here would only surface later as an\n",
    "        # obscure tuple-unpacking error in the caller.\n",
    "        raise ValueError(\"Invalid dataset: {!r}\".format(dataset))\n",
    "\n",
    "    # Seeding the global NumPy RNG makes the subsample reproducible per seed.\n",
    "    np.random.seed(seed)\n",
    "    random_indices = np.random.choice(x.shape[0], n_origin , replace=False)\n",
    "    x,y = x[random_indices, :],y[random_indices]\n",
    "\n",
    "    n = np.shape(x)[0]\n",
    "    d = np.shape(x)[1]\n",
    "\n",
    "    #Standardize beforehand (for validity)\n",
    "    x = (x - np.mean(x,axis = 0))/np.std(x,axis = 0)\n",
    "    y = (y - np.mean(y))/np.std(y)\n",
    "\n",
    "    #Train test split (done on row indices so x and y stay aligned)\n",
    "    ind_train, ind_test = train_test_split(np.arange(n), train_size = int(train_frac*n),random_state = seed)\n",
    "    x_train = x[ind_train]\n",
    "    y_train = y[ind_train]\n",
    "    x_test = x[ind_test]\n",
    "    y_test = y[ind_test]\n",
    "\n",
    "    # Grid of candidate responses on which bands and regions are evaluated.\n",
    "    y_plot = np.linspace(np.min(y_train) - 2, np.max(y_train) + 2,100)\n",
    "\n",
    "    return x_train,y_train,x_test,y_test,y_plot,n,d\n",
    "\n",
    "\n",
    "\n",
    "#Normal prior PyMC model\n",
    "def fit_mcmc_normal(y,x,B,seed = 100):\n",
    "    \"\"\"Fit a Normal-prior Bayesian linear regression with PyMC.\n",
    "\n",
    "    Model: beta_j ~ Normal(0, 5), intercept ~ Normal(0, 5),\n",
    "    sigma ~ HalfNormal(1), y ~ Normal(x @ beta + intercept, sigma).\n",
    "\n",
    "    Args:\n",
    "        y: observed responses.\n",
    "        x: design matrix; its second dimension is the number of coefficients p.\n",
    "        B: number of posterior draws per chain (4 chains are run).\n",
    "        seed: random seed passed to pm.sample.\n",
    "\n",
    "    Returns:\n",
    "        (beta_post, intercept_post, sigma_post, logml): posterior draws\n",
    "        flattened over chains, with shapes (4*B, p), (4*B, 1) and (4*B, 1),\n",
    "        plus a harmonic-mean estimate of the log marginal likelihood computed\n",
    "        from the sampler's `lp` values.\n",
    "    \"\"\"\n",
    "    p = np.shape(x)[1]\n",
    "    prior_sd = 5\n",
    "    with pm.Model() as model:\n",
    "        #Normal\n",
    "        beta = pm.Normal('beta',mu = 0, sigma = prior_sd,shape = p)\n",
    "        intercept = pm.Normal('intercept',mu = 0, sigma = prior_sd)\n",
    "        sigma = pm.HalfNormal(\"sigma\", sigma = 1)\n",
    "        obs = pm.Normal('obs',mu = pm.math.dot(x,beta)+ intercept,sigma = sigma,observed=y)\n",
    "\n",
    "        trace = pm.sample(B,random_seed = seed, chains = 4, return_inferencedata=True)\n",
    "\n",
    "    # Merge the (chain, draw) axes into a single sample axis.\n",
    "    beta_post = trace.posterior['beta'].values.reshape(-1,p)  # (chains*B, p)\n",
    "    intercept_post = trace.posterior['intercept'].values.reshape(-1,1)  # (chains*B, 1)\n",
    "    sigma_post = trace.posterior['sigma'].values.reshape(-1,1)  # (chains*B, 1)\n",
    "    print(np.mean(beta_post,axis = 0)) #check misspec.\n",
    "\n",
    "    # Harmonic-mean estimator over all N draws:\n",
    "    #   log m = -log(mean_i exp(-lp_i)) = -(logsumexp(-lp) - log N).\n",
    "    # The values are flattened first so that N counts every draw; len() of the\n",
    "    # (chain, draw) array would only count the chains.\n",
    "    # NOTE(review): `lp` is the log-probability recorded by the sampler;\n",
    "    # confirm it is the quantity intended for this estimator.\n",
    "    logp = trace[\"sample_stats\"].lp.values.reshape(-1)\n",
    "    logml = -(logsumexp(-logp) - np.log(logp.size))\n",
    "    print(logml)\n",
    "\n",
    "    return beta_post,intercept_post,sigma_post,logml\n",
    "\n",
    "\n",
    "\n",
    "#Repeat 50 mcmc runs for different train test splits\n",
    "def run_mcmc(dataset):\n",
    "    \"\"\"Run MCMC for every candidate model and every train/test split.\n",
    "\n",
    "    Model k (k = 1..K) uses the k-th pair of feature columns.  For each of the\n",
    "    `rep` repetitions the split for seed 100 + j is loaded, the model is fitted\n",
    "    with fit_mcmc_normal, and the posterior draws, the log marginal likelihood\n",
    "    estimate and the wall-clock fitting time are saved under \"samples/\".\n",
    "\n",
    "    Args:\n",
    "        dataset: dataset name forwarded to load_traintest.\n",
    "    \"\"\"\n",
    "    import os\n",
    "\n",
    "    # Create the output directory before the long sampling loop, so a missing\n",
    "    # folder cannot make np.save fail after all the MCMC work is done.\n",
    "    os.makedirs(\"samples\", exist_ok=True)\n",
    "\n",
    "    rep = 50          # number of train/test splits\n",
    "    train_frac = 0.6\n",
    "    B = 2000          # posterior draws per chain\n",
    "    K = 4             # number of candidate models\n",
    "    n_chains = 4      # must match `chains` in fit_mcmc_normal\n",
    "    n_feat = 2        # feature columns used by each candidate model\n",
    "\n",
    "    logml_j = np.zeros((rep,K))\n",
    "    times = np.zeros((rep,K))\n",
    "\n",
    "    for k in tqdm(range(1, K+1), desc=\"Processing Models 1 to K\"):\n",
    "        beta_post_k = np.zeros((rep,n_chains*B, n_feat))\n",
    "        intercept_post_k = np.zeros((rep,n_chains*B, 1))\n",
    "        sigma_post_k = np.zeros((rep,n_chains*B,1))\n",
    "\n",
    "        for j in tqdm(range(rep)):\n",
    "            seed = 100+j\n",
    "            x,y,x_test,y_test,y_plot,n,d = load_traintest(train_frac,dataset,seed)\n",
    "            # Keep only the feature columns that belong to model k.\n",
    "            x,x_test = x[:,n_feat*(k-1):n_feat*k],x_test[:,n_feat*(k-1):n_feat*k]\n",
    "\n",
    "            start = time.time()\n",
    "            beta_post_k[j],intercept_post_k[j],sigma_post_k[j],logml_j[j,k-1] = fit_mcmc_normal(y,x,B,seed)\n",
    "            end = time.time()\n",
    "            times[j,k-1] = (end - start)\n",
    "\n",
    "        #Save posterior samples\n",
    "        # `suffix` already ends in n_origin and the file names append it once\n",
    "        # more; run_conformal loads the files under exactly these names.\n",
    "        suffix = \"{}_model_{}_{}\".format(dataset, k,n_origin)\n",
    "        np.save(\"samples/beta_post{}_{}\".format(suffix,n_origin),beta_post_k)\n",
    "        np.save(\"samples/intercept_post{}_{}\".format(suffix,n_origin),intercept_post_k)\n",
    "        np.save(\"samples/sigma_post{}_{}\".format(suffix,n_origin),sigma_post_k)\n",
    "\n",
    "        # Mean fitting time and its standard error over the repetitions.\n",
    "        print(\"{}: {} ({})\".format(suffix,np.mean(times[:,k-1]), np.std(times[:,k-1])/np.sqrt(rep)))\n",
    "\n",
    "    np.save(\"samples/logml_j{}_{}\".format(dataset,n_origin),logml_j)\n",
    "\n",
    "    np.save(\"samples/times_{}_{}\".format(dataset,n_origin),times)\n",
    "\n",
    "\n",
    "    \n",
    "## BAYESIAN CREDIBLE INTERVALS FROM MCMC SAMPLES ##\n",
    "#compute bayesian central 1-alpha credible interval\n",
    "@jit\n",
    "def compute_bayes_band_MCMC_withCBMA(alpha,y_plot,cdf_pred):\n",
    "    \"\"\"Return the central (1 - alpha) band read off an averaged predictive cdf.\n",
    "\n",
    "    cdf_pred is averaged over axis 1; the band end points are the y_plot\n",
    "    values whose averaged cdf is closest to alpha/2 and to 1 - alpha/2.\n",
    "    The result is a length-2 array (lower, upper).\n",
    "    \"\"\"\n",
    "    cdf_mean = jnp.mean(cdf_pred, axis=1)\n",
    "\n",
    "    # Grid indices whose cdf value is nearest to each target quantile level.\n",
    "    idx_lower = jnp.argmin(jnp.abs(cdf_mean - alpha/2))\n",
    "    idx_upper = jnp.argmin(jnp.abs(cdf_mean - (1-alpha/2)))\n",
    "\n",
    "    return jnp.zeros(2).at[0].set(y_plot[idx_lower]).at[1].set(y_plot[idx_upper])\n",
    "\n",
    "\n",
    "\n",
    "## CONFORMAL FROM MCMC SAMPLES ##\n",
    "### JAX IMPLEMENTATION\n",
    "@jit #compute rank (unnormalized by n+1)\n",
    "def compute_rank_IS_withCBMA(logp_samp_n,logwjk):\n",
    "    \"\"\"Importance-sampling conformal ranks for every grid value.\n",
    "\n",
    "    Args:\n",
    "        logp_samp_n: log-likelihood of the n training points under each of the\n",
    "            B posterior draws, shape (B, n).\n",
    "        logwjk: log-likelihood of each candidate grid value under each draw,\n",
    "            shape (n_plot, B); these are the log importance weights.\n",
    "\n",
    "    Returns:\n",
    "        rank_cp: for each grid value, the number of the n + 1 predictive\n",
    "            densities that are <= the candidate's own density (not divided by\n",
    "            n + 1).\n",
    "        pred_tot_CBMA: the same n + 1 densities computed with unnormalized\n",
    "            weights, shape (n_plot, n + 1), with the candidate in the last\n",
    "            column.\n",
    "    \"\"\"\n",
    "    #compute importance sampling weights and normalizing\n",
    "    wjk = jnp.exp(logwjk)\n",
    "    Zjk = jnp.sum(wjk,axis = 1).reshape(-1,1)\n",
    "    lik_samp_n = jnp.exp(logp_samp_n)\n",
    "\n",
    "    #compute predictives for y_i,x_i and y_new,x_n+1 (self-normalized weights)\n",
    "    p_cp = jnp.dot(wjk/Zjk, lik_samp_n)\n",
    "    p_new = jnp.sum(wjk**2,axis = 1).reshape(-1,1)/Zjk\n",
    "    pred_tot = jnp.concatenate((p_cp,p_new),axis = 1)\n",
    "\n",
    "    #same predictives with unnormalized weights, returned for model averaging\n",
    "    p_CBMA_cp = jnp.dot(wjk, lik_samp_n)\n",
    "    p_CBMA_new = jnp.sum(wjk**2,axis = 1).reshape(-1,1)\n",
    "    pred_tot_CBMA = jnp.concatenate((p_CBMA_cp,p_CBMA_new),axis = 1)\n",
    "\n",
    "    #compute nonconformity score and rank the candidate (last column)\n",
    "    # jnp.sum (not np.sum) keeps the reduction inside the traced JAX computation.\n",
    "    rank_cp = jnp.sum(pred_tot <= pred_tot[:,-1].reshape(-1,1),axis = 1)\n",
    "    return rank_cp,pred_tot_CBMA\n",
    "\n",
    "\n",
    "#compute region of grid which is in confidence set\n",
    "@jit\n",
    "def compute_cb_region_IS_withCBMA(alpha,logp_samp_n,logwjk): #assumes they are connected\n",
    "    \"\"\"Flag the grid values that belong to the conformal set.\n",
    "\n",
    "    A grid value is kept when its rank from compute_rank_IS_withCBMA exceeds\n",
    "    alpha * (n + 1), with n the number of columns of logp_samp_n (B x n).\n",
    "    Returns the boolean mask together with the unnormalized predictive\n",
    "    densities produced by the rank computation.\n",
    "    \"\"\"\n",
    "    n_train = jnp.shape(logp_samp_n)[1]\n",
    "    ranks, pred_tot_CBMA = compute_rank_IS_withCBMA(logp_samp_n, logwjk)\n",
    "    threshold = alpha*(n_train+1)\n",
    "    return ranks > threshold, pred_tot_CBMA\n",
    "## ##\n",
    "\n",
    "\n",
    "\n",
    "def run_conformal(dataset):\n",
    "    \"\"\"Compute per-model Bayes bands and conformal Bayes regions from saved draws.\n",
    "\n",
    "    For each of the K models and each repetition j, the split for seed 100 + j\n",
    "    is reloaded and, for every test point, the central (1 - alpha) posterior\n",
    "    predictive band and the conformal region on the y_plot grid are computed.\n",
    "    Coverage, length and wall-clock time are recorded, together with the\n",
    "    per-model log contributions (log marginal likelihood + log predictive)\n",
    "    that run_CBMA later combines across models.  Everything is saved under\n",
    "    \"results/\".\n",
    "\n",
    "    Args:\n",
    "        dataset: dataset name forwarded to load_traintest.\n",
    "    \"\"\"\n",
    "    import os\n",
    "\n",
    "    # Create the output directory up front so the np.save calls at the end\n",
    "    # cannot fail after the whole computation has run.\n",
    "    os.makedirs(\"results\", exist_ok=True)\n",
    "\n",
    "    #Initialize (this first split is only used to size the result arrays)\n",
    "    train_frac = 0.6\n",
    "    x,y,x_test,y_test,y_plot,n,d = load_traintest(train_frac,dataset,100)\n",
    "    K = 4\n",
    "\n",
    "    logml_j_post = jnp.load(\"samples/logml_j{}_{}.npy\".format(dataset,n_origin))\n",
    "\n",
    "    alpha = 0.2\n",
    "    rep = np.shape(logml_j_post)[0]\n",
    "    n_test = np.shape(x_test)[0]\n",
    "    n_grid = np.shape(y_plot)[0]\n",
    "\n",
    "    coverage_cb = np.zeros((rep,n_test,K))\n",
    "    coverage_bayes = np.zeros((rep,n_test,K))\n",
    "\n",
    "    length_cb = np.zeros((rep,n_test,K))\n",
    "    length_bayes = np.zeros((rep,n_test,K))\n",
    "\n",
    "    band_bayes = np.zeros((rep,n_test,2,K))\n",
    "    region_cb = np.zeros((rep,n_test,n_grid,K))\n",
    "\n",
    "    times_bayes = np.zeros((rep,K))\n",
    "    times_cb = np.zeros((rep,K))\n",
    "\n",
    "    # Per-model log contributions.  The third axis of CBMA_contr has one entry\n",
    "    # per training point plus one for the candidate value.\n",
    "    CBMA_contr = np.zeros((rep,n_grid, np.shape(x)[0]+1, n_test,K))\n",
    "    cdf_CBMA_contr = np.zeros((rep, n_grid,n_test,K))\n",
    "\n",
    "    for k in tqdm(range(1, K +1), desc=\"Processing Models 1 to K\"):\n",
    "        suffix = \"{}_model_{}_{}\".format(dataset, k,n_origin)\n",
    "        #Load posterior samples\n",
    "        beta_post_k = jnp.load(\"samples/beta_post{}_{}.npy\".format(suffix,n_origin))\n",
    "        intercept_post_k = jnp.load(\"samples/intercept_post{}_{}.npy\".format(suffix,n_origin))\n",
    "        sigma_post_k = jnp.load(\"samples/sigma_post{}_{}.npy\".format(suffix,n_origin))\n",
    "\n",
    "        for j in tqdm(range(rep)):\n",
    "            seed = 100 + j\n",
    "            #load dataset and keep only the feature columns of model k\n",
    "            x,y,x_test,y_test,y_plot,n,d = load_traintest(train_frac,dataset,seed)\n",
    "            x,x_test = x[:,2*(k-1):2*k],x_test[:,2*(k-1):2*k]\n",
    "            dy = y_plot[1] - y_plot[0]\n",
    "\n",
    "            #Bayes\n",
    "            start = time.time()\n",
    "\n",
    "            # The closure captures the draws of repetition j, so it is\n",
    "            # re-traced on every iteration and the timing includes that cost.\n",
    "            @jit #normal cdf from posterior samples\n",
    "            def normal_likelihood_cdf(y,x):\n",
    "                return norm.cdf(y,loc =jnp.dot(beta_post_k[j],x.transpose())+ intercept_post_k[j],scale = sigma_post_k[j]) #compute likelihood samples\n",
    "\n",
    "            #Precompute cdfs: (grid value, posterior draw, test point)\n",
    "            cdf_test =  normal_likelihood_cdf(y_plot.reshape(-1,1,1),x_test)\n",
    "            cdf_CBMA_contr[j,:,:,k-1] = (logml_j_post[j,k-1]) + np.log(np.mean(cdf_test,axis = 1))\n",
    "\n",
    "            for i in (range(n_test)):\n",
    "                band_bayes[j,i,:,k-1] = compute_bayes_band_MCMC_withCBMA(alpha,y_plot,cdf_test[:,:,i])\n",
    "                coverage_bayes[j,i,k-1] = (y_test[i] >=band_bayes[j,i,0,k-1])&(y_test[i] <=band_bayes[j,i,1,k-1])\n",
    "                length_bayes[j,i,k-1] = np.abs(band_bayes[j,i,1,k-1]- band_bayes[j,i,0,k-1])\n",
    "            end = time.time()\n",
    "            times_bayes[j,k-1] = end - start\n",
    "\n",
    "            #Conformal Bayes\n",
    "            start = time.time()\n",
    "            @jit #normal loglik from posterior samples\n",
    "            def normal_loglikelihood(y,x):\n",
    "                return norm.logpdf(y,loc = jnp.dot(beta_post_k[j],x.transpose())+ intercept_post_k[j],scale = sigma_post_k[j]) #compute likelihood samples\n",
    "\n",
    "            logp_samp_n = normal_loglikelihood(y,x)\n",
    "            logwjk = normal_loglikelihood(y_plot.reshape(-1,1,1),x_test)  #numerator of AOI estimator of conformity score\n",
    "\n",
    "            for i in (range(n_test)):\n",
    "                region_cb[j,i,:,k-1],pred_tot_CBMA = compute_cb_region_IS_withCBMA(alpha,logp_samp_n,logwjk[:,:,i])\n",
    "                coverage_cb[j,i,k-1] = region_cb[j,i,np.argmin(np.abs(y_test[i]-y_plot)),k-1] #grid coverage\n",
    "                length_cb[j,i,k-1] = np.sum(region_cb[j,i,:,k-1])*dy\n",
    "                CBMA_contr[j,:,:,i,k-1] = (logml_j_post[j,k-1]) + np.log(pred_tot_CBMA)\n",
    "            end = time.time()\n",
    "            times_cb[j,k-1] = end - start\n",
    "\n",
    "    # #Save regions (need to update)\n",
    "    np.save(\"results/region_cb_{}_{}\".format(dataset,n_origin),region_cb)\n",
    "    np.save(\"results/band_bayes_{}_{}\".format(dataset,n_origin),band_bayes)\n",
    "\n",
    "    np.save(\"results/coverage_cb_{}_{}\".format(dataset,n_origin),coverage_cb)\n",
    "    np.save(\"results/coverage_bayes_{}_{}\".format(dataset,n_origin),coverage_bayes)\n",
    "\n",
    "    np.save(\"results/length_cb_{}_{}\".format(dataset,n_origin),length_cb)\n",
    "    np.save(\"results/length_bayes_{}_{}\".format(dataset,n_origin),length_bayes)\n",
    "\n",
    "    np.save(\"results/times_cb_{}_{}\".format(dataset,n_origin),times_cb)\n",
    "    np.save(\"results/times_bayes_{}_{}\".format(dataset,n_origin),times_bayes)\n",
    "\n",
    "    np.save(\"results/CBMA_contr{}_{}\".format(dataset,n_origin),CBMA_contr)\n",
    "    np.save(\"results/cdf_CBMA_contr{}_{}\".format(dataset,n_origin),cdf_CBMA_contr)\n",
    "  \n",
    "    \n",
    "\n",
    "\n",
    "def run_CBMA(dataset):\n",
    "    \"\"\"Combine the per-model contributions into BMA bands and CBMA regions.\n",
    "\n",
    "    Loads the log contributions written by run_conformal and, for every\n",
    "    repetition and test point, (a) mixes the per-model predictive cdfs with\n",
    "    weights proportional to exp(log marginal likelihood) to obtain the central\n",
    "    (1 - alpha) BMA band, and (b) sums the per-model conformity terms to rank\n",
    "    each grid value and build the conformal (CBMA) region.  Coverage, length\n",
    "    and timings are saved under \"results/\".\n",
    "\n",
    "    Args:\n",
    "        dataset: dataset name forwarded to load_traintest.\n",
    "    \"\"\"\n",
    "    #Initialize (this first split is only used to size the result arrays)\n",
    "    train_frac = 0.6\n",
    "    x,y,x_test,y_test,y_plot,n,d = load_traintest(train_frac,dataset,100)\n",
    "\n",
    "    CBMA_contr = (jnp.load(\"results/CBMA_contr{}_{}.npy\".format(dataset,n_origin)))\n",
    "    cdf_CBMA_contr = (jnp.load(\"results/cdf_CBMA_contr{}_{}.npy\".format(dataset,n_origin)))\n",
    "    logml_j_post = jnp.load(\"samples/logml_j{}_{}.npy\".format(dataset,n_origin))\n",
    "\n",
    "    alpha = 0.2\n",
    "    rep = np.shape(logml_j_post)[0]\n",
    "    n_test = np.shape(x_test)[0]\n",
    "\n",
    "    # The third axis of CBMA_contr holds the n training points plus the\n",
    "    # candidate, so its length already is n + 1.  The threshold is therefore\n",
    "    # alpha * shape[2], matching alpha * (n + 1) in\n",
    "    # compute_cb_region_IS_withCBMA; adding another 1 would use n + 2.\n",
    "    n_plus_1 = CBMA_contr.shape[2]\n",
    "\n",
    "    coverage_cbma = np.zeros((rep,n_test))\n",
    "    coverage_bayesBMA = np.zeros((rep,n_test))\n",
    "\n",
    "    length_cbma = np.zeros((rep,n_test))\n",
    "    length_bayesBMA = np.zeros((rep,n_test))\n",
    "\n",
    "    band_bayesBMA = np.zeros((rep,n_test,2))\n",
    "    region_cbma = np.zeros((rep,n_test,np.shape(y_plot)[0]))\n",
    "\n",
    "    times_bayesBMA = np.zeros(rep)\n",
    "    times_cbma = np.zeros(rep)\n",
    "\n",
    "    for j in tqdm(range(rep)):\n",
    "        seed = 100 + j\n",
    "\n",
    "        x,y,x_test,y_test,y_plot,n,d = load_traintest(train_frac,dataset,seed)\n",
    "        dy = y_plot[1] - y_plot[0]\n",
    "\n",
    "        #BayesBMA\n",
    "        start = time.time()\n",
    "        # log of sum_k exp(logml_k) * cdf_k, for every grid value and test point\n",
    "        cdf_pred = logsumexp(cdf_CBMA_contr[j,:,:,:],axis = -1)\n",
    "        # log normalizer of the model weights; identical for every test point\n",
    "        log_weight_norm = logsumexp(logml_j_post[j,])\n",
    "\n",
    "        for i in (range(n_test)):\n",
    "            cdf_pred_i = np.exp((cdf_pred[:,i]) - log_weight_norm)\n",
    "            band_bayesBMA[j,i,0] = y_plot[jnp.argmin(jnp.abs(cdf_pred_i - alpha/2))]\n",
    "            band_bayesBMA[j,i,1] = y_plot[jnp.argmin(jnp.abs(cdf_pred_i - (1-alpha/2)))]\n",
    "            coverage_bayesBMA[j,i] = (y_test[i] >=band_bayesBMA[j,i,0])&(y_test[i] <=band_bayesBMA[j,i,1])\n",
    "            length_bayesBMA[j,i] = np.abs(band_bayesBMA[j,i,1]- band_bayesBMA[j,i,0])\n",
    "        end = time.time()\n",
    "        times_bayesBMA[j] = end - start\n",
    "\n",
    "        ## CONFORMAL FROM MCMC SAMPLES ##\n",
    "\n",
    "        #Conformal BayesCBMA\n",
    "        start = time.time()\n",
    "\n",
    "        for i in (range(n_test)):\n",
    "            # Sum the model contributions in log space; the global rescaling by\n",
    "            # `den` only guards against under/overflow and leaves ranks unchanged.\n",
    "            num = logsumexp(CBMA_contr[j, :, :, i, :], axis=-1)\n",
    "            den = logsumexp(num)\n",
    "            CBMA_contr_unified_ji = np.exp(num - den)\n",
    "            rank_cp_cbma = np.sum(CBMA_contr_unified_ji <= CBMA_contr_unified_ji[:,-1].reshape(-1,1),axis = 1)\n",
    "            region_cbma[j,i,:] = rank_cp_cbma> alpha*n_plus_1\n",
    "            coverage_cbma[j,i] = region_cbma[j,i,np.argmin(np.abs(y_test[i]-y_plot))] #grid coverage\n",
    "            length_cbma[j,i] = np.sum(region_cbma[j,i])*dy\n",
    "        end = time.time()\n",
    "        times_cbma[j] = end - start\n",
    "\n",
    "    # #Save regions (need to update)\n",
    "    np.save(\"results/region_cbma_{}_{}\".format(dataset,n_origin),region_cbma)\n",
    "    np.save(\"results/band_bayesBMA_{}_{}\".format(dataset,n_origin),band_bayesBMA)\n",
    "\n",
    "    np.save(\"results/coverage_cbma_{}_{}\".format(dataset,n_origin),coverage_cbma)\n",
    "    np.save(\"results/coverage_bayesBMA_{}_{}\".format(dataset,n_origin),coverage_bayesBMA)\n",
    "\n",
    "    np.save(\"results/length_cbma_{}_{}\".format(dataset,n_origin),length_cbma)\n",
    "    np.save(\"results/length_bayesBMA_{}_{}\".format(dataset,n_origin),length_bayesBMA)\n",
    "\n",
    "    np.save(\"results/times_cbma_{}_{}\".format(dataset,n_origin),times_cbma)\n",
    "    np.save(\"results/times_bayesBMA_{}_{}\".format(dataset,n_origin),times_bayesBMA)\n",
    "    \n",
    "   \n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "80da9db2-f606-4275-ac6d-dade3dbcda0d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Full pipeline, in dependency order: each stage reads the files that the\n",
    "# previous one saved.\n",
    "run_mcmc(dataset=\"california\")\n",
    "run_conformal(dataset=\"california\")\n",
    "run_CBMA(dataset=\"california\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "83287402-a1da-4b18-b5d4-61093690bb55",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Report Conformal times #\n",
    "dataset = \"california\"\n",
    "times_cb = np.load(\"results/times_cb_{}_{}.npy\".format(dataset,n_origin))\n",
    "times_bayes = np.load(\"results/times_bayes_{}_{}.npy\".format(dataset,n_origin))\n",
    "times_cbma = np.load(f\"results/times_cbma_{dataset}_{n_origin}.npy\")\n",
    "times_bayesBMA = np.load(f\"results/times_bayesBMA_{dataset}_{n_origin}.npy\")\n",
    "\n",
    "\n",
    "rep = np.shape(times_cb)[0]\n",
    "K = np.shape(times_cb)[1]\n",
    "\n",
    "# Each line shows the mean time and its standard error over the repetitions.\n",
    "#times cb\n",
    "for k in range(1,K+1):\n",
    "    suffix = \"{}_model_{}_{}\".format(dataset, k,n_origin)\n",
    "    print(\"{} Time for cb: {:.3f} ({:.3f})\".format(suffix,np.mean(times_cb[:,k-1]), np.std(times_cb[:,k-1])/np.sqrt(rep)))\n",
    "print()\n",
    "\n",
    "#times bayes\n",
    "for k in range(1,K+1):\n",
    "    suffix = \"{}_model_{}_{}\".format(dataset, k,n_origin)\n",
    "    print(\"{} Time for bayes: {:.3f} ({:.3f})\".format(suffix,np.mean(times_bayes[:,k-1]), np.std(times_bayes[:,k-1])/np.sqrt(rep)))\n",
    "print()\n",
    "\n",
    "\n",
    "# The model-averaged methods are not tied to a single model, so they are\n",
    "# labelled with dataset and sample size only; reusing the loop's `suffix`\n",
    "# would tag them as the last model.\n",
    "#times cbma\n",
    "\n",
    "print(\"{}_{} Time for cbma: {:.3f} ({:.3f})\".format(dataset,n_origin,np.mean(times_cbma), np.std(times_cbma)/np.sqrt(rep)))\n",
    "\n",
    "#times bayesBMA\n",
    "print(\"{}_{} Time for bayesBMA: {:.3f} ({:.3f})\".format(dataset,n_origin,np.mean(times_bayesBMA), np.std(times_bayesBMA)/np.sqrt(rep)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bd11b4f2-43ec-479b-b887-7b28583acab6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Report Conformal results#\n",
    "# Load the saved results for the current dataset and sample size.\n",
    "\n",
    "# Per-model coverage indicators and set lengths\n",
    "coverage_cb = np.load(\"results/coverage_cb_{}_{}.npy\".format(dataset, n_origin))\n",
    "coverage_bayes = np.load(\"results/coverage_bayes_{}_{}.npy\".format(dataset, n_origin))\n",
    "\n",
    "length_cb = np.load(\"results/length_cb_{}_{}.npy\".format(dataset, n_origin))\n",
    "length_bayes = np.load(\"results/length_bayes_{}_{}.npy\".format(dataset, n_origin))\n",
    "\n",
    "# Model-averaged coverage indicators and set lengths\n",
    "coverage_cbma = np.load(\"results/coverage_cbma_{}_{}.npy\".format(dataset, n_origin))\n",
    "coverage_bayesBMA = np.load(\"results/coverage_bayesBMA_{}_{}.npy\".format(dataset, n_origin))\n",
    "\n",
    "length_cbma = np.load(\"results/length_cbma_{}_{}.npy\".format(dataset, n_origin))\n",
    "length_bayesBMA = np.load(\"results/length_bayesBMA_{}_{}.npy\".format(dataset, n_origin))\n",
    "\n",
    "# First axis: repetitions; last axis: candidate models.\n",
    "rep = coverage_cb.shape[0]\n",
    "K = coverage_cb.shape[2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "973bbba0-9364-49f6-9809-6a0e6eac5515",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Coverage\n",
    "# Each line shows the mean and, in brackets, the standard deviation divided by sqrt(rep).\n",
    "for k in range(1,K +1):\n",
    "    suffix = \"{}_model_{}_{}\".format(dataset, k,n_origin)\n",
    "    print(\"{} Coverage cb is: {:.3f} ({:.3f})\".format(suffix,np.mean(coverage_cb[:,:,k-1]), np.std(coverage_cb[:,:,k-1])/np.sqrt(rep)))\n",
    "print()\n",
    "\n",
    "for k in range(1,K +1):\n",
    "    suffix = \"{}_model_{}_{}\".format(dataset, k,n_origin)\n",
    "    print(\"{} Coverage bayes is: {:.3f} ({:.3f})\".format(suffix,np.mean(coverage_bayes[:,:,k-1]), np.std(coverage_bayes[:,:,k-1])/np.sqrt(rep)))\n",
    "print()\n",
    "\n",
    "\n",
    "# The model-averaged methods are not tied to a single model, so they are\n",
    "# labelled with dataset and sample size only; reusing the loop's `suffix`\n",
    "# would tag them as the last model.\n",
    "#coverage cbma\n",
    "\n",
    "print(\"{}_{} Coverage for cbma: {:.3f} ({:.3f})\".format(dataset,n_origin,np.mean(coverage_cbma), np.std(coverage_cbma)/np.sqrt(rep)))\n",
    "\n",
    "#coverage bayesBMA\n",
    "print(\"{}_{} Coverage for bayesBMA: {:.3f} ({:.3f})\".format(dataset,n_origin,np.mean(coverage_bayesBMA), np.std(coverage_bayesBMA)/np.sqrt(rep)))\n",
    "\n",
    "\n",
    "# Lengths\n",
    "for k in range(1, K+1):\n",
    "    suffix = \"{}_model_{}_{}\".format(dataset, k,n_origin)\n",
    "    print(\"{} length cb is: {:.3f} ({:.3f})\".format(suffix,np.mean(length_cb[:,:,k-1]), np.std(length_cb[:,:,k-1])/np.sqrt(rep)))\n",
    "print()\n",
    "\n",
    "for k in range(1, K+1):\n",
    "    suffix = \"{}_model_{}_{}\".format(dataset, k,n_origin)\n",
    "    print(\"{} length bayes is: {:.3f} ({:.3f})\".format(suffix,np.mean(length_bayes[:,:,k-1]), np.std(length_bayes[:,:,k-1])/np.sqrt(rep)))\n",
    "print()\n",
    "\n",
    "\n",
    "\n",
    "#length cbma\n",
    "\n",
    "print(\"{}_{} length for cbma: {:.3f} ({:.3f})\".format(dataset,n_origin,np.mean(length_cbma), np.std(length_cbma)/np.sqrt(rep)))\n",
    "\n",
    "#length bayesBMA\n",
    "print(\"{}_{} length for bayesBMA: {:.3f} ({:.3f})\".format(dataset,n_origin,np.mean(length_bayesBMA), np.std(length_bayesBMA)/np.sqrt(rep)))\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "68a1765c-a97d-4360-975d-dd05b13db2dd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the saved results\n",
    "coverage_cb_n50 = np.load(f\"results/coverage_cb_{dataset}_{50}.npy\") \n",
    "coverage_bayes_n50 = np.load(f\"results/coverage_bayes_{dataset}_{50}.npy\")\n",
    "coverage_cb_n100 = np.load(f\"results/coverage_cb_{dataset}_{100}.npy\") \n",
    "coverage_bayes_n100 = np.load(f\"results/coverage_bayes_{dataset}_{100}.npy\")\n",
    "coverage_cb_n150 = np.load(f\"results/coverage_cb_{dataset}_{150}.npy\") \n",
    "coverage_bayes_n150 = np.load(f\"results/coverage_bayes_{dataset}_{150}.npy\")\n",
    "\n",
    "\n",
    "coverage_cbma_n50 = np.load(f\"results/coverage_cbma_{dataset}_{50}.npy\")\n",
    "coverage_bayesBMA_n50 = np.load(f\"results/coverage_bayesBMA_{dataset}_{50}.npy\")\n",
    "coverage_cbma_n100 = np.load(f\"results/coverage_cbma_{dataset}_{100}.npy\")\n",
    "coverage_bayesBMA_n100 = np.load(f\"results/coverage_bayesBMA_{dataset}_{100}.npy\")\n",
    "coverage_cbma_n150 = np.load(f\"results/coverage_cbma_{dataset}_{150}.npy\")\n",
    "coverage_bayesBMA_n150 = np.load(f\"results/coverage_bayesBMA_{dataset}_{150}.npy\")\n",
    "\n",
    "rep = np.shape(coverage_cb_n50)[0]\n",
    "K = np.shape(coverage_cb_n50)[2]\n",
    "\n",
    "# Load the saved results\n",
    "length_cb_n50 = np.load(f\"results/length_cb_{dataset}_{50}.npy\")\n",
    "length_bayes_n50 = np.load(f\"results/length_bayes_{dataset}_{50}.npy\")\n",
    "length_cb_n100 = np.load(f\"results/length_cb_{dataset}_{100}.npy\") \n",
    "length_bayes_n100 = np.load(f\"results/length_bayes_{dataset}_{100}.npy\")\n",
    "length_cb_n150 = np.load(f\"results/length_cb_{dataset}_{150}.npy\") \n",
    "length_bayes_n150 = np.load(f\"results/length_bayes_{dataset}_{150}.npy\")\n",
    "\n",
    "length_cbma_n50 = np.load(f\"results/length_cbma_{dataset}_{50}.npy\")\n",
    "length_bayesBMA_n50 = np.load(f\"results/length_bayesBMA_{dataset}_{50}.npy\")\n",
    "length_cbma_n100 = np.load(f\"results/length_cbma_{dataset}_{100}.npy\")\n",
    "length_bayesBMA_n100 = np.load(f\"results/length_bayesBMA_{dataset}_{100}.npy\")\n",
    "length_cbma_n150 = np.load(f\"results/length_cbma_{dataset}_{150}.npy\")\n",
    "length_bayesBMA_n150 = np.load(f\"results/length_bayesBMA_{dataset}_{150}.npy\")\n",
    "\n",
    "print(length_cb_n50.shape)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "52503267-e840-449c-9018-7498dd203bea",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plotting\n",
    "\n",
    "# Per-repetition coverage of each model at n = 50 (mean over the test-point axis).\n",
    "(coverage_cb_n50_model1, coverage_cb_n50_model2,\n",
    " coverage_cb_n50_model3, coverage_cb_n50_model4) = (\n",
    "    coverage_cb_n50[:, :, m].mean(axis=1) for m in range(4))\n",
    "\n",
    "(coverage_bayes_n50_model1, coverage_bayes_n50_model2,\n",
    " coverage_bayes_n50_model3, coverage_bayes_n50_model4) = (\n",
    "    coverage_bayes_n50[:, :, m].mean(axis=1) for m in range(4))\n",
    "\n",
    "# Per-repetition standard error over the test points, same layout as above.\n",
    "(se_cb_n50_model1, se_cb_n50_model2,\n",
    " se_cb_n50_model3, se_cb_n50_model4) = (\n",
    "    coverage_cb_n50[:, :, m].std(axis=1) / np.sqrt(coverage_cb_n50.shape[1]) for m in range(4))\n",
    "\n",
    "(se_bayes_n50_model1, se_bayes_n50_model2,\n",
    " se_bayes_n50_model3, se_bayes_n50_model4) = (\n",
    "    coverage_bayes_n50[:, :, m].std(axis=1) / np.sqrt(coverage_bayes_n50.shape[1]) for m in range(4))\n",
    "\n",
    "sample_sizes = [50, 100, 150]\n",
    "model_positions = [1, 2, 3, 4]\n",
    "\n",
    "# Coverage arrays per sample size, in the order (CB, Bayes, CBMA, BMA).\n",
    "# Every panel derives its summaries from these loaded arrays, so no panel\n",
    "# depends on per-model variables that were only prepared for one sample size.\n",
    "coverage_by_n = {\n",
    "    50: (coverage_cb_n50, coverage_bayes_n50, coverage_cbma_n50, coverage_bayesBMA_n50),\n",
    "    100: (coverage_cb_n100, coverage_bayes_n100, coverage_cbma_n100, coverage_bayesBMA_n100),\n",
    "    150: (coverage_cb_n150, coverage_bayes_n150, coverage_cbma_n150, coverage_bayesBMA_n150),\n",
    "}\n",
    "\n",
    "\n",
    "def _mean_and_se(per_rep):\n",
    "    \"\"\"Return the mean of per-repetition values and its standard error.\"\"\"\n",
    "    return np.mean(per_rep), np.std(per_rep)/np.sqrt(len(per_rep))\n",
    "\n",
    "\n",
    "fig, axs = plt.subplots(1, len(sample_sizes), figsize=(18, 5))\n",
    "\n",
    "for i, n in enumerate(sample_sizes):\n",
    "    ax = axs[i]\n",
    "    ax.set_title(f\"Sample Size = {n}\")\n",
    "\n",
    "    cov_cb, cov_bayes, cov_cbma, cov_bma = coverage_by_n[n]\n",
    "\n",
    "    # Per model: average over test points within each repetition, then take\n",
    "    # the mean and standard error across repetitions.\n",
    "    cb_stats = [_mean_and_se(np.mean(cov_cb[:, :, m - 1], axis=1)) for m in model_positions]\n",
    "    bayes_stats = [_mean_and_se(np.mean(cov_bayes[:, :, m - 1], axis=1)) for m in model_positions]\n",
    "    y_cb = [mean for mean, _ in cb_stats]\n",
    "    yerr_cb = [se for _, se in cb_stats]\n",
    "    y_bayes = [mean for mean, _ in bayes_stats]\n",
    "    yerr_bayes = [se for _, se in bayes_stats]\n",
    "\n",
    "    # CBMA and BMA methods\n",
    "    cbma_mean, cbma_se = _mean_and_se(np.mean(cov_cbma, axis=1))\n",
    "    bma_mean, bma_se = _mean_and_se(np.mean(cov_bma, axis=1))\n",
    "\n",
    "    # Plotting the error bars for the CB method\n",
    "    ax.errorbar(model_positions, y_cb, yerr=yerr_cb, label='CB', fmt='D--', capsize=5, color='blue')\n",
    "\n",
    "    # Plotting the error bars for the Bayes method\n",
    "    ax.errorbar(model_positions, y_bayes, yerr=yerr_bayes, label='Bayes', fmt='o-.', capsize=5, color='red')\n",
    "\n",
    "    # Plotting CBMA and BMA points\n",
    "    ax.errorbar(5, bma_mean, yerr=bma_se, fmt='p', color='purple', label='BMA', capsize=5)\n",
    "    ax.errorbar(6, cbma_mean, yerr=cbma_se, fmt='s',  color='lime', label='CBMA', capsize=5)\n",
    "    # Reference line at the conformal coverage of the first model.\n",
    "    ax.axhline(y_cb[0], color='gray', linestyle='--', linewidth=1, label=r\"$CBM1$\")\n",
    "\n",
    "    # Formatting\n",
    "    ax.set_xticks([1, 2, 3, 4, 5, 6])\n",
    "    ax.set_xticklabels([r'$\\mathcal{M}_1$', r'$\\mathcal{M}_2$', r'$\\mathcal{M}_3$', r'$\\mathcal{M}_4$',  'BMA','CBMA'])\n",
    "    ax.set_ylabel(\"Coverage\")\n",
    "    ax.set_ylim(0.6, 1)\n",
    "    ax.legend()\n",
    "\n",
    "# Adjust layout\n",
    "plt.tight_layout()\n",
    "plt.savefig(\"CaliforniaCoveragesplot.png\", dpi=300, bbox_inches='tight')\n",
    "\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a3b06d22-b412-4d29-95ec-d4aeb58bc0dd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot mean interval lengths per model and aggregation method, one panel per sample size.\n",
    "\n",
    "# Mean over axis 1 of the n = 50 length arrays, one array per model (last-axis index 0-3).\n",
    "(length_cb_n50_model1, length_cb_n50_model2,\n",
    " length_cb_n50_model3, length_cb_n50_model4) = (\n",
    "    np.mean(length_cb_n50[:, :, m], axis=1) for m in range(4)\n",
    ")\n",
    "(length_bayes_n50_model1, length_bayes_n50_model2,\n",
    " length_bayes_n50_model3, length_bayes_n50_model4) = (\n",
    "    np.mean(length_bayes_n50[:, :, m], axis=1) for m in range(4)\n",
    ")\n",
    "\n",
    "# Standard deviation over axis 1 divided by sqrt(size of axis 1), per model.\n",
    "# These arrays are not used by the figure below.\n",
    "(se_cb_n50_model1, se_cb_n50_model2,\n",
    " se_cb_n50_model3, se_cb_n50_model4) = (\n",
    "    np.std(length_cb_n50[:, :, m], axis=1) / np.sqrt(length_cb_n50.shape[1]) for m in range(4)\n",
    ")\n",
    "(se_bayes_n50_model1, se_bayes_n50_model2,\n",
    " se_bayes_n50_model3, se_bayes_n50_model4) = (\n",
    "    np.std(length_bayes_n50[:, :, m], axis=1) / np.sqrt(length_bayes_n50.shape[1]) for m in range(4)\n",
    ")\n",
    "\n",
    "sample_sizes = [50, 100, 150]\n",
    "\n",
    "# Per-model arrays keyed by sample size, as (CB models, Bayes models).\n",
    "# The n = 100 and n = 150 arrays are not created in this cell and must already exist.\n",
    "model_lengths = {\n",
    "    50: (\n",
    "        [length_cb_n50_model1, length_cb_n50_model2, length_cb_n50_model3, length_cb_n50_model4],\n",
    "        [length_bayes_n50_model1, length_bayes_n50_model2, length_bayes_n50_model3, length_bayes_n50_model4],\n",
    "    ),\n",
    "    100: (\n",
    "        [length_cb_n100_model1, length_cb_n100_model2, length_cb_n100_model3, length_cb_n100_model4],\n",
    "        [length_bayes_n100_model1, length_bayes_n100_model2, length_bayes_n100_model3, length_bayes_n100_model4],\n",
    "    ),\n",
    "    150: (\n",
    "        [length_cb_n150_model1, length_cb_n150_model2, length_cb_n150_model3, length_cb_n150_model4],\n",
    "        [length_bayes_n150_model1, length_bayes_n150_model2, length_bayes_n150_model3, length_bayes_n150_model4],\n",
    "    ),\n",
    "}\n",
    "\n",
    "# Length arrays of the two model-averaging methods keyed by sample size, as (CBMA, BMA).\n",
    "averaged_lengths = {\n",
    "    50: (length_cbma_n50, length_bayesBMA_n50),\n",
    "    100: (length_cbma_n100, length_bayesBMA_n100),\n",
    "    150: (length_cbma_n150, length_bayesBMA_n150),\n",
    "}\n",
    "\n",
    "model_positions = [1, 2, 3, 4]\n",
    "\n",
    "# One panel per sample size, side by side.\n",
    "fig, axs = plt.subplots(1, len(sample_sizes), figsize=(18, 5))\n",
    "\n",
    "for ax, n in zip(axs, sample_sizes):\n",
    "    ax.set_title(f\"Sample Size = {n}\")\n",
    "\n",
    "    cb_models, bayes_models = model_lengths[n]\n",
    "    cbma_lengths, bma_lengths = averaged_lengths[n]\n",
    "\n",
    "    # Point estimate: mean of each per-model array; error bar: std / sqrt(50).\n",
    "    y_cb = [np.mean(values) for values in cb_models]\n",
    "    yerr_cb = [np.std(values) / np.sqrt(50) for values in cb_models]\n",
    "    y_bayes = [np.mean(values) for values in bayes_models]\n",
    "    yerr_bayes = [np.std(values) / np.sqrt(50) for values in bayes_models]\n",
    "\n",
    "    # CBMA and BMA: average over axis 1 first, then summarise the resulting values.\n",
    "    cbma_axis1_mean = np.mean(cbma_lengths, axis=1)\n",
    "    bma_axis1_mean = np.mean(bma_lengths, axis=1)\n",
    "    cbma_mean = np.mean(cbma_axis1_mean)\n",
    "    bma_mean = np.mean(bma_axis1_mean)\n",
    "    cbma_se = np.std(cbma_axis1_mean) / np.sqrt(50)\n",
    "    bma_se = np.std(bma_axis1_mean) / np.sqrt(50)\n",
    "\n",
    "    # Individual models: CB and Bayes at x = 1..4.\n",
    "    ax.errorbar(model_positions, y_cb, yerr=yerr_cb, label='CB', fmt='D--', capsize=5, color='blue', linewidth=2)\n",
    "    ax.errorbar(model_positions, y_bayes, yerr=yerr_bayes, label='Bayes', fmt='o-.', capsize=5, color='red', linewidth=2)\n",
    "\n",
    "    # Model-averaging methods: CBMA at x = 6, BMA at x = 5.\n",
    "    ax.errorbar(6, cbma_mean, yerr=cbma_se, fmt='s', color='lime', label='CBMA', capsize=5)\n",
    "    ax.errorbar(5, bma_mean, yerr=bma_se, fmt='p', color='purple', label='BMA', capsize=5)\n",
    "\n",
    "    # Horizontal reference line at the CB value of the first model.\n",
    "    ax.axhline(y_cb[0], color='gray', linestyle='--', linewidth=1.1, label=r\"$CBM1$\")\n",
    "\n",
    "    # Axis formatting.\n",
    "    ax.set_xticks(model_positions + [5, 6])\n",
    "    ax.set_xticklabels([r'$\\mathcal{M}_1$', r'$\\mathcal{M}_2$', r'$\\mathcal{M}_3$', r'$\\mathcal{M}_4$', 'BMA', 'CBMA'])\n",
    "    ax.set_ylabel(\"Mean length of intervals\")\n",
    "    ax.legend()\n",
    "\n",
    "# Tighten the layout, write the figure to disk, then display it.\n",
    "plt.tight_layout()\n",
    "plt.savefig(\"CaliforniaLengthsplot.png\", dpi=300, bbox_inches='tight')\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2e76c519-5fcb-46f8-9b63-6262e879469c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f379a0b8-4242-4518-b253-14c1b1fd6fe3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Majority vote method of set aggregation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "796ebd38-5889-46f6-adb6-d55f2346b324",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Majority vote vs. CBMA at n = 50: set sizes, their ratio, and majority-vote timing.\n",
    "# The region arrays are indexed below as [replicate j, test point i, axis-2 entry(, model)].\n",
    "region_cb_n50 = np.load(f\"results/region_cb_{dataset}_{50}.npy\")\n",
    "band_bayes_n50 = np.load(f\"results/band_bayes_{dataset}_{50}.npy\")\n",
    "region_cbma_n50 = np.load(f\"results/region_cbma_{dataset}_{50}.npy\")\n",
    "\n",
    "n_test_50 = region_cbma_n50.shape[1]\n",
    "\n",
    "# One value per (replicate, test point): the number of axis-2 entries kept in the set.\n",
    "length_majorityvote_n50 = np.zeros((rep, n_test_50))\n",
    "length_CBMA_n50 = np.zeros((rep, n_test_50))\n",
    "\n",
    "for j in tqdm(range(rep)):\n",
    "    for i in range(n_test_50):\n",
    "        # Number of models (indices 0-3) whose region includes each entry; summing over\n",
    "        # the model axis yields a count even if the stored array is boolean.\n",
    "        votes = np.sum(region_cb_n50[j, i, :, :4], axis=-1)\n",
    "        # Majority vote keeps an entry when strictly more than 2 models include it.\n",
    "        length_majorityvote_n50[j, i] = np.sum(votes > 2)\n",
    "        length_CBMA_n50[j, i] = np.sum(region_cbma_n50[j, i, :])\n",
    "\n",
    "# Average over test points first; the SE is then taken across the rep replicates.\n",
    "majority_per_rep_n50 = np.mean(length_majorityvote_n50, axis=1)\n",
    "cbma_per_rep_n50 = np.mean(length_CBMA_n50, axis=1)\n",
    "print(f\"Mean lengths under majority vote n50 = {np.mean(majority_per_rep_n50)}, and SE {np.std(majority_per_rep_n50) / np.sqrt(rep)}\")\n",
    "print(f\"Mean lengths under CBMA n50 = {np.mean(cbma_per_rep_n50)}, and SE {np.std(cbma_per_rep_n50) / np.sqrt(rep)}\")\n",
    "\n",
    "# Ratio of majority-vote size to CBMA size per (replicate, test point), with the\n",
    "# wall-clock time of each replicate's pass recorded in times_major_50.\n",
    "# A CBMA region with zero entries makes the ratio inf or nan.\n",
    "ratio_n50 = np.zeros((rep, n_test_50))\n",
    "times_major_50 = np.zeros(rep)\n",
    "for j in tqdm(range(rep)):\n",
    "    # perf_counter is a monotonic, high-resolution clock intended for measuring intervals.\n",
    "    start = time.perf_counter()\n",
    "    for i in range(n_test_50):\n",
    "        votes = np.sum(region_cb_n50[j, i, :, :4], axis=-1)\n",
    "        ratio_n50[j, i] = np.sum(votes > 2) / np.sum(region_cbma_n50[j, i, :])\n",
    "    end = time.perf_counter()\n",
    "    times_major_50[j] = end - start\n",
    "\n",
    "ratio_per_rep_n50 = np.mean(ratio_n50, axis=1)\n",
    "print(f\"Mean ratio lengths under majority vote/CBMA n50 = {np.mean(ratio_per_rep_n50)}, and SE {np.std(ratio_per_rep_n50) / np.sqrt(rep)}\")\n",
    "print(f\"Mean times major n 50 = {np.mean(times_major_50)}, and SE {np.std(times_major_50) / np.sqrt(rep)}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3672cbcb-b392-4f8a-8040-1426b90bd4d3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Majority vote vs. CBMA at n = 100: set sizes and their ratio.\n",
    "# The region arrays are indexed below as [replicate j, test point i, axis-2 entry(, model)].\n",
    "region_cb_n100 = np.load(f\"results/region_cb_{dataset}_{100}.npy\")\n",
    "band_bayes_n100 = np.load(f\"results/band_bayes_{dataset}_{100}.npy\")\n",
    "region_cbma_n100 = np.load(f\"results/region_cbma_{dataset}_{100}.npy\")\n",
    "\n",
    "n_test_100 = region_cbma_n100.shape[1]\n",
    "\n",
    "# One value per (replicate, test point): the number of axis-2 entries kept in the set.\n",
    "length_majorityvote_n100 = np.zeros((rep, n_test_100))\n",
    "length_CBMA_n100 = np.zeros((rep, n_test_100))\n",
    "\n",
    "for j in tqdm(range(rep)):\n",
    "    for i in range(n_test_100):\n",
    "        # Number of models whose region includes each entry (sum over the last axis).\n",
    "        votes = np.sum(region_cb_n100[j, i, :, :], axis=-1)\n",
    "        # Majority vote keeps an entry when strictly more than 2 models include it.\n",
    "        length_majorityvote_n100[j, i] = np.sum(votes > 2)\n",
    "        length_CBMA_n100[j, i] = np.sum(region_cbma_n100[j, i, :])\n",
    "\n",
    "# Average over test points first; the SE is then taken across the rep replicates.\n",
    "majority_per_rep_n100 = np.mean(length_majorityvote_n100, axis=1)\n",
    "cbma_per_rep_n100 = np.mean(length_CBMA_n100, axis=1)\n",
    "print(f\"Mean lengths under majority vote n100 = {np.mean(majority_per_rep_n100)}, and SE {np.std(majority_per_rep_n100) / np.sqrt(rep)}\")\n",
    "print(f\"Mean lengths under CBMA n100 = {np.mean(cbma_per_rep_n100)}, and SE {np.std(cbma_per_rep_n100) / np.sqrt(rep)}\")\n",
    "\n",
    "# Elementwise ratio of the two sizes computed above.\n",
    "# A CBMA region with zero entries makes the ratio inf or nan.\n",
    "ratio_n100 = length_majorityvote_n100 / length_CBMA_n100\n",
    "\n",
    "ratio_per_rep_n100 = np.mean(ratio_n100, axis=1)\n",
    "print(f\"Mean ratio lengths under majority vote/CBMA n100 = {np.mean(ratio_per_rep_n100)}, and SE {np.std(ratio_per_rep_n100) / np.sqrt(rep)}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1a4509a8-06b5-4b5f-b326-232918344650",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Majority vote vs. CBMA at n = 150: set sizes and their ratio.\n",
    "# The region arrays are indexed below as [replicate j, test point i, axis-2 entry(, model)].\n",
    "region_cb_n150 = np.load(f\"results/region_cb_{dataset}_{150}.npy\")\n",
    "band_bayes_n150 = np.load(f\"results/band_bayes_{dataset}_{150}.npy\")\n",
    "region_cbma_n150 = np.load(f\"results/region_cbma_{dataset}_{150}.npy\")\n",
    "\n",
    "n_test_150 = region_cbma_n150.shape[1]\n",
    "\n",
    "# One value per (replicate, test point): the number of axis-2 entries kept in the set.\n",
    "length_majorityvote_n150 = np.zeros((rep, n_test_150))\n",
    "length_CBMA_n150 = np.zeros((rep, n_test_150))\n",
    "\n",
    "for j in tqdm(range(rep)):\n",
    "    for i in range(n_test_150):\n",
    "        # Number of models whose region includes each entry (sum over the last axis).\n",
    "        votes = np.sum(region_cb_n150[j, i, :, :], axis=-1)\n",
    "        # Majority vote keeps an entry when strictly more than 2 models include it.\n",
    "        length_majorityvote_n150[j, i] = np.sum(votes > 2)\n",
    "        length_CBMA_n150[j, i] = np.sum(region_cbma_n150[j, i, :])\n",
    "\n",
    "# Average over test points first; the SE is then taken across the rep replicates.\n",
    "majority_per_rep_n150 = np.mean(length_majorityvote_n150, axis=1)\n",
    "cbma_per_rep_n150 = np.mean(length_CBMA_n150, axis=1)\n",
    "print(f\"Mean lengths under majority vote n150 = {np.mean(majority_per_rep_n150)}, and SE {np.std(majority_per_rep_n150) / np.sqrt(rep)}\")\n",
    "print(f\"Mean lengths under CBMA n150 = {np.mean(cbma_per_rep_n150)}, and SE {np.std(cbma_per_rep_n150) / np.sqrt(rep)}\")\n",
    "\n",
    "# Elementwise ratio of the two sizes computed above.\n",
    "# A CBMA region with zero entries makes the ratio inf or nan.\n",
    "ratio_n150 = length_majorityvote_n150 / length_CBMA_n150\n",
    "\n",
    "ratio_per_rep_n150 = np.mean(ratio_n150, axis=1)\n",
    "print(f\"Mean ratio lengths under majority vote/CBMA n150 = {np.mean(ratio_per_rep_n150)}, and SE {np.std(ratio_per_rep_n150) / np.sqrt(rep)}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "60ab4a51-55e3-4edf-8b4e-4da0126b56b5",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
