{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "39b8268c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import random\n",
    "import scipy.stats\n",
    "from scipy.stats import norm\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from tqdm.auto import tqdm\n",
    "import time\n",
    "import scipy.linalg as scilinalg\n",
    "import seaborn as sns\n",
    "from scipy.stats import ortho_group\n",
    "import pandas as pd\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "\n",
    "import multiprocessing as mp\n",
    "from joblib import Parallel, delayed\n",
    "from utils import *\n",
    "\n",
    "from conf_simu import *\n",
    "\n",
    "np.random.seed(1234)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "0185f5c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "def simu_synthetic(d1,d2,alpha,het,sd,tail,pr,k_star,rk,M_mean,mis_set,full_exp=False):\n",
    "    # generate ubderlying matrix M_star, observed indices S\n",
    "    M_star, A, P, S = gen_data(d1,d2,het,sd,tail,pr,M_mean,mis_set,k_star)\n",
    "    # cmc with als base\n",
    "    coverage_cmc_als, coverage_als, length_cmc_als, length_als = cfmc_simu(M_star, S, P, \"als\", rk, alpha)\n",
    "    if full_exp:\n",
    "        # cmc with cvx base\n",
    "        coverage_cmc_cvx, coverage_cvx, length_cmc_cvx, length_cvx = cfmc_simu(M_star, S, P, \"cvx\", rk, alpha)\n",
    "        return coverage_cmc_cvx, coverage_cmc_als, coverage_cvx, coverage_als, length_cmc_cvx, length_cmc_als, length_cvx, length_als\n",
    "    else:\n",
    "        return coverage_cmc_als, coverage_als, length_cmc_als, length_als"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "e1c6d4c8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "coverage rate: \n",
      "\t cmc-als 0.9017381835779964\n",
      "\t cmc-cvx 0.8979270850137119\n",
      "\t als 0.8267059203097273\n",
      "\t cvx 0.9241611550250041\n",
      "Average length: \n",
      "\t cmc-als 3.9092\n",
      "\t cmc-cvx 3.7193\n",
      "\t als 3.0995\n",
      "\t cvx 3.8718\n"
     ]
    }
   ],
   "source": [
    "d1 = d2 = 500\n",
    "alpha = 0.1\n",
    "sd = 1\n",
    "het = 'homo'\n",
    "pr = 0.8\n",
    "rk = 30\n",
    "k_star = 8\n",
    "M_mean = 1\n",
    "mis_set = 2\n",
    "full_exp=True\n",
    "tail = 'gaussian'\n",
    "if full_exp:\n",
    "    coverage_cmc_cvx, coverage_cmc_als, coverage_cvx, coverage_als, length_cmc_cvx, length_cmc_als, length_cvx, length_als = simu_synthetic(d1,d2,alpha,het,sd,tail,pr,k_star,rk,M_mean,mis_set,full_exp=full_exp)\n",
    "    print('coverage rate: \\n\\t cmc-als {}\\n\\t cmc-cvx {}\\n\\t als {}\\n\\t cvx {}'.format(coverage_cmc_als, coverage_cmc_cvx, coverage_als, coverage_cvx))\n",
    "    print('Average length: \\n\\t cmc-als {}\\n\\t cmc-cvx {}\\n\\t als {}\\n\\t cvx {}'.format(length_cmc_als, length_cmc_cvx, length_als, length_cvx))\n",
    "else:\n",
    "    coverage_cmc_als, coverage_als, length_cmc_als, length_als = simu_synthetic(d1,d2,alpha,het,sd,tail,pr,k_star,rk,M_mean,mis_set,full_exp=full_exp)\n",
    "    print('coverage rate: \\n\\t cmc-als {}\\n\\t als {}'.format(coverage_cmc_als, coverage_als))\n",
    "    print('Average length: \\n\\t cmc-als {}\\n\\t als {}'.format(length_cmc_als, length_als))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "cdf22178",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1, 'gaussian', 1]\n"
     ]
    }
   ],
   "source": [
    "# repeated\n",
    "# remember to disable plotting\n",
    "alpha = 0.1\n",
    "sd0 = 1\n",
    "het = 'homo' # 'homo' | 'logis' | 'rank1'\n",
    "base1 = 'cvx'\n",
    "base2 = base = 'als'\n",
    "repN = 100\n",
    "M_mean = 1\n",
    "k_star = 8\n",
    "sigma_true=False\n",
    "num_cores = mp.cpu_count()\n",
    "\n",
    "\n",
    "for d in [500]:\n",
    "    d1 = d2 = d\n",
    "    for full_exp in [False,True]:\n",
    "        if full_exp:\n",
    "            rk_seq = range(4,25,4)\n",
    "        else:\n",
    "            rk_seq = range(2,41,2)\n",
    "        for pr in [0.8]:\n",
    "            for tail in ['gaussian']:\n",
    "                for mis_set in [0,1,2,3]:\n",
    "                    for rk in rk_seq:\n",
    "                        if mis_set==2:\n",
    "                            pr = 0.2\n",
    "                        else:\n",
    "                            pr = 0.8\n",
    "                        if mis_set==3:\n",
    "                            tail='t'\n",
    "                            sd = 0.2\n",
    "                        else:\n",
    "                            tail='gaussian'\n",
    "                            sd = sd0\n",
    "                        print([mis_set, tail, sd])\n",
    "                        if __name__ == \"__main__\":\n",
    "                            results = Parallel(n_jobs=num_cores)(delayed(simu_synthetic)(d1,d2,alpha,het,sd,tail,pr,k_star,rk,M_mean,mis_set,full_exp=full_exp) for i in range(repN))\n",
    "                        results = np.array(results)\n",
    "\n",
    "                        if full_exp:\n",
    "                            res_mat = results.reshape(repN,8)\n",
    "                            cov_rt_cf_cvx = res_mat[:,0]\n",
    "                            cov_rt_cf_als = res_mat[:,1]\n",
    "                            cov_rt_cvx = res_mat[:,2]\n",
    "                            cov_rt_als = res_mat[:,3]\n",
    "                            cov_ = np.hstack((cov_rt_cf_cvx, cov_rt_cf_als))\n",
    "                            cov_ = np.hstack((cov_,cov_rt_cvx))\n",
    "                            cov_ = np.hstack((cov_,cov_rt_als))\n",
    "                            len_ave_cf_cvx = res_mat[:,4]\n",
    "                            len_ave_cf_als = res_mat[:,5]\n",
    "                            len_ave_cvx = res_mat[:,6]\n",
    "                            len_ave_als = res_mat[:,7]\n",
    "                            len_ = np.hstack((len_ave_cf_cvx, len_ave_cf_als))\n",
    "                            len_ = np.hstack((len_,len_ave_cvx))\n",
    "                            len_ = np.hstack((len_,len_ave_als))\n",
    "\n",
    "                            label1 = 'cf-'+base1\n",
    "                            label2 = 'cf-'+base2\n",
    "                            label3 = 'cvx'\n",
    "                            label4 = 'als'\n",
    "                            nam_ = [label1]*repN + [label2]*repN + [label3]*repN + [label4]*repN \n",
    "                            cov_df = pd.DataFrame(cov_, columns=['coverage'])\n",
    "                            len_df = pd.DataFrame(len_, columns=['length'])\n",
    "                            cov_df['approach'] = nam_\n",
    "                            len_df['approach'] = nam_\n",
    "\n",
    "                        else:\n",
    "                            res_mat = results.reshape(repN,4)\n",
    "                            cov_rt_cf_als = res_mat[:,0]\n",
    "                            cov_rt_als = res_mat[:,1]\n",
    "                            cov_ = np.hstack((cov_rt_cf_als,cov_rt_als))\n",
    "                            len_ave_cf_als = res_mat[:,2]\n",
    "                            len_ave_als = res_mat[:,3]\n",
    "                            len_ = np.hstack((len_ave_cf_als,len_ave_als))\n",
    "\n",
    "                            label1 = 'cf-'+base2\n",
    "                            label2 = 'als'\n",
    "                            nam_ = [label1]*repN + [label2]*repN\n",
    "                            cov_df = pd.DataFrame(cov_, columns=['coverage'])\n",
    "                            len_df = pd.DataFrame(len_, columns=['length'])\n",
    "                            cov_df['approach'] = nam_\n",
    "                            len_df['approach'] = nam_\n",
    "\n",
    "                        file_dir1 = '../results/cov_'+str(d1)+'_'+str(mis_set)+'_'+str(k_star)+'_'+str(rk)+'_'+het+'_'+str(sd)+'_'+tail+'_'+str(pr)+'_'+str(full_exp)+'.csv'\n",
    "                        file_dir2 = '../results/len_'+str(d1)+'_'+str(mis_set)+'_'+str(k_star)+'_'+str(rk)+'_'+het+'_'+str(sd)+'_'+tail+'_'+str(pr)+'_'+str(full_exp)+'.csv'\n",
    "                        cov_df.to_csv(file_dir1, index=False)\n",
    "                        len_df.to_csv(file_dir2, index=False)\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
