{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9af2a154",
   "metadata": {},
   "outputs": [],
   "source": [
    "from statsmodels.tsa.arima_process import ArmaProcess\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e0a9574a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def mean_model(X, v):\n",
    "    # Here, v should be v_rep of dimension N. \n",
    "    mu = np.exp(0.2*np.sum(np.cos(X[:,:3]), axis=1)+np.sum(0.2/(X[:,3:5]**2+1), axis=1)+0.2+v)\n",
    "    return mu\n",
    "\n",
    "def generate_data(data_type, dir_name, p = 10, n_simul = 100):    \n",
    "    n_sub, n_num, lam, rand_dist = data_type.split('-')\n",
    "    n_sub, n_num, lam = int(n_sub), int(n_num), float(lam)\n",
    "    N = n_sub * n_num    \n",
    "    arma = ArmaProcess([1, -0.5], 1) # AR(1) with coeff 0.5        \n",
    "    for repeat in range(n_simul):\n",
    "        # generate data\n",
    "        np.random.seed(repeat)\n",
    "        X = arma.generate_sample(nsample=(N,p), axis=1)\n",
    "        if rand_dist == 'fixed':\n",
    "            u_rep, v_rep = np.repeat(1, N), np.repeat(0, N)\n",
    "        elif rand_dist == 'gamma':\n",
    "            u = np.random.gamma(1/lam, lam, n_sub)\n",
    "            u_rep = np.repeat(u, n_num)\n",
    "            v_rep = np.log(u_rep)\n",
    "        elif rand_dist == 'normal':\n",
    "            v = np.random.normal(0, np.sqrt(lam), n_sub)\n",
    "            u = np.exp(v)\n",
    "            u_rep = np.repeat(u, n_num)\n",
    "            v_rep = np.repeat(v, n_num)\n",
    "        mu = mean_model(X, v_rep, b0, c0, c1)\n",
    "        y = np.random.poisson(mu)\n",
    "        # save data\n",
    "        data = pd.DataFrame(X, columns=[('x'+str(i)) for i in range(p)])\n",
    "        data['y'] = y\n",
    "        data['u'] = u_rep\n",
    "        data['mu'] = mu\n",
    "        data['sub'] = np.repeat(np.arange(n_sub), n_num)\n",
    "        data['num'] = np.tile(np.arange(n_num), n_sub)\n",
    "        file_name = dir_name + 'simul-data-' + data_type + '-' + str(repeat)\n",
    "        data.to_csv(file_name+'.csv', index=False)\n",
    "        del data              "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a2067992",
   "metadata": {},
   "outputs": [],
   "source": [
    "dir_name = os.getcwd()\n",
    "data_type_list = ['1000-10-0-fixed', '1000-10-0.5-gamma', '1000-10-1-gamma', '1000-10-0.5-normal', '1000-10-1-normal']\n",
    "\n",
    "for data_type in data_type_list:    \n",
    "    generate_data(data_type, dir_name, p=10, ar=[1, -0.5], ma=1, n_simul=100)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
