{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert one raw synthetic file into a labelled CSV: read the ';'-separated\n",
    "# input, keep its first 10 columns, attach a random binary 'target' column\n",
    "# and write the result comma-separated next to the input.\n",
    "import os,sys\n",
    "import pandas as pd\n",
    "cwd = os.path.abspath(os.path.curdir)\n",
    "sys.path.append(cwd)  # workplace\n",
    "import numpy as np\n",
    "dataname = 'lu'\n",
    "seed_sim = 101\n",
    "method = 'qwen'\n",
    "syn_path = f'./synthetic/sim_{dataname}/{seed_sim}/{method}_200i_.csv'\n",
    "syn_data = pd.read_csv(syn_path, sep=';')\n",
    "# .copy() makes the sliced frame independent of the parsed one before it is modified.\n",
    "syn_data = syn_data.iloc[:, :10].copy()\n",
    "\n",
    "n, p = 1, .5  # number of trials, probability of each trial\n",
    "# Seed the generator with the simulation seed so that re-running this cell\n",
    "# writes the same 'target' column instead of a fresh random one each time.\n",
    "rng = np.random.default_rng(seed_sim)\n",
    "new_column_values = rng.binomial(n, p, size=len(syn_data))\n",
    "syn_data['target'] = new_column_values\n",
    "\n",
    "syn_path_out = f'./synthetic/sim_{dataname}/{seed_sim}/{method}_200i.csv'\n",
    "syn_data.to_csv(syn_path_out,index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert one raw synthetic file into a labelled CSV: read the ';'-separated\n",
    "# input, keep its first 10 columns, attach a random binary 'target' column,\n",
    "# drop rows with missing values and write the result comma-separated.\n",
    "import os,sys\n",
    "import pandas as pd\n",
    "cwd = os.path.abspath(os.path.curdir)\n",
    "sys.path.append(cwd)  # workplace\n",
    "import numpy as np\n",
    "dataname = 'lu'\n",
    "seed_sim = 104\n",
    "method = 'mistral'\n",
    "\n",
    "syn_path = f'./synthetic/sim_{dataname}/{seed_sim}/{method}_100i_.csv'\n",
    "syn_data = pd.read_csv(syn_path, sep=';')\n",
    "# .copy() makes the sliced frame independent of the parsed one before it is modified.\n",
    "syn_data = syn_data.iloc[:, :10].copy()\n",
    "\n",
    "n, p = 1, .5  # number of trials, probability of each trial\n",
    "# Seed the generator with the simulation seed so that re-running this cell\n",
    "# writes the same 'target' column instead of a fresh random one each time.\n",
    "rng = np.random.default_rng(seed_sim)\n",
    "new_column_values = rng.binomial(n, p, size=len(syn_data))\n",
    "syn_data['target'] = new_column_values\n",
    "\n",
    "syn_path_out = f'./synthetic/sim_{dataname}/{seed_sim}/{method}_100i.csv'\n",
    "# Rows with any missing value are removed only after the target was attached,\n",
    "# matching the original order of operations.\n",
    "syn_data = syn_data.dropna()\n",
    "syn_data.to_csv(syn_path_out,index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove every row of syn_data that still contains a missing value.\n",
    "syn_data = syn_data.dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "V0        False\n",
       "V1        False\n",
       "V2        False\n",
       "V3        False\n",
       "V4        False\n",
       "V5        False\n",
       "V6        False\n",
       "V7        False\n",
       "V8        False\n",
       "V9        False\n",
       "target    False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-column flag: True if the column still holds at least one missing value.\n",
    "syn_data.isna().any()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# END"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
