{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(345, 47)\n",
      "(302, 48)\n",
      "Index(['level_0', 'Unnamed: 0', 'sessionid', 'content', 'database', 'repeat',\n",
      "       'nCopies', 'copyIDs', 'phq', 'demographics', 'tPrompt', 'twitter',\n",
      "       'hasTwitter', 'mobile', 'date', 'Age', 'Gender', 'Student',\n",
      "       'PriorTreatement', 'Group', 'Covid', 'item1', 'item2', 'item3', 'item4',\n",
      "       'item5', 'item6', 'item7', 'item8', 'item9', 'phq9', 'lentext',\n",
      "       'lenCon', 'lenTweets', 'sharedemographics', 'sharetPrompt',\n",
      "       'shareaPrompt', 'shareaudio', 'sharecalendar', 'shareclog',\n",
      "       'sharecontact', 'sharetlog', 'sharegps', 'shareusername', 'sharetweets',\n",
      "       'index', 'Covid1', 'Covid2'],\n",
      "      dtype='object')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>level_0</th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>sessionid</th>\n",
       "      <th>content</th>\n",
       "      <th>database</th>\n",
       "      <th>repeat</th>\n",
       "      <th>nCopies</th>\n",
       "      <th>copyIDs</th>\n",
       "      <th>phq</th>\n",
       "      <th>demographics</th>\n",
       "      <th>...</th>\n",
       "      <th>sharecalendar</th>\n",
       "      <th>shareclog</th>\n",
       "      <th>sharecontact</th>\n",
       "      <th>sharetlog</th>\n",
       "      <th>sharegps</th>\n",
       "      <th>shareusername</th>\n",
       "      <th>sharetweets</th>\n",
       "      <th>index</th>\n",
       "      <th>Covid1</th>\n",
       "      <th>Covid2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8181</td>\n",
       "      <td>\"My favorite place is any bubble tea cafe beca...</td>\n",
       "      <td>summer</td>\n",
       "      <td>-100.0</td>\n",
       "      <td>-100.0</td>\n",
       "      <td>-100</td>\n",
       "      <td>['{\"Q0\":\"0\",\"Q1\":\"1\",\"Q2\":\"0\",\"Q3\":\"0\",\"Q4\":\"1...</td>\n",
       "      <td>['{\"Q0\":\"18-23\",\"Q1\":\"Woman\",\"Q2\":\"Yes, I am a...</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-100.0</td>\n",
       "      <td>-100</td>\n",
       "      <td>-100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>8170</td>\n",
       "      <td>\"My favorite place is around the people I love.\"</td>\n",
       "      <td>summer</td>\n",
       "      <td>-100.0</td>\n",
       "      <td>-100.0</td>\n",
       "      <td>-100</td>\n",
       "      <td>['{\"Q0\":\"1\",\"Q1\":\"1\",\"Q2\":\"0\",\"Q3\":\"1\",\"Q4\":\"1...</td>\n",
       "      <td>['{\"Q0\":\"18-23\",\"Q1\":\"Woman\",\"Q2\":\"Yes, I am a...</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-100.0</td>\n",
       "      <td>-100</td>\n",
       "      <td>-100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>8516</td>\n",
       "      <td>\"New Zealand where I did my IQP. The scenery w...</td>\n",
       "      <td>summer</td>\n",
       "      <td>-100.0</td>\n",
       "      <td>-100.0</td>\n",
       "      <td>-100</td>\n",
       "      <td>['{\"Q0\":\"1\",\"Q1\":\"1\",\"Q2\":\"1\",\"Q3\":\"3\",\"Q4\":\"2...</td>\n",
       "      <td>['{\"Q0\":\"18-23\",\"Q1\":\"Man\",\"Q2\":\"Yes, I am a u...</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-100.0</td>\n",
       "      <td>-100</td>\n",
       "      <td>-100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4041</td>\n",
       "      <td>\"My favorite place is outside in the sunshine....</td>\n",
       "      <td>summer</td>\n",
       "      <td>-100.0</td>\n",
       "      <td>-100.0</td>\n",
       "      <td>-100</td>\n",
       "      <td>['{\"Q0\":\"1\",\"Q1\":\"1\",\"Q2\":\"2\",\"Q3\":\"1\",\"Q4\":\"2...</td>\n",
       "      <td>['{\"Q0\":\"18-23\",\"Q1\":\"Woman\",\"Q2\":\"Yes, I am a...</td>\n",
       "      <td>...</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-100.0</td>\n",
       "      <td>-100</td>\n",
       "      <td>-100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>7256</td>\n",
       "      <td>\"My favorite place is a park where I can sit u...</td>\n",
       "      <td>summer</td>\n",
       "      <td>-100.0</td>\n",
       "      <td>-100.0</td>\n",
       "      <td>-100</td>\n",
       "      <td>['{\"Q0\":\"0\",\"Q1\":\"1\",\"Q2\":\"2\",\"Q3\":\"2\",\"Q4\":\"0...</td>\n",
       "      <td>[]</td>\n",
       "      <td>...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-100.0</td>\n",
       "      <td>-100</td>\n",
       "      <td>-100</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 48 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   level_0  Unnamed: 0  sessionid  \\\n",
       "0        0           0       8181   \n",
       "1        1           1       8170   \n",
       "2        2           2       8516   \n",
       "3        3           3       4041   \n",
       "4        4           4       7256   \n",
       "\n",
       "                                             content database  repeat  \\\n",
       "0  \"My favorite place is any bubble tea cafe beca...   summer  -100.0   \n",
       "1   \"My favorite place is around the people I love.\"   summer  -100.0   \n",
       "2  \"New Zealand where I did my IQP. The scenery w...   summer  -100.0   \n",
       "3  \"My favorite place is outside in the sunshine....   summer  -100.0   \n",
       "4  \"My favorite place is a park where I can sit u...   summer  -100.0   \n",
       "\n",
       "   nCopies copyIDs                                                phq  \\\n",
       "0   -100.0    -100  ['{\"Q0\":\"0\",\"Q1\":\"1\",\"Q2\":\"0\",\"Q3\":\"0\",\"Q4\":\"1...   \n",
       "1   -100.0    -100  ['{\"Q0\":\"1\",\"Q1\":\"1\",\"Q2\":\"0\",\"Q3\":\"1\",\"Q4\":\"1...   \n",
       "2   -100.0    -100  ['{\"Q0\":\"1\",\"Q1\":\"1\",\"Q2\":\"1\",\"Q3\":\"3\",\"Q4\":\"2...   \n",
       "3   -100.0    -100  ['{\"Q0\":\"1\",\"Q1\":\"1\",\"Q2\":\"2\",\"Q3\":\"1\",\"Q4\":\"2...   \n",
       "4   -100.0    -100  ['{\"Q0\":\"0\",\"Q1\":\"1\",\"Q2\":\"2\",\"Q3\":\"2\",\"Q4\":\"0...   \n",
       "\n",
       "                                        demographics  ... sharecalendar  \\\n",
       "0  ['{\"Q0\":\"18-23\",\"Q1\":\"Woman\",\"Q2\":\"Yes, I am a...  ...           1.0   \n",
       "1  ['{\"Q0\":\"18-23\",\"Q1\":\"Woman\",\"Q2\":\"Yes, I am a...  ...           1.0   \n",
       "2  ['{\"Q0\":\"18-23\",\"Q1\":\"Man\",\"Q2\":\"Yes, I am a u...  ...           1.0   \n",
       "3  ['{\"Q0\":\"18-23\",\"Q1\":\"Woman\",\"Q2\":\"Yes, I am a...  ...           1.0   \n",
       "4                                                 []  ...           0.0   \n",
       "\n",
       "  shareclog sharecontact  sharetlog  sharegps shareusername sharetweets  \\\n",
       "0       1.0          1.0        1.0       0.0             0         0.0   \n",
       "1       1.0          1.0        1.0       1.0             0         0.0   \n",
       "2       1.0          1.0        1.0       1.0             0         0.0   \n",
       "3       1.0          1.0        1.0       1.0             0         0.0   \n",
       "4       0.0          0.0        0.0       0.0             0         0.0   \n",
       "\n",
       "   index Covid1 Covid2  \n",
       "0 -100.0   -100   -100  \n",
       "1 -100.0   -100   -100  \n",
       "2 -100.0   -100   -100  \n",
       "3 -100.0   -100   -100  \n",
       "4 -100.0   -100   -100  \n",
       "\n",
       "[5 rows x 48 columns]"
      ]
     },
     "execution_count": 173,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Author: ML Tlachac, WPI\n",
    "#For StudentSADD, 2021\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "data = pd.read_csv(\"extractedDataShort.csv\")\n",
    "data = data.fillna(value = -100)\n",
    "print(data.shape)\n",
    "data = data[data.repeat != 1] #remove duplicated sessions\n",
    "data = data.reset_index()\n",
    "print(data.shape)\n",
    "print(data.columns)\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "142\n",
      "80\n"
     ]
    }
   ],
   "source": [
    "#binary names\n",
    "d10 = []\n",
    "s1 = []\n",
    "for i in range(0, data.shape[0]):\n",
    "    if int(data.phq9[i]) >= 10:\n",
    "        d10.append(1)\n",
    "    else:\n",
    "        d10.append(0)\n",
    "    if int(data.item9[i]) >= 1:\n",
    "        s1.append(1)\n",
    "    else:\n",
    "        s1.append(0)\n",
    "print(sum(d10))\n",
    "print(sum(s1))\n",
    "data[\"d10\"] = d10\n",
    "data[\"s1\"] = s1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Website &$302$&$269$ ($89.07\\%$)&$10.29 \\pm 6.54$&$128$ ($47.58\\%$)&$0.45 \\pm 0.85$&$75$ ($27.88\\%$)\\\\\n",
      "Mobile &$302$&$33$ ($10.93\\%$)&$9.03 \\pm 6.13$&$14$ ($42.42\\%$)&$0.27 \\pm 0.75$&$5$ ($15.15\\%$)\\\\\n"
     ]
    }
   ],
   "source": [
    "#total, count (%), avg(PHQ-9)\\pm std, count(PHQ-9>=10)(%), avg(item-9)\\pm std, count(item-9>=1)(%)\n",
    "\n",
    "#web\n",
    "df = data[(data.mobile == 0)]\n",
    "print(\"Website &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") \n",
    "#mobile\n",
    "df = data[(data.mobile == 1)]\n",
    "print(\"Mobile &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 176,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = data[data.Student != '-100'] #limit to those who shared demographics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Undergraduate &$295$&$236$ ($80.0\\%$)&$10.46 \\pm 6.56$&$117$ ($49.58\\%$)&$0.48 \\pm 0.88$&$68$ ($28.81\\%$)\\\\\n",
      "Graduate &$295$&$59$ ($20.0\\%$)&$8.95 \\pm 6.42$&$22$ ($37.29\\%$)&$0.29 \\pm 0.69$&$11$ ($18.64\\%$)\\\\\n"
     ]
    }
   ],
   "source": [
    "df = data[(data.Student == 'Yes, I am a undergrad student') | (data.Student == 'Yes, I am an undergraduate student.')]\n",
    "print(\"Undergraduate &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") \n",
    "df = data[(data.Student == 'Yes, I am a graduate student') | (data.Student == 'Yes, I am a graduate student.')]\n",
    "print(\"Graduate &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Age: $18-23$ &$295$&$240$ ($81.36\\%$)&$10.38 \\pm 6.59$&$118$ ($49.17\\%$)&$0.47 \\pm 0.87$&$67$ ($27.92\\%$)\\\\\n",
      "Age: $24-39$ &$295$&$52$ ($17.63\\%$)&$8.81 \\pm 6.18$&$18$ ($34.62\\%$)&$0.31 \\pm 0.69$&$11$ ($21.15\\%$)\\\\\n",
      "Age: $40-55$ &$295$&$3$ ($1.02\\%$)&$16.33 \\pm 3.77$&$3$ ($100.0\\%$)&$0.67 \\pm 0.94$&$1$ ($33.33\\%$)\\\\\n"
     ]
    }
   ],
   "source": [
    "df = data[(data.Age == '18-23')]\n",
    "print(\"Age: $18-23$ &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") \n",
    "df = data[(data.Age == '24-39') | (data.Age == '24-29')]\n",
    "print(\"Age: $24-39$ &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") \n",
    "df = data[(data.Age == '40-55')]\n",
    "print(\"Age: $40-55$ &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 179,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Woman &$295$&$174$ ($58.98\\%$)&$10.57 \\pm 6.22$&$86$ ($49.43\\%$)&$0.43 \\pm 0.85$&$43$ ($24.71\\%$)\\\\\n",
      "Man &$295$&$108$ ($36.61\\%$)&$8.98 \\pm 6.96$&$42$ ($38.89\\%$)&$0.43 \\pm 0.84$&$28$ ($25.93\\%$)\\\\\n",
      "Other Gender &$295$&$13$ ($4.41\\%$)&$14.38 \\pm 4.94$&$11$ ($84.62\\%$)&$0.77 \\pm 0.7$&$8$ ($61.54\\%$)\\\\\n"
     ]
    }
   ],
   "source": [
    "df = data[(data.Gender == 'Woman')]\n",
    "print(\"Woman &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") \n",
    "df = data[(data.Gender == 'Man')]\n",
    "print(\"Man &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") \n",
    "df = data[(data.Gender == \"Other\")]\n",
    "print(\"Other Gender &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 180,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "No Treatment &$295$&$217$ ($73.56\\%$)&$9.45 \\pm 6.39$&$90$ ($41.47\\%$)&$0.4 \\pm 0.85$&$50$ ($23.04\\%$)\\\\\n",
      "Prior Treatment &$295$&$78$ ($26.44\\%$)&$12.14 \\pm 6.6$&$49$ ($62.82\\%$)&$0.55 \\pm 0.83$&$29$ ($37.18\\%$)\\\\\n"
     ]
    }
   ],
   "source": [
    "df = data[(data.PriorTreatement == 'No')]\n",
    "print(\"No Treatment &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") \n",
    "df = data[(data.PriorTreatement == \"Yes\")]\n",
    "print(\"Prior Treatment &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 184,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "White &$295$&$186$ ($63.05\\%$)&$9.99 \\pm 6.04$&$87$ ($46.77\\%$)&$0.37 \\pm 0.72$&$48$ ($25.81\\%$)\\\\\n",
      "Asian &$295$&$59$ ($20.0\\%$)&$9.47 \\pm 7.41$&$25$ ($42.37\\%$)&$0.58 \\pm 1.06$&$16$ ($27.12\\%$)\\\\\n",
      "Hispanic/Latino &$295$&$9$ ($3.05\\%$)&$9.22 \\pm 5.88$&$5$ ($55.56\\%$)&$0.11 \\pm 0.31$&$1$ ($11.11\\%$)\\\\\n",
      "Black &$295$&$10$ ($3.39\\%$)&$10.0 \\pm 6.39$&$4$ ($40.0\\%$)&$0.2 \\pm 0.4$&$2$ ($20.0\\%$)\\\\\n",
      "Other &$295$&$10$ ($3.39\\%$)&$14.3 \\pm 7.2$&$7$ ($70.0\\%$)&$0.8 \\pm 0.98$&$5$ ($50.0\\%$)\\\\\n",
      "Multiple Groups &$295$&$20$ ($6.78\\%$)&$12.65 \\pm 7.0$&$11$ ($55.0\\%$)&$0.8 \\pm 1.21$&$7$ ($35.0\\%$)\\\\\n"
     ]
    }
   ],
   "source": [
    "df = data[(data.Group == 'White/Caucasian')]\n",
    "print(\"White &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") \n",
    "df = data[(data.Group == \"Asian\")]\n",
    "print(\"Asian &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") \n",
    "df = data[(data.Group == 'Hispanic/Latino')]\n",
    "print(\"Hispanic/Latino &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") \n",
    "df = data[(data.Group == 'Black/African American')]\n",
    "print(\"Black &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") \n",
    "df = data[(data.Group == 'Other')]\n",
    "print(\"Other &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") \n",
    "df = data[(data.Group != 'White/Caucasian') & (data.Group != 'Hispanic/Latino') & (data.Group != 'Black/African American') & (data.Group != 'Asian') & (data.Group != 'Other') & (data.Group != 'Prefer not to answer')]\n",
    "print(\"Multiple Groups &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Remote &$180$&$97$ ($53.89\\%$)&$9.9 \\pm 6.86$&$39$ ($40.21\\%$)&$0.47 \\pm 0.87$&$27$ ($27.84\\%$)\\\\\n",
      "Hybrid &$180$&$73$ ($40.56\\%$)&$11.16 \\pm 6.91$&$38$ ($52.05\\%$)&$0.62 \\pm 0.97$&$26$ ($35.62\\%$)\\\\\n",
      "Not Remote &$180$&$10$ ($5.56\\%$)&$14.1 \\pm 7.53$&$7$ ($70.0\\%$)&$0.8 \\pm 0.98$&$5$ ($50.0\\%$)\\\\\n"
     ]
    }
   ],
   "source": [
    "data = data[data.Covid1 != -100]\n",
    "df = data[(data.Covid1 == 'Yes-Full Time')]\n",
    "print(\"Remote &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") \n",
    "df = data[(data.Covid1 == 'Yes-Hybrid')]\n",
    "print(\"Hybrid &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") \n",
    "df = data[(data.Covid1 == 'No-Not At All')]\n",
    "print(\"Not Remote &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "COVID-19 &$190$&$12$ ($6.32\\%$)&$8.58 \\pm 5.71$&$4$ ($33.33\\%$)&$0.25 \\pm 0.43$&$3$ ($25.0\\%$)\\\\\n",
      "No/Unknown COVID-19 &$190$&$168$ ($88.42\\%$)&$10.79 \\pm 7.06$&$80$ ($47.62\\%$)&$0.57 \\pm 0.95$&$55$ ($32.74\\%$)\\\\\n"
     ]
    }
   ],
   "source": [
    "data = data[data.Covid2 != -100]\n",
    "df = data[(data.Covid2 == 'Yes')]\n",
    "print(\"COVID-19 &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") \n",
    "df = data[(data.Covid2 == 'No/Idk') | (data.Covid2 == '\"No/Don\\'t Know\"')]\n",
    "print(\"No/Unknown COVID-19 &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 169,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(345, 47)\n",
      "(302, 48)\n",
      "Index(['level_0', 'Unnamed: 0', 'sessionid', 'content', 'database', 'repeat',\n",
      "       'nCopies', 'copyIDs', 'phq', 'demographics', 'tPrompt', 'twitter',\n",
      "       'hasTwitter', 'mobile', 'date', 'Age', 'Gender', 'Student',\n",
      "       'PriorTreatement', 'Group', 'Covid', 'item1', 'item2', 'item3', 'item4',\n",
      "       'item5', 'item6', 'item7', 'item8', 'item9', 'phq9', 'lentext',\n",
      "       'lenCon', 'lenTweets', 'sharedemographics', 'sharetPrompt',\n",
      "       'shareaPrompt', 'shareaudio', 'sharecalendar', 'shareclog',\n",
      "       'sharecontact', 'sharetlog', 'sharegps', 'shareusername', 'sharetweets',\n",
      "       'index', 'Covid1', 'Covid2'],\n",
      "      dtype='object')\n",
      "142\n",
      "80\n"
     ]
    }
   ],
   "source": [
    "data = pd.read_csv(\"extractedDataShort.csv\")\n",
    "data = data.fillna(value = -100)\n",
    "print(data.shape)\n",
    "data = data[data.repeat != 1]\n",
    "data = data.reset_index()\n",
    "print(data.shape)\n",
    "print(data.columns)\n",
    "#binary names\n",
    "d10 = []\n",
    "s1 = []\n",
    "for i in range(0, data.shape[0]):\n",
    "    if int(data.phq9[i]) >= 10:\n",
    "        d10.append(1)\n",
    "    else:\n",
    "        d10.append(0)\n",
    "    if int(data.item9[i]) >= 1:\n",
    "        s1.append(1)\n",
    "    else:\n",
    "        s1.append(0)\n",
    "print(sum(d10))\n",
    "print(sum(s1))\n",
    "data[\"d10\"] = d10\n",
    "data[\"s1\"] = s1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "All &$302$&$302$ ($100.0\\%$)&$10.15 \\pm 6.51$&$142$ ($47.02\\%$)&$0.43 \\pm 0.84$&$80$ ($26.49\\%$)\\\\\n"
     ]
    }
   ],
   "source": [
    "df = data\n",
    "print(\"All &$\" + str(data.shape[0]) + \"$&$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Demographics &$295$ ($97.68\\%$)&$10.16 \\pm 6.56$&$139$ ($47.12\\%$)&$0.44 \\pm 0.85$&$79$ ($26.78\\%$)\\\\\n",
      "Text Prompt &$298$ ($98.68\\%$)&$10.22 \\pm 6.5$&$141$ ($47.32\\%$)&$0.44 \\pm 0.84$&$80$ ($26.85\\%$)\\\\\n"
     ]
    }
   ],
   "source": [
    "df = data[(data.sharedemographics == 1)]\n",
    "print(\"Demographics &$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") \n",
    "df = data[(data.sharetPrompt == 1)]\n",
    "print(\"Text Prompt &$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Unscripted Audio &$200$ ($66.23\\%$)&$9.77 \\pm 6.25$&$90$ ($45.0\\%$)&$0.36 \\pm 0.78$&$44$ ($22.0\\%$)\\\\\n",
      "Scripted Audio &$194$ ($64.24\\%$)&$9.87 \\pm 6.32$&$89$ ($45.88\\%$)&$0.37 \\pm 0.79$&$44$ ($22.68\\%$)\\\\\n"
     ]
    }
   ],
   "source": [
    "df = data[(data.shareaPrompt == 1)]\n",
    "print(\"Unscripted Audio &$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") \n",
    "df = data[(data.shareaudio == 1)]\n",
    "print(\"Scripted Audio &$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\") "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 187,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Has Twitter &$47$ ($27.49\\%$)&$10.28 \\pm 6.25$&$21$ ($44.68\\%$)&$0.43 \\pm 0.79$&$13$ ($27.66\\%$)\\\\\n",
      "Shared Username &$16$ ($34.04\\%$)&$8.31 \\pm 4.27$&$5$ ($31.25\\%$)&$0.38 \\pm 0.78$&$4$ ($25.0\\%$)\\\\\n"
     ]
    }
   ],
   "source": [
    "data = data[(data.hasTwitter != -100) | (data.shareusername == 1)]\n",
    "df = data[(data.hasTwitter == '1') | (data.hasTwitter == \"Yes\") | (data.hasTwitter == 'HasTwitter') | (data.shareusername == 1)]\n",
    "print(\"Has Twitter &$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\")\n",
    "data = data[(data.hasTwitter == '1') | (data.hasTwitter == \"Yes\") | (data.hasTwitter == 'HasTwitter') | (data.shareusername == 1)]\n",
    "df = data[data.shareusername == 1]\n",
    "print(\"Shared Username &$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Calendar &$11$ ($33.33\\%$)&$7.18 \\pm 4.32$&$5$ ($45.45\\%$)&$0.0 \\pm 0.0$&$0$ ($0.0\\%$)\\\\\n",
      "Call Logs &$10$ ($30.3\\%$)&$7.7 \\pm 4.2$&$5$ ($50.0\\%$)&$0.0 \\pm 0.0$&$0$ ($0.0\\%$)\\\\\n",
      "Text Logs &$10$ ($30.3\\%$)&$7.7 \\pm 4.2$&$5$ ($50.0\\%$)&$0.0 \\pm 0.0$&$0$ ($0.0\\%$)\\\\\n",
      "Contacts &$11$ ($33.33\\%$)&$7.18 \\pm 4.32$&$5$ ($45.45\\%$)&$0.0 \\pm 0.0$&$0$ ($0.0\\%$)\\\\\n",
      "GPS &$21$ ($63.64\\%$)&$7.43 \\pm 4.11$&$7$ ($33.33\\%$)&$0.1 \\pm 0.29$&$2$ ($9.52\\%$)\\\\\n"
     ]
    }
   ],
   "source": [
    "data = data[data.mobile == 1]\n",
    "df = data[data.sharecalendar == 1]\n",
    "print(\"Calendar &$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\")\n",
    "df = data[data.shareclog == 1]\n",
    "print(\"Call Logs &$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\")\n",
    "df = data[data.sharetlog == 1]\n",
    "print(\"Text Logs &$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\")\n",
    "df = data[data.sharecontact == 1]\n",
    "print(\"Contacts &$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\")\n",
    "df = data[data.sharegps == 1]\n",
    "print(\"GPS &$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/data.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 188,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Unscripted Voice &$110$ ($55.0\\%$)&$9.51 \\pm 6.26$&$44$ ($40.0\\%$)&$0.3 \\pm 0.68$&$22$ ($20.0\\%$)\\\\\n"
     ]
    }
   ],
   "source": [
    "data = pd.read_csv(\"unscriptedTranscript.csv\")\n",
    "data[\"d10\"] = data.label\n",
    "data[\"s1\"] = data[\"label_q9\"]\n",
    "data[\"item9\"] = data.q9 \n",
    "df = data\n",
    "print(\"Unscripted Voice &$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/200,2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Scripted Voice &$115$ ($59.28\\%$)&$9.43 \\pm 6.27$&$45$ ($39.13\\%$)&$0.29 \\pm 0.67$&$22$ ($19.13\\%$)\\\\\n"
     ]
    }
   ],
   "source": [
    "data = pd.read_csv(\"scriptedTranscript.csv\")\n",
    "data[\"d10\"] = data.label\n",
    "data[\"s1\"] = data[\"label_q9\"]\n",
    "data[\"item9\"] = data.q9 \n",
    "df = data\n",
    "print(\"Scripted Voice &$\" + str(df.shape[0]) + \"$ ($\" + str(round(100*df.shape[0]/194,2)) + \"\\%$)&$\" + str(round(sum(df.phq9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.phq9),2)) + \"$&$\" + str(round(sum(df.d10),2)) + \"$ ($\" + str(round(100*sum(df.d10)/df.shape[0],2)) + \"\\%$)&$\" + str(round(sum(df.item9)/df.shape[0],2)) + \" \\pm \" + str(round(np.std(df.item9),2)) + \"$&$\" + str(round(sum(df.s1),2)) + \"$ ($\" + str(round(100*sum(df.s1)/df.shape[0],2)) + \"\\%$)\\\\\\\\\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
