library(pwr)
library(rlist)
# Load data ----
# All inputs are CSV files with a header row, read via paths relative to the
# working directory.

# Files directly under ../_data/processed
data_phq9 <- read.csv(
  '../_data/processed/phq9_data.csv',
  header = TRUE
)
data_phq9_diff_wide <- read.csv(
  '../_data/processed/phq9_diff_data_wide.csv',
  header = TRUE
)
data_mood <- read.csv(
  '../_data/processed/mood_data.csv',
  header = TRUE
)
data_recall <- read.csv(
  '../_data/processed/recall_data.csv',
  header = TRUE
)
data_phq9_diff_mood <- read.csv(
  '../_data/processed/phq9-diff_mood.csv',
  header = TRUE
)
data_phq9_diff_feedback <- read.csv(
  '../_data/processed/phq9-diff_feedback.csv',
  header = TRUE
)
data_diff_text <- read.csv(
  '../_data/processed/phq9_diff_text.csv',
  header = TRUE
)
data_mood_diff_text <- read.csv(
  '../_data/processed/mood_diff_text.csv',
  header = TRUE
)

# Files under ../_data/processed/sae
data_sae_phq9_g <- read.csv(
  '../_data/processed/sae/sae_phq9_gemma2-9b-it_act.csv',
  header = TRUE
)
data_sae_mood_g <- read.csv(
  '../_data/processed/sae/sae_mood_gemma2-9b-it_act.csv',
  header = TRUE
)
data_sae_recall_g <- read.csv(
  '../_data/processed/sae/sae_recall_gemma2-9b-it_act.csv',
  header = TRUE
)
data_sae_diff_fuB <- read.csv(
  '../_data/processed/sae/sae_diff_fuB.csv',
  header = TRUE
)
data_sae_diff_fuB_avg <- read.csv(
  '../_data/processed/sae/sae_diff_fuB_avg.csv',
  header = TRUE
)

# Remove outliers ----
# Rows belonging to the subjects listed in sub_outliers are dropped from every
# table loaded above; subjects are matched on the `sub` column.
sub_outliers <- c(
  'sub95_v2b', 'sub96_v2b', 'sub150_v2b', 'sub133_v2b',
  'sub180_v3', 'sub131_v3'
)

# Return `df` without the rows whose `sub` value appears in sub_outliers.
drop_outliers <- function(df) {
  df[!(df$sub %in% sub_outliers), ]
}

data_phq9 <- drop_outliers(data_phq9)
data_phq9_diff_wide <- drop_outliers(data_phq9_diff_wide)
data_mood <- drop_outliers(data_mood)
data_recall <- drop_outliers(data_recall)
data_phq9_diff_mood <- drop_outliers(data_phq9_diff_mood)
data_phq9_diff_feedback <- drop_outliers(data_phq9_diff_feedback)
data_diff_text <- drop_outliers(data_diff_text)
data_mood_diff_text <- drop_outliers(data_mood_diff_text)
data_sae_phq9_g <- drop_outliers(data_sae_phq9_g)
data_sae_mood_g <- drop_outliers(data_sae_mood_g)
data_sae_recall_g <- drop_outliers(data_sae_recall_g)
data_sae_diff_fuB <- drop_outliers(data_sae_diff_fuB)
data_sae_diff_fuB_avg <- drop_outliers(data_sae_diff_fuB_avg)


# Group sizes ----
# Number of rows of data_mood in condition 'MH' and in condition 'ML'.
sum(data_mood$condition == 'MH')
sum(data_mood$condition == 'ML')

#%% -------- Mood diff model
# mood_diff regressed on condition, using every row of data_mood.
lm_model_mood_diff <- lm(mood_diff ~ condition, data = data_mood)
summary(lm_model_mood_diff)

#%% -------- PHQ9 Q2 diff model
# Fitted on the rows where autobio equals 'True'. The subset is taken inline,
# so data_phq9_diff_wide itself keeps all rows for the models further down.
# data_phq9_diff_wide=data_phq9_diff_wide[data_phq9_diff_wide$autobio=='True',]
lm_model_phq9_diff <- lm(
  phq9_q2 ~ condition,
  data = data_phq9_diff_wide[data_phq9_diff_wide$autobio == 'True', ]
)
summary(lm_model_phq9_diff)

# Recall model
# NOTE(review): this subset compares autobio with the logical TRUE, while the
# other subsets in this script compare with the string 'True' -- confirm how
# the autobio column of recall_data.csv is typed after read.csv.
lm_model_recall <- lm(
  recall_diffSent ~ condition,
  data = data_recall[data_recall$autobio == TRUE, ]
)
summary(lm_model_recall)

# SAE ----
# Every model in this section is fitted on the rows where autobio equals
# 'True'. The name lm_model_sae_q2_g is assigned three times below; each
# summary() call prints the fit that was assigned immediately before it.

# sae_phq9_q2 as the outcome, condition as the predictor.
lm_model_sae_q2_g <- lm(
  sae_phq9_q2 ~ condition,
  data = data_sae_phq9_g[data_sae_phq9_g$autobio == 'True', ]
)
summary(lm_model_sae_q2_g)

# sae_phq9_q2 as the single predictor of phq9_q2, mood_diff and
# recall_diffSent in turn.
lm_model_sae_q2_g <- lm(
  phq9_q2 ~ sae_phq9_q2,
  data = data_sae_phq9_g[data_sae_phq9_g$autobio == 'True', ]
)
summary(lm_model_sae_q2_g)

lm_model_sae_q2_g_mood <- lm(
  mood_diff ~ sae_phq9_q2,
  data = data_sae_mood_g[data_sae_mood_g$autobio == 'True', ]
)
summary(lm_model_sae_q2_g_mood)

lm_model_sae_q2_g_recall <- lm(
  recall_diffSent ~ sae_phq9_q2,
  data = data_sae_recall_g[data_sae_recall_g$autobio == 'True', ]
)
summary(lm_model_sae_q2_g_recall)

# SAE x cond
# Same three outcomes, now with the sae_phq9_q2-by-condition interaction.
lm_model_sae_q2_g <- lm(
  phq9_q2 ~ sae_phq9_q2 * condition,
  data = data_sae_phq9_g[data_sae_phq9_g$autobio == 'True', ]
)
summary(lm_model_sae_q2_g)

lm_model_sae_q2_g_mood <- lm(
  mood_diff ~ sae_phq9_q2 * condition,
  data = data_sae_mood_g[data_sae_mood_g$autobio == 'True', ]
)
summary(lm_model_sae_q2_g_mood)

lm_model_sae_q2_g_recall <- lm(
  recall_diffSent ~ sae_phq9_q2 * condition,
  data = data_sae_recall_g[data_sae_recall_g$autobio == 'True', ]
)
summary(lm_model_sae_q2_g_recall)






# PHQ9 Q2 diff: follow-up models ----
# Condition effect on the rows where autobio equals 'False'. This reuses the
# name lm_model_phq9_diff, replacing the fit made on the 'True' rows earlier.
lm_model_phq9_diff <- lm(
  phq9_q2 ~ condition,
  data = data_phq9_diff_wide[data_phq9_diff_wide$autobio == 'False', ]
)
summary(lm_model_phq9_diff)

# Full three-way factorial with s_total, fitted on all rows.
lm_model_phq9_diff_total <- lm(
  phq9_q2 ~ condition * autobio * s_total,
  data = data_phq9_diff_wide
)
summary(lm_model_phq9_diff_total)

# Same structure with s_bin3 in place of s_total.
lm_model_phq9_diff_bin <- lm(
  phq9_q2 ~ condition * autobio * s_bin3,
  data = data_phq9_diff_wide
)
summary(lm_model_phq9_diff_bin)

# condition crossed with mood_diff.
lm_model_phq9_diff_mood <- lm(
  phq9_q2 ~ condition * mood_diff,
  data = data_phq9_diff_mood
)
summary(lm_model_phq9_diff_mood)

# autobio and condition crossed with each of sim_sit, mood_change and shoes.
lm_model_phq9_diff_feedback <- lm(
  phq9_q2 ~ autobio * condition * (sim_sit + mood_change + shoes),
  data = data_phq9_diff_feedback
)
summary(lm_model_phq9_diff_feedback)


#%% --- Recall model
# condition crossed with autobio on all rows of data_recall; this overwrites
# the lm_model_recall fit made earlier on the autobio subset.
lm_model_recall <- lm(recall_diffSent ~ condition * autobio, data = data_recall)
summary(lm_model_recall)


#%% ---- Text model
# avgRecAct_sim is fitted twice, with the order of the autobio factor levels
# swapped between the two fits. Under R's default treatment contrasts the
# first level is the reference, so each fit reports the condition term at a
# different autobio level.
# NOTE(review): factor() turns any value other than 'True'/'False' into NA --
# confirm autobio is stored as those strings in phq9_diff_text.csv.
data_diff_text$autobio <- factor(
  data_diff_text$autobio,
  levels = c('True', 'False')
)
lm_model_RecAct <- lm(avgRecAct_sim ~ condition * autobio, data = data_diff_text)
summary(lm_model_RecAct)

data_diff_text$autobio <- factor(
  data_diff_text$autobio,
  levels = c('False', 'True')
)
lm_model_RecAct <- lm(avgRecAct_sim ~ condition * autobio, data = data_diff_text)
summary(lm_model_RecAct)


# The remaining text models keep the ('False', 'True') level order set above.
lm_model_BasePospert <- lm(
  avgBaselinePospert_sim ~ condition * autobio,
  data = data_diff_text
)
summary(lm_model_BasePospert)

lm_model_q2Act <- lm(q2Act_sim ~ condition * autobio, data = data_diff_text)
summary(lm_model_q2Act)

lm_model_q2Pospert <- lm(
  q2Pospert_sim ~ condition * autobio,
  data = data_diff_text
)
summary(lm_model_q2Pospert)

#%% ---- Text model moderation
# phq9_q2 on the full condition x avgRecAct_sim x autobio factorial.
lm_model_q2Diff_RecAct <- lm(
  phq9_q2 ~ condition * avgRecAct_sim * autobio,
  data = data_diff_text
)
summary(lm_model_q2Diff_RecAct)



# autobio is releveled here, but the model below does not include autobio,
# so the relevel has no effect on this fit.
data_mood_diff_text$autobio <- factor(
  data_mood_diff_text$autobio,
  levels = c('True', 'False')
)

lm_model_moodDiff_RecAct <- lm(
  mood_diff ~ condition * avgRecAct_sim,
  data = data_mood_diff_text
)
summary(lm_model_moodDiff_RecAct)


#%% -- SAE model
# The SAE tables are read from disk again, so every model in this section
# starts from freshly loaded data rather than from the objects used above.
# NOTE(review): these paths omit the 'sae/' sub-folder used when the same
# tables are loaded at the top of the script -- confirm which location holds
# the current files.
data_sae_phq9_g <- read.csv(
  '../_data/processed/sae_phq9_gemma2-9b-it_act.csv',
  header = TRUE
)
data_sae_mood_g <- read.csv(
  '../_data/processed/sae_mood_gemma2-9b-it_act.csv',
  header = TRUE
)
data_sae_recall_g <- read.csv(
  '../_data/processed/sae_recall_gemma2-9b-it_act.csv',
  header = TRUE
)
data_sae_diff_fuB <- read.csv(
  '../_data/processed/sae_diff_fuB.csv',
  header = TRUE
)
data_sae_diff_fuB_avg <- read.csv(
  '../_data/processed/sae_diff_fuB_avg.csv',
  header = TRUE
)

# Re-reading replaces the outlier-filtered tables with unfiltered ones, so the
# exclusion has to be applied again; otherwise the models below would be
# fitted with the subjects in sub_outliers included.
data_sae_phq9_g <- data_sae_phq9_g[
  !(data_sae_phq9_g$sub %in% sub_outliers),
]
data_sae_mood_g <- data_sae_mood_g[
  !(data_sae_mood_g$sub %in% sub_outliers),
]
data_sae_recall_g <- data_sae_recall_g[
  !(data_sae_recall_g$sub %in% sub_outliers),
]
data_sae_diff_fuB <- data_sae_diff_fuB[
  !(data_sae_diff_fuB$sub %in% sub_outliers),
]
data_sae_diff_fuB_avg <- data_sae_diff_fuB_avg[
  !(data_sae_diff_fuB_avg$sub %in% sub_outliers),
]

# phq9_q2 on autobio x condition, all rows.
lm_model_sae_q2_m <- lm(phq9_q2 ~ autobio * condition, data = data_sae_phq9_g)
summary(lm_model_sae_q2_m)


# The models below use all rows of each table (no autobio subset) unless the
# table is filtered explicitly. lm_model_sae_q2_g is assigned twice; each
# summary() prints the fit assigned immediately before it.
# data_sae_phq9_g=data_sae_phq9_g[data_sae_phq9_g$autobio=='True',]
lm_model_sae_q2_g <- lm(phq9_q2 ~ sae_phq9_q2 * condition, data = data_sae_phq9_g)
summary(lm_model_sae_q2_g)

lm_model_sae_q2_g <- lm(phq9_q2 ~ sae_phq9_q2, data = data_sae_phq9_g)
summary(lm_model_sae_q2_g)

lm_model_sae_q2_g_mood <- lm(mood_diff ~ sae_phq9_q2, data = data_sae_mood_g)
summary(lm_model_sae_q2_g_mood)


# The recall table is narrowed in place to the rows where autobio equals
# 'True'; data_sae_recall_g stays filtered from here on.
data_sae_recall_g <- data_sae_recall_g[data_sae_recall_g$autobio == 'True', ]
lm_model_sae_q2_g_recall <- lm(
  recall_diffSent ~ sae_phq9_q2,
  data = data_sae_recall_g
)
summary(lm_model_sae_q2_g_recall)

# Condition effect on sae_diff_fuB and on sae_diff_fuB_avg.
lm_model_sae_diff_fuB <- lm(sae_diff_fuB ~ condition, data = data_sae_diff_fuB)
summary(lm_model_sae_diff_fuB)

lm_model_sae_diff_fuB_avg <- lm(
  sae_diff_fuB_avg ~ condition,
  data = data_sae_diff_fuB_avg
)
summary(lm_model_sae_diff_fuB_avg)

