import numpy as np
import pandas as pd
import scipy.stats

# Seed NumPy's global random state so every run produces the same data set.
# scipy.stats' rvs() falls back to this same global state when no
# random_state is passed, so the binomial draws below are reproducible too.
np.random.seed(0)

# --- Study design parameters -------------------------------------------------
num_items = 19
num_points_per_item = 5
num_participants = 120
min_age = 18
max_age = 99
num_levels = 3
condition_names = ["system_A", "system_B", "system_C"]
# Success probabilities of the two binomial mixture components, one entry per
# condition (same order as condition_names).
conditions_binom_p_1 = [0.7, 0.6, 0.2]
conditions_binom_p_2 = [0.85, 0.9, 0.45]

# Participant ids are 1-based and consecutive.
participant_ids = list(range(1, num_participants + 1))

# Alternate True/False, starting with True for the first participant.  Built
# per participant (not from num_participants / 2 pairs) so that the list has
# exactly num_participants entries even when that number is odd.
is_native_speaker = [index % 2 == 0 for index in range(num_participants)]

# One age per participant, drawn uniformly (with replacement) from the
# inclusive range [min_age, max_age].
age_range = list(range(min_age, max_age + 1))
age_samples = np.random.choice(age_range, size=num_participants, replace=True)

# Bounds used to clamp the generated score: num_items times the lowest (1)
# and the highest (num_points_per_item) value per item.
min_score = num_items * 1
max_score = num_items * num_points_per_item

# One record per (participant, condition) pair, i.e. long format.
data = []
for participant_id, age, native_speaker in zip(
    participant_ids, age_samples, is_native_speaker
):
    for j, condition in enumerate(condition_names):
        # Pick one of the two binomial components with equal probability so
        # that the scores of each condition follow a bi-modal distribution.
        if np.random.random() > 0.5:
            success_probability = conditions_binom_p_1[j]
        else:
            success_probability = conditions_binom_p_2[j]
        trust_score = scipy.stats.binom.rvs(
            max_score, success_probability, size=1
        )[0]
        # Participant id effect: a per-participant offset between 0 and 9.
        trust_score += participant_id % 10
        # Make sure the score stays inside [min_score, max_score].
        trust_score = np.clip(trust_score, min_score, max_score)
        data.append({
            "Participant_Id": participant_id,
            "Age": age,
            "Is_Native_Speaker": native_speaker,
            "Condition": condition,
            "Trust_Score": trust_score,
        })

df = pd.DataFrame(data)
print(df)
df.to_csv("toy_data.csv", index=False)

