import csv
import random

# Number of questions the script generates when run as __main__.
NUM_QUESTIONS = 10000
# Path of the CSV file the generated questions are written to.
# NOTE(review): this is an empty string, so open() in save_to_csv is expected
# to fail and the script will only print "Error writing to file" - set a real
# path (e.g. a .csv filename) before running; intended name to be confirmed.
OUTPUT_FILENAME = ''

EXPECTED_VALUE_RANGE = (100, 1000)  # (min, max) bounds for a lottery's uniformly drawn expected value
PROBABILITY_RANGE = (0.2, 0.8)      # (min, max) bounds for the uniformly drawn probability of the first reward


def create_lottery():
    """Create one two-outcome lottery with a randomly drawn expected value.

    The expected value ``ev`` is drawn uniformly from ``EXPECTED_VALUE_RANGE``
    and the probability ``p`` of the first reward uniformly from
    ``PROBABILITY_RANGE``. The second reward is drawn uniformly from
    ``[0, 0.8 * ev]``; the first reward is then solved from
    ``p * reward1 + (1 - p) * reward2 == ev`` so the lottery has exactly
    the drawn expected value.

    A draw whose first reward would be negative is discarded and all values
    are redrawn. The retry is a loop rather than a recursive call, so a
    configuration that rejects many draws in a row cannot exhaust the
    interpreter's recursion limit.

    Returns:
        A tuple ``(ev, p, reward1, reward2, variance)`` where ``variance``
        is the variance of the payout. Both rewards are non-negative.
    """
    while True:
        ev = random.uniform(EXPECTED_VALUE_RANGE[0], EXPECTED_VALUE_RANGE[1])
        p = random.uniform(PROBABILITY_RANGE[0], PROBABILITY_RANGE[1])

        # The low reward is capped at 80% of the EV so the two outcomes
        # cannot collapse onto the same value.
        reward2 = random.uniform(0, ev * 0.8)

        # Solve the expected-value equation for the remaining reward.
        reward1 = (ev - (1 - p) * reward2) / p

        if reward1 < 0:
            # Reject and redraw everything.
            continue

        # Variance of a two-point distribution around its mean.
        variance = p * ((reward1 - ev) ** 2) + (1 - p) * ((reward2 - ev) ** 2)

        return ev, p, reward1, reward2, variance


def generate_questions(num_questions):
    """Build ``num_questions`` choice questions, each offering two lotteries.

    Every question pairs two lotteries obtained from ``create_lottery``. A
    pair is kept only if the expected values differ by more than 5% of the
    larger one, or the variances differ by more than 10% of the larger one;
    otherwise both lotteries are redrawn. The kept pair is then assigned to
    options A and B in random order.

    Args:
        num_questions: How many questions to generate.

    Returns:
        A list with one dict per question containing the question id, the
        prompt text and the rounded parameters of options A and B.
    """

    def _describe(lottery):
        # Render one lottery as the sentence shown to the respondent.
        _, prob, high, low, _ = lottery
        return (
            f"A {prob:.1%} chance to win ${high:,.2f} and a "
            f"{1 - prob:.1%} chance to win ${low:,.2f}."
        )

    print(f"Generating {num_questions} questions (different EVs allowed)...")
    rows = []

    for number in range(1, num_questions + 1):
        # Redraw until the two lotteries are distinguishable enough.
        while True:
            first = create_lottery()
            second = create_lottery()

            ev_first, ev_second = first[0], second[0]
            if abs(ev_first - ev_second) / max(ev_first, ev_second) > 0.05:
                break

            var_first, var_second = first[4], second[4]
            if abs(var_first - var_second) / max(var_first, var_second) > 0.1:
                break

        # Randomise which lottery is presented as option A.
        if random.choice([True, False]):
            option_a, option_b = first, second
        else:
            option_a, option_b = second, first

        text_a = _describe(option_a)
        text_b = _describe(option_b)
        prompt = (
            f"Question {number}: Which of the following options do you prefer?\n"
            f"A: {text_a}\n"
            f"B: {text_b}"
        )

        ev_a, p_a, high_a, low_a, var_a = option_a
        ev_b, p_b, high_b, low_b, var_b = option_b
        rows.append({
            "question_id": number,
            "prompt_text": prompt,
            "expected_value_a": round(ev_a, 2),
            "p_a": round(p_a, 4),
            "reward1_a": round(high_a, 2),
            "reward2_a": round(low_a, 2),
            "variance_a": round(var_a, 2),
            "expected_value_b": round(ev_b, 2),
            "p_b": round(p_b, 4),
            "reward1_b": round(high_b, 2),
            "reward2_b": round(low_b, 2),
            "variance_b": round(var_b, 2),
        })

        # Progress report every 50 questions.
        if not number % 50:
            print(f"  ... generated {number} questions")

    print("Generation complete.")
    return rows


def save_to_csv(data, filename):
    """Write the generated question rows to a CSV file.

    Args:
        data: Sequence of row dicts keyed by the column names below. If it
            is empty (or otherwise falsy) nothing is written.
        filename: Path of the CSV file to create or overwrite.

    Returns:
        None. The outcome is reported on stdout; an I/O failure is printed
        rather than raised.
    """
    if not data:
        print("No data to save.")
        return

    # Column order: identifiers first, then the five parameters of option A
    # followed by the same five for option B.
    per_option = ("expected_value", "p", "reward1", "reward2", "variance")
    columns = ["question_id", "prompt_text"]
    for side in ("a", "b"):
        columns.extend(f"{stem}_{side}" for stem in per_option)

    try:
        with open(filename, 'w', newline='', encoding='utf-8') as out_file:
            csv_out = csv.DictWriter(out_file, fieldnames=columns)
            csv_out.writeheader()
            csv_out.writerows(data)
        print(f"Successfully saved {len(data)} questions to '{filename}'")
    except OSError as err:
        print(f"Error writing to file: {err}")


if __name__ == "__main__":
    # Script entry point: generate the configured number of questions and
    # write them straight to the configured output file.
    save_to_csv(generate_questions(NUM_QUESTIONS), OUTPUT_FILENAME)
