# ============================================================
#      ENDOWMENT GENERATION CONFIGURATION (STANDALONE)
# ------------------------------------------------------------
# This YAML file configures a synthetic persona generation
# experiment. It produces endowments and corresponding survey
# responses using LLM agents across multiple question themes.
#
# Stage: Generate personas & simulate their survey responses
# ============================================================

# --- EXPERIMENT METADATA ---
# Describes the experiment and ensures reproducibility.
metadata:
  # Short name used when naming the output folder.
  name: "ATP_w41_endowments"
  # Human-readable summary of this run. The model is spelled "GPT-4o" so it
  # agrees with the `model_name` configured for the response agent.
  description: "Generate 200 active endowments for ATP Wave 41 using GPT-4o agents."
  version: "v1"                           # Config version identifier
  seed: 101                               # Random seed for reproducibility

# --- INPUT FILES ---
# Files needed for survey format, attribute selection, and mode logic.
paths:
  # CSV with preprocessed survey questions.
  survey_csv: "data/se1_bin/w41.csv"
  # YAML schema used to interpret the survey structure.
  survey_yaml: "config/surveys/american_trend_panel.yaml"
  # Defines attribute modes, themes, and attribute keywords.
  attribute_bank: "config/attribute_banks/attribute_bank_biases.yaml"

# --- RESPONSE AGENT CONFIGURATION ---
# Settings for the LLM that simulates agent responses.
agent:
  type: openai
  model_name: gpt-4o           # LLM that simulates the survey responses
  # System prompt guiding the style and rules for answering.
  # Written as a folded scalar (`>`): the lines below are joined with single
  # spaces into one paragraph that ends with a newline.
  # NOTE(review): the prompt refers to "the profile above", so the persona
  # profile is presumably placed before this text by the caller — confirm.
  formality: >
    You are completing a survey. Your answer should reflect the person described in the profile above, using their preferences, beliefs and experiences.
    Respond with only the final answer string, not the code or label in brackets.
    Do not include any reasoning, explanation, or commentary.
    Do not preface your answer with phrases like 'I would choose'.
    Just return the answer text exactly as it appears in the options.
  kwargs:
    # Zero temperature so responses are as repeatable as possible.
    temperature: 0
    # Cap on response length; the key name indicates tokens, not characters.
    max_tokens: 128

# --- ENDOWMENT MODEL SETTINGS ---
# LLM settings for generating persona attribute sets.
endowment_model:
  model: gpt-4o-mini           # Model used to create persona endowments
  temperature: 0.9             # Higher temperature to increase diversity
  batch_size: 5                # Number of personas generated per LLM call
  delay: 0.5                   # Optional pause, in seconds, between calls
  retry_failed: true           # Automatically retry failed API requests
  max_attributes: 10           # Upper limit on attributes per endowment
  randomize_attributes: true   # Randomly sample attributes from the selected mode

# --- ATTRIBUTE LEARNER SETTINGS ---
# Used to infer attributes from the survey and its questions.
attribute_learner:
  model: gpt-4o                # Model used for attribute learning
  # Cap on output length for attribute learning; the key name indicates the
  # unit is tokens.
  max_tokens: 1024

# --- GENERATION STRATEGY ---
# Core configuration for the simulation and adaptive learning process.
generation:
  target_n: 200                 # Total number of persona endowments to generate
  initial_n: 10                 # Initial count per mode (before adaptation begins)
  num_update_steps: 5           # Number of adaptive updates based on variability
  # NOTE(review): earlier comments described `parallel` as "multiprocessing"
  # but `max_workers` as "threads" — confirm which executor the runner uses.
  parallel: true                # Run the simulation in parallel for speed
  max_workers: 30               # Number of parallel workers
  question_patch:
    # Share of endowments allocated to patching questions, expressed as a
    # fraction rather than a percentage (0.75 = 75%).
    fraction: 0.75
    top_k: 3                    # Track the lowest-entropy k questions per round
    min_repeats: 2              # Reallocate patch quota if a question appears in ≥ this many rounds
  verbose: true                 # Print verbose logs during generation

# --- REPORT STYLING ---
# Optional visual customization for the HTML output report.
report:
  # Path to the custom CSS file applied to the generation report.
  css: "styles/generation_report.css"

# ============================================================
# GLOSSARY
# ------------------------------------------------------------
# endowment: A synthetic persona profile used to simulate survey responses
# proxy agent: The LLM responding to surveys using the persona
# patching: Reallocating new endowments to questions with low variability,
#           using attributes inferred from those questions
# attribute bank: Structured schema of persona attributes grouped by mode and theme
# ============================================================