# ============================================================
# VANILLA PIPELINE CONFIGURATION
# ------------------------------------------------------------
# This YAML file configures a two-stage run: endowments (personas)
# are first generated by the baseline EndowmentGenerator, then
# passed to the plugin pipeline for survey elicitation and regression.
# ============================================================

# --- FILE MANAGEMENT & METADATA ---

# Identifies this run: a short name, a human-readable description, a version
# label and an integer seed.
metadata:
  name: "ATP_w36_vanilla"       # Short name for output folder naming
  description: "Generate baseline endowments for ATP Wave 36 and run plugin pipeline."
  version: "v1"
  seed: 101                     # presumably the random seed for the run; TODO confirm which stages consume it

# --- INPUT & REFERENCE FILE PATHS ---
# Input and reference files. All paths are relative; presumably resolved
# against the project root or working directory (TODO confirm with the runner).
paths:
  survey_csv: "data/w36/info.csv"                           # Survey question CSV
  survey_yaml: "config/surveys/american_trend_panel.yaml"   # Schema for survey
  # Intentionally null in this file: the value is filled in later.
  endowments_csv: null                                      # Placeholder (patched by vanilla runner)
  aggregate_json: "data/w36/W36_aggregate_results.json"     # Optional human distribution reference

# --- REPORT STYLING ---
# CSS stylesheets for the generated reports. Values are double-quoted to match
# the other path strings in this file; the parsed values are unchanged.
report:
  plugin: "styles/generation_report.css"    # presumably styles the plugin (elicitation) report; confirm
  regression: "styles/lasso_report.css"     # presumably styles the regression report; confirm

# ============================================================
#           [STAGE 1] BASELINE ENDOWMENT GENERATION
# ============================================================

# Stage 1 settings: how the baseline endowments (personas) are generated.
endowment_generator:
  backend: "openai"
  model_name: "gpt-4o-mini"
  temperature: 0.9      # non-zero, unlike the response agent's temperature of 0
  max_tokens: 1024
  batch_size: 10        # presumably endowments requested per batch; TODO confirm
  delay: 0.5            # presumably a pause between batches, in seconds; TODO confirm units
  retry_failed: true
  n_endowments: 300     # same count as runner.target_n in this file
  topic_description: "climate change policy"   # Optional thematic focus

# ============================================================
#           [STAGE 2] PLUGIN PIPELINE (ELICITATION + REGRESSION)
# ============================================================

# --- RESPONSE AGENT CONFIGURATION ---
# Settings for the agent that answers the survey questions.
# String values are double-quoted to match endowment_generator in this file.
agent:
  type: "openai"
  model_name: "gpt-4o-mini"
  # Folded scalar (`>`): the line breaks below are folded into single spaces
  # and one trailing newline is kept, so the value is one long line of text.
  formality: >
    You are completing a survey. Your answer should reflect the person described in the profile above, using their preferences, beliefs and experiences.
    Respond with only the final answer string, not the code or label in brackets.
    Do not include any reasoning, explanation, or commentary.
    Do not preface your answer with phrases like 'I would choose'.
    Just return the answer text exactly as it appears in the options.
  # presumably forwarded as keyword arguments to the model call; TODO confirm
  kwargs:
    temperature: 0      # contrast with endowment_generator.temperature (0.9)
    max_tokens: 128

# --- RUNNER SETTINGS ---
# Execution settings for the elicitation run.
runner:
  parallel: true
  max_workers: 30            # presumably only relevant when parallel is true; TODO confirm
  verbose: true
  target_n: 300              # cap number of endowments used for elicitation

# --- REGRESSION SETTINGS ---
# Lasso regression settings.
lasso:
  # A string holding a NumPy expression, not a list of numbers. If evaluated
  # with NumPy it yields 20 log-spaced values from 1e-5 to 1e1.
  # NOTE(review): presumably evaluated by the pipeline (e.g. via eval);
  # confirm this string can never come from untrusted input.
  alpha_expr: "np.logspace(-5, 1, 20)"
  max_iter: 10000
  validation:
    strategy: "cv"            # presumably cross-validation, given cv_folds below
    cv_folds: 5
  post_selection_refit: true

# Elastic-net regression settings; same alpha grid and validation values as
# the lasso section, plus an l1_ratio grid and a plot style.
elasticnet:
  # Both *_expr values are strings holding NumPy expressions. If evaluated with
  # NumPy: alpha is 20 log-spaced values from 1e-5 to 1e1, and l1_ratio is
  # 10 evenly spaced values from 0.1 to 1.0.
  # NOTE(review): presumably evaluated by the pipeline (e.g. via eval);
  # confirm these strings can never come from untrusted input.
  alpha_expr: "np.logspace(-5, 1, 20)"
  l1_ratio_expr: "np.linspace(0.1, 1, 10)"
  max_iter: 10000
  validation:
    strategy: "cv"            # presumably cross-validation, given cv_folds below
    cv_folds: 5
  plot_style: "2D"            # quoted string; accepted values are not visible here
  post_selection_refit: true

# --- SPLIT SETTINGS ---
# Names of the data splits used for fitting and for testing.
split_settings:
  use_proxy_only: true                    # meaning of "proxy" is not visible in this file; TODO document
  train_val_split: ["train", "valid"]     # a list of two split names
  test_split: "test"                      # a single split name

# --- MODEL WEIGHT USAGE ---
# Which regression model supplies the weights. Presumably must name one of the
# regression sections in this file (`lasso` or `elasticnet`); TODO confirm.
weight_assignment_model: "lasso"

# ============================================================
# NOTES:
# - Endowments are generated by EndowmentGenerator in Stage 1.
# - Survey elicitation and regression are delegated to plugin pipeline.
# - Token usage is tracked jointly across generator + conductor.
# ============================================================