# ============================================================
# VANILLA PIPELINE CONFIGURATION
# ------------------------------------------------------------
# This YAML file configures a two-stage run: endowments (personas)
# are first generated by the baseline EndowmentGenerator, then
# passed to the plugin pipeline for survey elicitation and regression.
# ============================================================

# --- FILE MANAGEMENT & METADATA ---

# Run-level metadata identifying this configuration.
metadata:
  name: "vanilla_ATP_w50"       # Short name for output folder naming
  # Free-text summary; the count (300) and model (gemini-2.0-flash) mirror
  # endowment_generator.n_endowments and endowment_generator.model_name.
  description: "Generate 300 vanilla endowments for ATP Wave 50 using gemini-2.0-flash agents."
  version: "v1"
  # NOTE(review): presumably the random seed for the run; confirm which
  # stages actually honor it.
  seed: 101

# --- INPUT & REFERENCE FILE PATHS ---
# Input and reference files for the run.
# NOTE(review): paths are relative; presumably resolved against the project
# root or working directory; confirm with the runner.
paths:
  survey_csv: "data/w50/info.csv"
  survey_yaml: "config/surveys/american_trend_panel.yaml"
  # Intentionally null in this file; the value is filled in at run time.
  endowments_csv: null                                      # Placeholder (patched by vanilla runner)
  aggregate_json: "data/w50/W50_aggregate_results.json"

# --- REPORT STYLING ---
# Stylesheets for the generated reports, one entry per report type.
# Values are quoted to match the other file paths in this configuration.
report:
  # CSS used for the plugin report.
  plugin: "styles/generation_report.css"
  # CSS used for the regression report.
  regression: "styles/lasso_report.css"

# ============================================================
#           [STAGE 1] BASELINE ENDOWMENT GENERATION
# ============================================================

# Settings for the Stage 1 generator that produces the endowments (personas).
endowment_generator:
  backend: "gemini"
  model_name: "gemini-2.0-flash"
  # Non-zero sampling temperature, in contrast to the response agent's 0.
  temperature: 0.9
  max_tokens: 1024
  # NOTE(review): presumably the number of generation requests per batch and
  # the pause between batches (seconds?); confirm units with the generator.
  batch_size: 10
  delay: 0.5
  retry_failed: true
  # Number of endowments to generate; runner.target_n uses the same value.
  n_endowments: 300
  topic_description: "Misinformation"   # Optional thematic focus

# ============================================================
#           [STAGE 2] PLUGIN PIPELINE (ELICITATION + REGRESSION)
# ============================================================

# --- RESPONSE AGENT CONFIGURATION ---
# Response agent that answers the survey on behalf of each profile.
# String values are quoted to match endowment_generator.backend/model_name.
agent:
  type: "gemini"
  model_name: "gemini-2.0-flash"
  # System prompt detailing formality requirements for the response.
  # Folded scalar (`>`): the lines below are joined with single spaces and
  # one trailing newline is kept, so the model receives a single paragraph.
  formality: >
    You are completing a survey. Your answer should reflect the person described in the profile above, using their preferences, beliefs and experiences.
    Respond with only the final answer string, not the code or label in brackets.
    Do not include any reasoning, explanation, or commentary.
    Do not preface your answer with phrases like 'I would choose'.
    Just return the answer text exactly as it appears in the options.
  # Extra generation arguments for the agent.
  # NOTE(review): presumably forwarded as keyword arguments to the model
  # call; confirm with the agent implementation.
  kwargs:
    temperature: 0             # Deterministic responses
    max_tokens: 128            # Upper limit for response length

# --- RUNNER SETTINGS ---
# Execution settings for the elicitation run.
runner:
  # NOTE(review): max_workers presumably only applies when parallel is true;
  # confirm with the runner.
  parallel: true
  max_workers: 20
  verbose: true
  # Same value as endowment_generator.n_endowments, so no endowment is dropped.
  target_n: 300              # cap number of endowments used for elicitation

# ============================================================
#           [STAGE 2, CONT.] REGRESSION APPROXIMATION (LASSO + ELASTICNET)
# ============================================================

# Lasso regression settings.
lasso:
  # String holding a NumPy expression: 20 log-spaced values from 1e-5 to 1e1.
  # NOTE(review): presumably evaluated by the pipeline with `np` in scope;
  # confirm, and treat this file as trusted input if so.
  alpha_expr: "np.logspace(-5, 1, 20)"                      # Lasso tuning grid
  max_iter: 10000
  validation:                                               # Cross-Validation Strategy
    strategy: "cv"                                            # "holdout" or "cv" (cross-validation)
    cv_folds: 5                                               # Used only if strategy == "cv"
  # Explicitly disabled here, overriding the documented default of true.
  post_selection_refit: false                                # Refit OLS after selection (default: true)

# ElasticNet regression settings; the structure mirrors the `lasso` section.
elasticnet:
  # Both *_expr values are strings holding NumPy expressions:
  # 20 log-spaced alphas from 1e-5 to 1e1, and 10 evenly spaced l1_ratio
  # values from 0.1 to 1.
  # NOTE(review): presumably evaluated by the pipeline with `np` in scope;
  # confirm, and treat this file as trusted input if so.
  alpha_expr: "np.logspace(-5, 1, 20)"                      # ElasticNet alpha grid
  l1_ratio_expr: "np.linspace(0.1, 1, 10)"                     # ElasticNet l1_ratio grid
  max_iter: 10000
  validation:                                               # Cross-Validation Strategy
    strategy: "cv"                                            # Only "cv" is supported
    cv_folds: 5                                               # Used only if strategy == "cv"
  plot_style: "2D"                                          # Plot style for the diagnostics: "2D" (default) or "3D"
  # Disabled here, matching lasso.post_selection_refit.
  post_selection_refit: false


# --- SPLIT SETTINGS ---
# Which data splits feed the regression.
# NOTE(review): the split names ("train", "valid", "test") presumably refer
# to labels in the survey data; confirm against the input files.
split_settings:
  use_proxy_only: true                                      # Use only proxy agents
  train_val_split: ["train", "valid"]                       # Training and validation splits
  test_split: "test"                                        # Test split

# --- MODEL WEIGHT USAGE ---
# NOTE(review): presumably must name one of the regression sections defined
# in this file ("lasso" or "elasticnet"); confirm with the pipeline.
weight_assignment_model: "elasticnet"              # Which model's weights to assign to endowments


# ============================================================
# NOTES:
# - Endowments are generated by EndowmentGenerator in Stage 1.
# - Survey elicitation and regression are delegated to plugin pipeline.
# - Token usage is tracked jointly across generator + conductor.
# ============================================================