# ============================================================
# PLUGIN PIPELINE CONFIGURATION FOR SURVEY ELICITATION & LASSO
# ------------------------------------------------------------
# This YAML file configures both stages of the alignment:
#
# Stage 1: Parallel elicitation of responses for pre-generated endowments
# Stage 2: Run Lasso/ElasticNet regression to select relevant endowments
#
# Each module (survey, agent, runner, lasso/elasticnet) has its own settings.
# ============================================================

# --- FILE MANAGEMENT & METADATA ---

# >>> Alignment Metadata <<<
metadata:
  # Short name used in output folder naming.
  # Spelling fixed ("clutered" -> "clustered") so it matches the clustered
  # endowments file referenced by `paths.endowments_csv`.
  # NOTE(review): outputs written under the old misspelled name will sit in a
  # differently named folder — rename or re-run if they must line up.
  name: "persona_300_clustered"
  # Human-readable summary of what this configuration does.
  description: "Elicit responses for pre-generated and clustered PERSONA endowments on ATP W42 and run regression."
  # Version label for this configuration.
  version: "v1"
  # Random seed — presumably for reproducible sampling; TODO confirm where the pipeline consumes it.
  seed: 101

# >>> Input & Reference File Paths <<<
paths:
  # Survey data CSV for wave W42 — schema not visible from this file; TODO confirm.
  survey_csv: "data/w42/info.csv"
  # Survey definition file (American Trends Panel, per the file name).
  survey_yaml: "config/surveys/american_trend_panel.yaml"
  endowments_csv: "data/clustering/persona_300_clustered.csv"   # Pre-generated, clustered endowments (this pipeline never generates them)
  # Aggregate W42 results — presumably the reference the regression is fit against; TODO confirm.
  aggregate_json: "data/w42/W42_aggregate_results.json"

# >>> Report Styling <<<
report:
  # Stylesheet for the plugin report (file name suggests the generation report).
  plugin: "styles/generation_report.css"
  # Stylesheet for the regression experiment report.
  regression: "styles/lasso_report.css"
# ============================================================
#           [STAGE 1] PARALLEL ELICITATION
# ============================================================

# --- RESPONSE AGENT CONFIGURATION ---
# Settings for the LLM agent that simulates survey responses.
agent:
  # Backend and model used to simulate respondents.
  type: "gemini"
  model_name: "gemini-2.0-flash"
  # System prompt detailing formality requirements for the response.
  # The folded scalar (`>`) joins the lines below with single spaces and
  # keeps one trailing newline, so wrapping here does not alter the prompt.
  formality: >
    You are completing a survey. Your answer should reflect the person
    described in the profile above, using their preferences, beliefs and
    experiences.
    Respond with only the final answer string, not the code or label in brackets.
    Do not include any reasoning, explanation, or commentary.
    Do not preface your answer with phrases like 'I would choose'.
    Just return the answer text exactly as it appears in the options.
  # Extra keyword arguments for the agent.
  kwargs:
    # Zero temperature for deterministic responses.
    temperature: 0
    # Upper limit for response length.
    max_tokens: 128

# --- RUNNER SETTINGS ---
# Execution settings for the Stage 1 elicitation run.
runner:
  parallel: true              # Whether to use parallel workers
  max_workers: 20             # Number of worker threads (presumably only relevant when parallel is true; TODO confirm)
  verbose: true               # Verbose logging
  target_n: 300               # Cap on the number of endowments sampled for elicitation
# ============================================================
#           [STAGE 2] LASSO / ELASTICNET REGRESSION APPROXIMATION
# ============================================================

# Lasso regression settings.
lasso:
  # NOTE(review): this is a NumPy expression stored as a string, so the
  # pipeline presumably evaluates it at load time — confirm it is only ever
  # read from trusted configuration files.
  alpha_expr: "np.logspace(-5, 1, 20)"                      # Lasso tuning grid: 20 log-spaced values from 1e-5 to 1e1
  # Iteration cap — presumably forwarded to the solver; TODO confirm.
  max_iter: 10000
  validation:                                               # Validation strategy used to pick alpha
    strategy: "cv"                                            # "holdout" or "cv" (cross-validation)
    cv_folds: 5                                               # Used only if strategy == "cv"
  post_selection_refit: false                                # Refit OLS after selection; disabled here (default: true)

# ElasticNet regression settings.
elasticnet:
  # NOTE(review): both *_expr values are NumPy expressions stored as strings,
  # so the pipeline presumably evaluates them at load time — confirm they are
  # only ever read from trusted configuration files.
  alpha_expr: "np.logspace(-5, 1, 20)"                      # ElasticNet alpha grid: 20 log-spaced values from 1e-5 to 1e1
  l1_ratio_expr: "np.linspace(0.1, 1, 10)"                     # ElasticNet l1_ratio grid: 10 evenly spaced values from 0.1 to 1
  # Iteration cap — presumably forwarded to the solver; TODO confirm.
  max_iter: 10000
  validation:                                               # Validation strategy used to pick hyperparameters
    strategy: "cv"                                            # Only "cv" is supported
    cv_folds: 5                                               # Used only if strategy == "cv"
  plot_style: "2D"                                          # Plot style for the diagnostics: "2D" (default) or "3D"
  # Refit after selection is disabled here, mirroring the lasso section's setting.
  post_selection_refit: false


# --- SPLIT SETTINGS ---
split_settings:
  # Use only proxy agents.
  use_proxy_only: true
  # Splits used for training and validation.
  train_val_split:
    - "train"
    - "valid"
  # Split used for testing.
  test_split: "test"

# --- MODEL WEIGHT USAGE ---
# Presumably must name one of the model sections in this file ("lasso" or "elasticnet"); TODO confirm.
weight_assignment_model: "elasticnet"              # Which model's weights to assign to endowments


# ============================================================
# NOTES:
# - Endowments must already exist in the file given by `paths.endowments_csv`.
# - This pipeline does not generate new endowments, only elicits responses.
# - Weights for endowments will be assigned using `weight_assignment_model`.
# ============================================================