# SLDBench 3D MT-STS configuration for OpenEvolve

# --- Run control ---
max_iterations: 15
# Same value as max_iterations, so presumably only one checkpoint is written,
# at the end of the run — confirm against OpenEvolve's checkpoint logic.
checkpoint_interval: 15
random_seed: 123
log_level: 'INFO'

# --- Evolved program format ---
language: 'python'
file_suffix: '.py'

# --- Evolution mode ---
# Diff-based evolution is disabled; presumably the LLM then rewrites the whole
# program each iteration instead of emitting diffs — confirm.
diff_based_evolution: false
max_code_length: 24000

# LLM client settings.
llm:
  # Endpoint on localhost port 4000. The 'litellm' key looks like a
  # placeholder credential for a local LiteLLM proxy — TODO confirm.
  api_base: 'http://127.0.0.1:4000'
  api_key: 'litellm'

  # Model selection: all weight is on the primary model; no secondary model
  # is configured (null, weight 0.0).
  primary_model: 'claude-sonnet-4-6'
  primary_model_weight: 1.0
  secondary_model: null
  secondary_model_weight: 0.0

  # Generation parameters.
  temperature: 0.8
  max_tokens: 8192

  # Request handling. The timeout unit is presumably seconds — confirm.
  timeout: 120
  retries: 3

# Prompt construction settings.
prompt:
  # System message for the LLM. Everything inside the block scalar is literal
  # prompt text, including any '#' characters; do not add YAML comments
  # inside it.
  system_message: |
    You are improving a generic 3D scaling-law family for two related extrapolation tasks.

    The evolving code must preserve these exact function signatures:

    def scaling_law_func(data_points, params):
        ...

    def fit_scaling_law(data_points, loss_values):
        ...

    In this MT-STS family, data_points always has shape (N, 3) with canonical columns:
    [model_size_like, diversity_like, total_data_like]

    Task-specific raw columns are canonicalized before they reach your code:
    - vocab_scaling_law:
      [non_vocab_parameters, vocab_size, num_characters]
      -> [model_size_like, diversity_like, total_data_like]
    - data_constrained_scaling_law:
      [unique_tokens, params, tokens]
      -> [diversity_like, model_size_like, total_data_like]
      The columns are then reordered into the canonical order above, so
      params is column 0 (model_size_like), unique_tokens is column 1
      (diversity_like), and tokens is column 2 (total_data_like).

    Constraints:
    - Use no more than 7 parameters.
    - Do not hardcode task IDs or separate formulas by task name.
    - Do not rely on input-dependent global statistics inside scaling_law_func
      such as min/max/median/quantiles over the current batch.
    - Keep predictions finite and numerically stable.
    - Fit deterministically.
    - Focus on extrapolation quality on held-out larger-scale settings.
    - Favor simple, stable, parameter-efficient law forms.
    - Log-domain reasoning, bounded exponents, positive reparameterizations,
      clipping, and numerically stable optimization are encouraged.

    The goal is to improve both:
    - the symbolic law structure in scaling_law_func
    - the fitting routine in fit_scaling_law

    Write all improvements only between # EVOLVE-BLOCK-START and # EVOLVE-BLOCK-END.

  # Prompt context options: how many top and diverse programs to include,
  # template stochasticity, and artifact inclusion (max_artifact_bytes caps
  # the artifact size at 32768 bytes).
  num_top_programs: 3
  num_diverse_programs: 2
  use_template_stochasticity: true
  include_artifacts: true
  max_artifact_bytes: 32768

# Program database / population settings.
database:
  population_size: 100
  archive_size: 50

  # Island settings.
  # NOTE(review): migration_interval (25) is larger than this file's
  # max_iterations (15), so migration presumably never triggers in this
  # run — confirm that is intended.
  num_islands: 5
  migration_interval: 25
  migration_rate: 0.1

  # Selection ratios.
  elite_selection_ratio: 0.1
  exploration_ratio: 0.2
  exploitation_ratio: 0.7

  # Feature map: three dimensions; feature_bins presumably applies to each
  # dimension — confirm.
  feature_dimensions:
    - 'combined_score'
    - 'complexity'
    - 'diversity'
  feature_bins: 10

# Evaluator settings.
evaluator:
  # Evaluation timeout; the unit is presumably seconds — confirm.
  timeout: 600
  parallel_evaluations: 2

  # Feature switches: artifacts are enabled; cascade evaluation and LLM
  # feedback are disabled.
  enable_artifacts: true
  cascade_evaluation: false
  use_llm_feedback: false

# Early-stopping settings, tracked on the combined_score metric.
# NOTE(review): early_stopping_patience (100) is larger than this file's
# max_iterations (15), so early stopping presumably cannot trigger in this
# run — confirm that is intended.
early_stopping_metric: 'combined_score'
early_stopping_patience: 100
convergence_threshold: 0.001
