# ACEAS Configuration for ICML 2026 Experiments

# Model Configuration
# Names the base model and sets the LoRA-related options.
model:
  # NOTE(review): looks like a Hugging Face Hub model ID — confirm with the
  # code that loads this value.
  name: "Salesforce/codegen-350M-mono"
  # Boolean switch for LoRA; presumably lora_r / lora_alpha are only read
  # when this is true — verify against the consumer.
  use_lora: true
  # Presumably the LoRA rank — TODO confirm.
  lora_r: 16
  # Presumably the LoRA scaling numerator (alpha) — TODO confirm.
  lora_alpha: 32

# Training Configuration
# Run length, batch size, learning rate, and the eval/save/log intervals.
training:
  total_timesteps: 10000
  batch_size: 32
  # Keep the mantissa dot and signed exponent: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `1e-5` as the string "1e-5" rather than a float.
  learning_rate: 1.0e-5
  # NOTE(review): the three intervals below are presumably counted in the same
  # unit as total_timesteps — confirm against the training loop.
  eval_interval: 500
  save_interval: 1000
  log_interval: 50

# Curriculum Configuration
# Chooses the curriculum strategy and sets the two `acb_*` parameters.
curriculum:
  strategy: "adaptive"  # "adaptive", "fixed", or "uniform"
  # NOTE(review): "ACB" is not expanded anywhere in this file; presumably the
  # two keys below only affect the "adaptive" strategy — confirm and document
  # the acronym and the valid range of each value.
  acb_exploration: 1.0
  acb_alpha: 0.7

# Async/Scheduling Configuration
# Worker count, async/feature switches, and scheduling coefficients.
scheduling:
  num_workers: 4
  use_async: true
  # NOTE(review): "CSC" and "EAAS" are not expanded in this file — document
  # what each switch enables.
  use_csc: true
  use_eaas: true
  # NOTE(review): meaning and units of eta_base and lambda_coupling are not
  # stated here; presumably they parameterize the features toggled above —
  # confirm against the scheduler code.
  eta_base: 8.0
  lambda_coupling: 0.5
  broadcast_interval: 5  # Broadcast weights every N updates (higher = faster throughput)

# GRPO Configuration
# Hyperparameters for the GRPO update.
grpo:
  # Presumably the clipping range applied to the policy ratio — TODO confirm.
  clip_epsilon: 0.2
  # Presumably the weight of the KL penalty term — TODO confirm.
  kl_coef: 0.1
  # Presumably the number of samples per group — TODO confirm.
  group_size: 8

# Environment Configuration
# Limits applied by the environment.
environment:
  # Timeout in seconds (per the key name); what is being timed is not stated
  # here — presumably a single code execution, confirm.
  timeout_seconds: 10.0
  # NOTE(review): unit is not stated (characters vs. tokens) — confirm and
  # document.
  max_code_length: 2048

# Generation Configuration
# Sampling settings for text generation.
generation:
  # Presumably an upper bound on tokens generated per sample — TODO confirm.
  max_new_tokens: 256
  # Presumably the sampling temperature — TODO confirm.
  temperature: 0.8
  # Presumably the nucleus (top-p) sampling threshold — TODO confirm.
  top_p: 0.95
