# ACEAS configuration for Qwen3-4B-Instruct-2507 on 8x H100-80GB GPUs.
# Optimized for an Anyscale Ray cluster.

# Base model selection, LoRA adapter shape, and weight-loading options.
model:
  name: 'Qwen/Qwen3-4B-Instruct-2507'
  use_lora: true
  lora_r: 32
  lora_alpha: 64  # twice lora_r
  # Module names that receive LoRA adapters.
  lora_target_modules:
    - 'q_proj'
    - 'k_proj'
    - 'v_proj'
    - 'o_proj'
    - 'gate_proj'
    - 'up_proj'
    - 'down_proj'
  gradient_checkpointing: false  # left off: each H100 has 80 GB of memory
  # Both quantized-loading flags are off; weights use torch_dtype below.
  load_in_8bit: false
  load_in_4bit: false
  torch_dtype: 'bfloat16'

# Training-loop length, batch size, learning rate, and reporting cadence.
training:
  total_timesteps: 50000
  batch_size: 16
  # Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
  # resolve a bare `2e-5` as the string "2e-5" instead of a float.
  learning_rate: 2.0e-5
  # NOTE(review): the intervals below are presumably measured in the same
  # unit as total_timesteps — confirm against the trainer.
  eval_interval: 2500
  save_interval: 5000
  log_interval: 100

# Curriculum settings.
# NOTE(review): "acb" is not expanded anywhere in this file — confirm what
# the acb_* values control with the code that reads them.
curriculum:
  strategy: 'adaptive'
  acb_exploration: 1.0
  acb_alpha: 0.7

# Worker scheduling settings.
# NOTE(review): num_workers (8) with gpu_per_worker 1.0 lines up with the
# 8 GPUs named in the file header, i.e. presumably one full GPU per worker —
# confirm against the scheduler.
scheduling:
  num_workers: 8
  gpu_per_worker: 1.0
  use_async: true
  # NOTE(review): "csc" and "eaas" are not expanded in this file — spell the
  # acronyms out once their meaning is confirmed.
  use_csc: true
  use_eaas: true
  # NOTE(review): meaning and units of eta_base / lambda_coupling are not
  # visible here — TODO document from the consuming code.
  eta_base: 8.0
  lambda_coupling: 0.5

# GRPO hyperparameters.
# NOTE(review): presumably Group Relative Policy Optimization, with
# clip_epsilon as the ratio-clipping range, kl_coef as the KL-penalty weight
# and group_size as samples per prompt — confirm against the trainer.
grpo:
  clip_epsilon: 0.2
  kl_coef: 0.05
  group_size: 8

# Limits applied by the environment.
environment:
  timeout_seconds: 15.0
  # NOTE(review): unit (characters vs. tokens) is not stated in this file —
  # TODO confirm.
  max_code_length: 2048

# Text-generation settings.
# NOTE(review): temperature / top_p presumably configure sampling and
# max_new_tokens caps the generated length — confirm how they are passed to
# the generation call.
generation:
  max_new_tokens: 512
  temperature: 0.8
  top_p: 0.95

# Dataset selection.
dataset:
  name: 'humaneval'
  # NOTE(review): null presumably means "no cap on the number of tasks" —
  # confirm with the dataset loader.
  max_tasks: null
