# ACEAS Configuration for Qwen 3 4B Instruct
# Optimized for 4x A10G (24GB) GPUs on Ray cluster

# Model Configuration
model:
  # Identifier of the base model to load. The "org/name" shape looks like a
  # Hugging Face Hub repo id — confirm how the loader resolves it.
  name: 'Qwen/Qwen3-4B-Instruct-2507'

  # LoRA adapter settings. use_lora is false in this config, so the lora_*
  # values below are presumably ignored at runtime — TODO confirm in the loader.
  use_lora: false
  lora_r: 32
  lora_alpha: 64
  lora_target_modules:
    - 'q_proj'
    - 'k_proj'
    - 'v_proj'
    - 'o_proj'
    - 'gate_proj'
    - 'up_proj'
    - 'down_proj'

  # Memory and precision switches. Both quantized-loading flags are off and
  # the dtype is given as a string for the consumer to interpret.
  gradient_checkpointing: true
  load_in_8bit: false
  load_in_4bit: false
  torch_dtype: 'bfloat16'

# Training Configuration
training:
  total_timesteps: 50000
  batch_size: 8
  # Keep the decimal point in the mantissa. YAML 1.1 resolvers (e.g. PyYAML)
  # only recognise exponent floats that contain a ".", so a bare `2e-6` is
  # loaded as the string "2e-6" instead of a number. `2.0e-6` is a float under
  # both YAML 1.1 and YAML 1.2.
  learning_rate: 2.0e-6
  # Periodic evaluation / checkpoint / logging cadence. Units are not stated in
  # this file — presumably timesteps, matching total_timesteps; confirm.
  eval_interval: 2500
  save_interval: 5000
  log_interval: 100

# Curriculum Configuration
curriculum:
  # Name of the curriculum strategy; the set of accepted values is defined by
  # the consumer, not by this file.
  strategy: 'adaptive'
  # Tuning knobs sharing the acb_ prefix. The abbreviation is not expanded
  # anywhere in this file — NOTE(review): document what ACB stands for.
  acb_exploration: 1.0
  acb_alpha: 0.7

# Async/Scheduling Configuration
scheduling:
  # 4 workers x 1.0 GPU each = 4 GPUs, matching the "4x A10G" target named in
  # the file header. Change both together when retargeting the cluster.
  num_workers: 4
  gpu_per_worker: 1.0
  # Feature toggles, all enabled. CSC and EAAS are not expanded in this file —
  # NOTE(review): spell out the abbreviations where they are defined.
  use_async: true
  use_csc: true
  use_eaas: true
  # Scheduler coefficients; meaning and valid ranges are defined by the
  # consumer — TODO confirm before tuning.
  eta_base: 8.0
  lambda_coupling: 0.5

# GRPO Configuration
grpo:
  # Clipping range and KL coefficient for the GRPO objective (inferred from the
  # key names — confirm against the trainer implementation).
  clip_epsilon: 0.2
  kl_coef: 0.05
  # Presumably the number of sampled completions per prompt in a group —
  # TODO confirm.
  group_size: 4

# Environment Configuration
environment:
  # Per-execution time limit, in seconds (unit is given by the key name).
  timeout_seconds: 15.0
  # Upper bound on code length. The unit (characters vs. tokens) is not stated
  # here — TODO confirm.
  max_code_length: 2048

# Generation Configuration
generation:
  # Cap on newly generated tokens per completion.
  max_new_tokens: 512
  # Sampling parameters; presumably passed straight to the model's generate
  # call — confirm.
  temperature: 0.8
  top_p: 0.95

# Dataset Configuration
dataset:
  # Dataset key understood by the consumer's loader.
  name: 'humaneval'
  # Explicit null. Presumably means "no limit on the number of tasks" —
  # TODO confirm how the loader treats a null here.
  max_tasks: null
