# ACEAS Configuration for Qwen 3 1.7B
# Optimized for 4x A10G (24 GB) GPUs on a Ray cluster

# Model Configuration
model:
  # Model identifier (looks like a Hugging Face Hub repo ID; confirm
  # against the loader).
  name: 'Qwen/Qwen3-1.7B'

  # LoRA adapter settings. lora_alpha is 2x lora_r in this config.
  use_lora: true
  lora_r: 16
  lora_alpha: 32
  # Modules that receive adapters. Judging by the names, the first four are
  # attention projections and the last three are MLP projections.
  lora_target_modules:
    - 'q_proj'
    - 'k_proj'
    - 'v_proj'
    - 'o_proj'
    - 'gate_proj'
    - 'up_proj'
    - 'down_proj'

  # Enabled to reduce memory usage on the A10G.
  gradient_checkpointing: true

  # Both quantized-loading flags are switched off.
  load_in_8bit: false
  load_in_4bit: false

  # A10G supports bf16 natively; requires a matching cuBLAS version.
  torch_dtype: 'bfloat16'

# Training Configuration
training:
  # Length of the run and per-step batch size.
  total_timesteps: 5000
  batch_size: 4
  # Keep the explicit decimal point: YAML 1.1 resolvers (PyYAML among them)
  # load a bare `2e-5` as the string "2e-5" instead of a float, while
  # `2.0e-5` is a float under both YAML 1.1 and YAML 1.2.
  learning_rate: 2.0e-5
  # Intervals below are presumably in the same unit as total_timesteps
  # (TODO confirm against the trainer).
  eval_interval: 500
  # NOTE(review): equal to total_timesteps, so presumably only a single
  # checkpoint is written at the very end of the run. Confirm this is
  # intended, since a crash mid-run would leave nothing to resume from.
  save_interval: 5000
  log_interval: 50

# Curriculum Configuration
curriculum:
  # Name of the curriculum strategy to use.
  strategy: 'adaptive'
  # Tuning knobs sharing the `acb_` prefix. The acronym is not expanded
  # anywhere in this file; presumably they parameterize the adaptive
  # strategy (TODO confirm and document valid ranges).
  acb_exploration: 1.0
  acb_alpha: 0.7

# Async/Scheduling Configuration
scheduling:
  # 4 workers x 1.0 GPU each lines up with the 4x A10G target named in the
  # file header.
  num_workers: 4
  gpu_per_worker: 1.0
  # Feature toggles. "csc" and "eaas" are not expanded anywhere in this
  # file; presumably ACEAS-specific components (TODO document them).
  use_async: true
  use_csc: true
  use_eaas: true
  # Numeric scheduler parameters; their meaning and valid ranges are not
  # visible from this file (TODO confirm against the scheduler code).
  eta_base: 8.0
  lambda_coupling: 0.5

# GRPO Configuration
grpo:
  # Hyperparameters for the GRPO update. By name: a clipping threshold and
  # a KL-term coefficient (TODO confirm how the trainer applies them).
  clip_epsilon: 0.2
  kl_coef: 0.05
  # Same value as training.batch_size (4); whether the two must stay in
  # sync is not visible from this file.
  group_size: 4

# Environment Configuration
environment:
  # Timeout in seconds, per the key name; presumably applied to each code
  # execution (TODO confirm).
  timeout_seconds: 10.0
  # Upper bound on code length. The unit (characters vs. tokens) is not
  # stated in this file (TODO confirm).
  max_code_length: 2048

# Generation Configuration
generation:
  # Sampling settings for model generation.
  # NOTE(review): 128 new tokens is far below environment.max_code_length
  # (2048); the units may differ, but confirm completions are not being
  # truncated.
  max_new_tokens: 128
  temperature: 0.8
  top_p: 0.95

# Dataset Configuration
dataset:
  # Dataset selector.
  name: 'humaneval'
  # Explicit null; presumably means "no cap on the number of tasks"
  # (TODO confirm how the loader treats null).
  max_tasks: null
