# ACEAS Configuration for DeepSeek-Coder-1.3B
# Optimized for 2x A10G (24GB) GPUs

# Base model and LoRA adapter settings
model:
  name: 'deepseek-ai/deepseek-coder-1.3b-base'
  torch_dtype: 'float16'
  gradient_checkpointing: true
  # Both quantized-loading flags are switched off.
  load_in_8bit: false
  load_in_4bit: false
  # LoRA adapter: alpha (64) is twice the rank (32).
  use_lora: true
  lora_r: 32
  lora_alpha: 64
  # Modules that receive adapters; presumably the attention (q/k/v/o) and
  # MLP (gate/up/down) projections of the base model - confirm against the
  # model's module names.
  lora_target_modules:
    - 'q_proj'
    - 'k_proj'
    - 'v_proj'
    - 'o_proj'
    - 'gate_proj'
    - 'up_proj'
    - 'down_proj'

# Training Configuration
# Run length, optimizer step size, and how often to evaluate, checkpoint, and
# log. The units of the *_interval keys are defined by the consuming trainer
# (presumably timesteps, like total_timesteps - confirm).
training:
  total_timesteps: 50000
  batch_size: 8  # Reduced from 32 for memory constraints
  # Keep the explicit decimal point: YAML 1.1 loaders (e.g. PyYAML) resolve a
  # bare `5e-6` to the string "5e-6" instead of a float.
  learning_rate: 5.0e-6  # Slightly lower for larger model
  eval_interval: 2500
  save_interval: 5000
  log_interval: 100

# Curriculum task-selection settings
curriculum:
  strategy: 'adaptive'
  # ACB parameters; their exact meaning is defined by the consuming code.
  acb_alpha: 0.7
  acb_exploration: 1.0

# Worker scheduling and asynchronous execution
scheduling:
  # One worker per A10G GPU, each given a full GPU for the 1.3B model.
  num_workers: 2
  gpu_per_worker: 1.0
  # Feature toggles; all three are enabled.
  use_async: true
  use_csc: true
  use_eaas: true
  # Scheduler coefficients; semantics are defined by the consuming code.
  eta_base: 8.0
  lambda_coupling: 0.5

# GRPO hyperparameters
grpo:
  # Reduced due to smaller batch size.
  group_size: 4
  clip_epsilon: 0.2
  # Lower KL coefficient for larger model.
  kl_coef: 0.05

# Execution environment limits
environment:
  max_code_length: 2048
  # Increased timeout for complex generations.
  timeout_seconds: 15.0

# Sampling settings used when generating code
generation:
  # Increased for full code generation.
  max_new_tokens: 512
  top_p: 0.95
  temperature: 0.8

# Task dataset selection
dataset:
  name: 'humaneval'
  # null means no cap: use all tasks.
  max_tasks: null
