# @package _global_

data:
  # Training and validation both read the same parquet file under $HOME.
  # NOTE(review): presumably intentional for this experiment - confirm.
  train_files: '${oc.env:HOME}/data/amc/test.parquet'
  val_files: '${oc.env:HOME}/data/amc/test.parquet'
  train_batch_size: 80
  # Length limits: 1024 (prompt) + 3072 (response) = 4096 combined.
  max_prompt_length: 1024
  max_response_length: 3072
  # NOTE(review): presumably drops prompts longer than max_prompt_length
  # rather than truncating them - verify against the data loader.
  filter_overlong_prompts: true

# Overrides for the actor's PPO batch sizes and for the sampling
# parameters the rollout uses during validation.
actor_rollout_ref:
  actor:
    # Same value as data.train_batch_size (80).
    ppo_mini_batch_size: 80
    ppo_micro_batch_size_per_gpu: 20
  rollout:
    val_kwargs:
      # Sampling parameters for validation.
      top_k: -1  # 0 for hf rollout, -1 for vllm rollout
      top_p: 0.5
      temperature: 0.8
      n: 32

trainer:
  # Number of epochs for the whole run.
  total_epochs: 150
  # NOTE(review): presumably the validation interval - confirm the unit
  # (steps vs. epochs) against the trainer implementation.
  test_freq: 1
