---
# Top-level run settings.
# `level`, `rollout_size` and `target_voc` are also interpolated into
# `grpo.run_name`, so changing them changes the generated run name.
general:
  # Output location. The value is a placeholder; replace it before launching.
  save_path: '<save path>'
  save_last: false

  # NOTE(review): presumably toggles use of the `peft` section — confirm.
  use_peft: true

  # Values that feed the run name (see `grpo.run_name`).
  level: 2
  rollout_size: 8
  target_voc: 0.5

  cnt_proceed: 3

  # Four-integer crop boxes. The coordinate convention (corner/corner vs.
  # origin/size) is not visible in this file — confirm with the consumer.
  video_crop_box: [0, 0, 640, 330]
  gb_crop_box: [0, 0, 160, 120]

# Model and LoRA checkpoint locations.
# Both values are placeholders and must be replaced with real paths.
model:
  model_path: '<model path>'
  lora_path: '<lora chkpt path>'

# Environment settings.
environment:
  # NOTE(review): presumably the maximum number of image observations kept
  # per prompt/history — confirm against the environment code.
  max_img_obs_len: 5

# VOC settings.
# `n_repeats` is also interpolated into `grpo.run_name`.
voc:
  n_repeats: 2
  buffer_size: 15
  # Quoted so the version tag is unambiguously a string.
  prompt_version: 'v1'

# LoRA adapter settings.
# NOTE(review): the key names match the PEFT `LoraConfig` fields, so this
# mapping is presumably forwarded to it as keyword arguments — confirm.
peft:
  r: 32
  lora_alpha: 64
  lora_dropout: 0.1
  bias: 'none'
  # Module names that receive adapters, one per line for clean diffs.
  target_modules:
    - 'q_proj'
    - 'k_proj'
    - 'v_proj'
    - 'o_proj'
    - 'gate_proj'
    - 'down_proj'
    - 'up_proj'

# GRPO trainer settings.
# NOTE(review): the key names match TRL `GRPOConfig` / transformers
# `TrainingArguments` fields, so this mapping is presumably forwarded to the
# trainer config as keyword arguments — confirm against the training script.
grpo:
  # Placeholder value (note the embedded space); replace before launching.
  output_dir: 'output dir'

  # Batching and optimisation.
  per_device_train_batch_size: 1
  gradient_accumulation_steps: 32
  gradient_checkpointing: true
  # Written with an explicit mantissa dot: a bare `1e-5` is resolved as a
  # *string* by YAML 1.1 loaders (e.g. stock PyYAML), not as a float.
  learning_rate: 1.0e-5
  warmup_steps: 10
  num_iterations: 1
  max_steps: 100000
  report_to: 'wandb'
  bf16: true

  # Generation / sampling.
  max_prompt_length: null
  max_completion_length: 2048
  num_generations: 4
  max_grad_norm: 1
  seed: 54
  temperature: 1.0
  beta: 0.05
  top_p: 0.9

  # Logging.
  logging_strategy: 'steps'
  logging_steps: 1

  # Checkpointing.
  # NOTE(review): `save_steps` equals `max_steps`, so a step-based save would
  # only occur at the final step — confirm this is intended.
  save_strategy: 'steps'
  save_steps: 100000
  save_only_model: true

  # Run name assembled by `${...}` interpolation from values in the `general`,
  # `grpo` and `voc` sections; requires a loader that resolves interpolations.
  run_name: 'lvl-${general.level}-rs-${general.rollout_size}-mu-${grpo.num_iterations}-num_gen-${grpo.num_generations}-temp-${grpo.temperature}-beta-${grpo.beta}-lr-${grpo.learning_rate}-seed-${grpo.seed}-tvoc-${general.target_voc}-nrep-${voc.n_repeats}'