# The rob_ppo config: values set here override the defaults from ppo_trainer.yaml.

# Extra Hydra config search path (file:// scheme, relative directory).
# NOTE(review): presumably the directory that holds ppo_trainer.yaml — confirm.
hydra:
  searchpath:
    - 'file://verl/trainer/config'

# Hydra defaults list: ppo_trainer is composed first; _self_ is listed last so
# the values defined in this file are applied on top of it.
defaults:
  - ppo_trainer
  - _self_

# Environment settings for this config, grouped into rollout, actor, train and
# disagg_sim sections.
# NOTE(review): the per-key notes below are inferred from key names only —
# confirm against the code that consumes this config.
env:
  rollout:
    pipeline_stage_num: 2
  actor:
    model:
      # presumably: actions per predicted chunk and per-action dimensionality
      num_action_chunks: 8
      action_dim: 7
  train:
    simulator_type: libero
    max_episode_steps: 512
    reward_coef: 1.0
    only_eval: false
    video_cfg:
      save_video: true
      # Hard-coded path under /tmp; override it if that location is unsuitable.
      video_base_dir: /tmp/videos
    num_envs: 16
    seed: 42
    task_suite_name: libero_10
    # NOTE(review): looks like these are forwarded to the simulator — confirm.
    init_params:
      camera_depths: false
      camera_heights: 256
      camera_widths: 256
      camera_names:
        - agentview
        - robot0_eye_in_hand
  disagg_sim:
    enable: false
    nnodes: 1


# Settings under actor_rollout_ref, split into actor and rollout sections.
# NOTE(review): the per-key notes below are inferred from key names only —
# confirm against the code that consumes this config.
actor_rollout_ref:
  actor:
    num_images_in_input: 1
    traj_mini_batch_size: 16
    fsdp_config:
      wrap_policy:
        # Class names listed for FSDP wrapping (presumably matched by name).
        transformer_layer_cls_to_wrap:
          - PrismaticProjector
          - LlamaDecoderLayer
        min_num_params: 0
      param_offload: false
      optimizer_offload: false
      forward_prefetch: true
      # NOTE(review): -1 is presumably a sentinel meaning "auto" — confirm.
      fsdp_size: -1
  rollout:
    mode: async_envloop
    prompt_length: 512
