# The rob_ppo config overrides values from the default ppo_trainer.yaml.

# Hydra settings: add the verl/trainer/config directory (given as a file:// URL)
# to the config search path so configs stored there can be found.
# NOTE(review): the path is relative — presumably resolved against the launch
# working directory; confirm.
hydra:
  searchpath:
    - file://verl/trainer/config

# Defaults list: ppo_trainer is composed first and this file (_self_) last, so
# the values defined in this file take precedence over those from ppo_trainer.
defaults:
  - ppo_trainer
  - _self_

# Environment settings, grouped into rollout, actor, train, and disagg_sim sections.
env:
  rollout:
    pipeline_stage_num: 2
  actor:
    model:
      num_action_chunks: 8
      action_dim: 7
  train:
    simulator_type: libero
    max_episode_steps: 512
    reward_coef: 1.0
    only_eval: false
    video_cfg:
      save_video: true
      video_base_dir: /tmp/videos
    num_envs: 16
    seed: 42
    task_suite_name: libero_10
    init_params:
      camera_depths: false
      camera_heights: 256
      camera_widths: 256
      camera_names:
        - agentview
        - robot0_eye_in_hand

    # Profiler settings for the env worker
    profiler:

      # Needed so verl.utils.omega_conf_to_dataclass can instantiate the dataclass config
      _target_: verl.utils.profiler.ProfilerConfig

      # Which profiling tool to use (one of: nsys, npu, torch, torch_memory).
      # Taken from global_profiler.tool when that key is set, otherwise null.
      tool: ${oc.select:global_profiler.tool,null}

      # Turn profiling of the env worker on or off
      enable: false

      # Profile every rank when true
      all_ranks: false

      # Explicit list of ranks to profile; an empty list selects no specific ranks
      ranks: []

      # Where profiling results are written.
      # Taken from global_profiler.save_path when that key is set, otherwise null.
      save_path: ${oc.select:global_profiler.save_path,null}

      # Per-tool configuration sections
      tool_config:

        # Settings for the nsys tool
        nsys:

          # Needed so verl.utils.omega_conf_to_dataclass can instantiate the dataclass config
          _target_: verl.utils.profiler.config.NsightToolConfig

          # true: each task gets its own database;
          # false: all tasks in one training step share one database.
          # NOTE(review): no fallback value is given here, unlike the torch_memory
          # entries further down — confirm the referenced key is always defined.
          discrete: ${oc.select:global_profiler.global_tool_config.nsys.discrete}

        # Settings for the npu tool
        npu:

          # Needed so verl.utils.omega_conf_to_dataclass can instantiate the dataclass config
          _target_: verl.utils.profiler.config.NPUToolConfig

          # What to profile; may be left empty.
          # Choices: npu, cpu, memory, shapes, module, stack
          contents: []

          # Collection level; one of: level_none, level0, level1, level2
          level: level1

          # Parse the collected data automatically when true
          analysis: true

          # true: each task gets its own database;
          # false: all tasks in one training step share one database.
          discrete: false

        # Settings for the torch profiler
        torch:

          # Needed so verl.utils.omega_conf_to_dataclass can instantiate the dataclass config
          _target_: verl.utils.profiler.config.TorchProfilerToolConfig

          # What to profile; may be left empty.
          # Choices: cuda, cpu, memory, shapes, stack
          contents: []

          # true: each task gets its own database;
          # false: all tasks in one training step share one database.
          discrete: false

        # Settings for the torch memory profiler
        torch_memory:

          # Needed so verl.utils.omega_conf_to_dataclass can instantiate the dataclass config
          _target_: verl.utils.profiler.config.TorchMemoryToolConfig

          # Upper bound on the number of memory allocation entries tracked
          # (falls back to 100000 when the global setting is absent)
          trace_alloc_max_entries: ${oc.select:global_profiler.global_tool_config.torch_memory.trace_alloc_max_entries,100000}

          # Depth of the stack trace recorded for memory allocations
          # (falls back to 32 when the global setting is absent)
          stack_depth: ${oc.select:global_profiler.global_tool_config.torch_memory.stack_depth,32}
  disagg_sim:
    enable: false
    nnodes: 1


# Values set here for the actor and rollout sections of actor_rollout_ref.
actor_rollout_ref:
  actor:
    num_images_in_input: 1
    traj_mini_batch_size: 16
    fsdp_config:
      wrap_policy:
        transformer_layer_cls_to_wrap:
          - PrismaticProjector
          - LlamaDecoderLayer
        min_num_params: 0
      param_offload: false
      optimizer_offload: false
      forward_prefetch: true
      fsdp_size: -1
  rollout:
    mode: async_envloop
    prompt_length: 512
