# Experiment "warehouse-uniform-ppo": runs the PPO-curriculum trainable on the
# multi-agent warehouse environment with a UniformTeacher curriculum.
warehouse-uniform-ppo:
  run: PPO-curriculum

  # Checkpointing and output location.
  checkpoint_freq: 1000
  checkpoint_at_end: true
  # restore: null
  # keep_checkpoints_num: 5
  local_dir: ray_results

  # Stop criterion for the whole run.
  stop:
    timesteps_total: 100000000

  config:
    seed: 123
    # callbacks:
    #   type: env.gfootball.utils.PvEMetricsCallback

    # Curriculum teacher.
    # NOTE(review): num_agents is presumably the set of agent counts the
    # teacher chooses from; its largest entry equals env_config.num_agents.
    # Confirm against UniformTeacher.
    teacher_config:
      type: agents.curriculum.teacher.UniformTeacher
      num_agents:
        - 2
        - 4
        - 6
        - 8
        - 10

    # Training environment.
    env: env.rware.multi_agent_env.WarehouseMultiAgentEnv
    env_config:
      shelf_columns: 9
      column_height: 8
      shelf_rows: 3
      num_agents: 10
      msg_bits: 3
      sensor_range: 1
      request_queue_size: 5
      max_inactivity_steps: null
      max_steps: 200
      reward_type: 0
      # Optional keys, currently left unset:
      # layout:
      # observation_type:
      # image_observation_layers:
      # image_observation_directional:
      # normalised_coordinates:

    # Rollout workers and resources (no GPUs are requested).
    num_workers: 50
    num_envs_per_worker: 3
    num_cpus_per_worker: 1
    num_cpus_for_driver: 1
    num_gpus: 0
    num_gpus_per_worker: 0

    # Evaluation schedule.
    evaluation_num_workers: 10
    # Interval is measured in training iterations.
    evaluation_interval: 20
    # The duration is the total summed over all evaluation workers. For
    # example, if there are 2 envs to evaluate, each will be evaluated for
    # half of the duration.
    evaluation_duration: 40
    evaluation_duration_unit: episodes
    evaluation_parallel_to_training: true
    # NOTE(review): EvalFn is imported from the gfootball utilities although
    # this experiment uses the rware environment - presumably it is
    # environment-agnostic; confirm.
    custom_eval_function:
      type: env.gfootball.utils.EvalFn

    # Overrides applied for evaluation. The env_config below currently repeats
    # the training env_config value for value; keep the two in sync by hand.
    evaluation_config:
      env_config:
        shelf_columns: 9
        column_height: 8
        shelf_rows: 3
        num_agents: 10
        msg_bits: 3
        sensor_range: 1
        request_queue_size: 5
        max_inactivity_steps: null
        max_steps: 200
        reward_type: 0

    # Optional algorithm/model overrides, currently disabled:
    # explore: true
    # gamma: 0.99
    # lr: 0.0003
    # observation_filter: MeanStdFilter
    # num_sgd_iter: 6
    # vf_loss_coeff: 0.01
    # model:
    #   fcnet_hiddens: [32]
    #   fcnet_activation: linear
    #   vf_share_layers: true

    # A single policy ID ("shared_policy") is declared and trained.
    multiagent:
      policies:
        - shared_policy
      # YAML-capable policy_mapping_fn definition via providing a callable
      # class here.
      # NOTE(review): the mapping class also lives under env.gfootball.utils;
      # confirm it is intended for the warehouse environment as well.
      policy_mapping_fn:
        type: env.gfootball.utils.MultiAgentParameterSharingPolicyMappingFn
      policies_to_train:
        - shared_policy

  # Optional progress-reporter columns, currently disabled:
  # metric_columns:
  #   training_iteration: iter
  #   time_total_s: time_total_s
  #   timesteps_total: ts
  #   episodes_this_iter: episodes_this_iter
  #   left_mean_win_rate: left_mean_win_rate
  #   right_mean_win_rate: right_mean_win_rate
  #   left_version: left_version
  #   right_version: right_version
