# Experiment "gfootball-1v1-ppo": runs the PPO algorithm with the settings
# listed under `config`.
# NOTE(review): the run/stop/config/checkpoint_* layout looks like a Ray Tune
# experiment file for RLlib - confirm against the script that loads it.
gfootball-1v1-ppo:
  run: PPO
  # Checkpointing: save every 50 (presumably training iterations - confirm),
  # save once more when the run ends, and keep 5 checkpoints.
  checkpoint_freq: 50
  checkpoint_at_end: true
  keep_checkpoints_num: 5
  # Results directory. The path is relative, so where it lands depends on the
  # launch directory (or on how the launcher resolves it - confirm).
  local_dir: ray_results
  # Stop criterion: end the run at 100,000,000 total timesteps.
  stop:
    timesteps_total: 100000000
  # Algorithm configuration; everything nested below belongs to it.
  config:
    # Callback class, referenced by dotted import path under `type`.
    # NOTE(review): presumably the loader resolves this path to a class - confirm.
    callbacks:
      type: env.gfootball.utils.PvEMetricsCallback
    # Environment class used for training, also given as a dotted import path.
    env: env.gfootball.multi_agent_env.FootballPvEEnv
    # Options passed to the training environment. `evaluation_config.env_config`
    # repeats this mapping with a different dump_frequency, logdir and
    # in_evaluation; keep the two in sync when changing any other key.
    env_config:
      env_name: 1_vs_1_half_field
      stacked: false
      rewards: scoring
      # All dump/render/video outputs are switched off for training, so
      # dump_frequency and logdir below presumably have no effect - confirm.
      write_goal_dumps: false
      write_full_episode_dumps: false
      render: false
      write_video: false
      dump_frequency: 200
      representation: simple115v2
      # The learning agent controls a single left-side player.
      number_of_left_players_agent_controls: 1
      logdir: dumps
      other_config_options:
        # "default" selects action_set_v1 (19 actions);
        # "v2" selects action_set_v2 (the same 19 plus 1 built-in AI action).
        action_set: default
      # Marks this as the training copy of the environment; the evaluation
      # copy sets this flag to true.
      in_evaluation: false

    # Rollout workers used for training, and CPUs reserved for the driver.
    num_workers: 100
    num_cpus_for_driver: 0
    # Evaluation uses its own pool of 20 workers.
    evaluation_num_workers: 20
    evaluation_interval: 10  # in training iterations
    # Total evaluation length, summed over all evaluation workers (for example,
    # with 2 environments to evaluate, each is evaluated for half of it).
    # The unit is set by evaluation_duration_unit below, i.e. 40 episodes.
    evaluation_duration: 40
    evaluation_duration_unit: episodes
    # NOTE(review): presumably runs evaluation concurrently with training
    # rather than pausing training - confirm against the RLlib version in use.
    evaluation_parallel_to_training: true

    # Per-worker resources: one environment and one CPU each; no GPUs are
    # requested for either the driver or the workers.
    num_envs_per_worker: 1
    num_cpus_per_worker: 1
    num_gpus: 0
    num_gpus_per_worker: 0
    # Batch sizing. With the values in this file,
    # num_workers (100) x num_envs_per_worker (1) x rollout_fragment_length (400)
    # equals train_batch_size (40000).
    # NOTE(review): presumably intentional, so that one fragment from every
    # worker fills exactly one training batch - re-check if any of them changes.
    rollout_fragment_length: 400
    train_batch_size: 40000
    # 40000 / 10000 = 4 minibatches per pass over the batch; num_sgd_iter is
    # presumably the number of such passes per training batch - confirm.
    sgd_minibatch_size: 10000
    num_sgd_iter: 6

    # Overrides applied to evaluation runs. The env_config below matches the
    # training env_config except for three keys:
    #   dump_frequency: 10 (training: 200)
    #   logdir: eval_dumps (training: dumps)
    #   in_evaluation: true (training: false)
    # NOTE(review): if the consumer deep-merges evaluation_config over the main
    # config, only those three keys would need listing here - verify before
    # trimming the duplication.
    evaluation_config:
      env_config:
        env_name: 1_vs_1_half_field
        stacked: false
        rewards: scoring
        # Dump/render/video outputs are switched off for evaluation as well.
        write_goal_dumps: false
        write_full_episode_dumps: false
        render: false
        write_video: false
        dump_frequency: 10
        representation: simple115v2
        number_of_left_players_agent_controls: 1
        logdir: eval_dumps
        other_config_options:
          # "default" selects action_set_v1 (19 actions);
          # "v2" selects action_set_v2 (the same 19 plus 1 built-in AI action).
          action_set: default
        # Marks this as the evaluation copy of the environment.
        in_evaluation: true

    # Optimisation hyperparameters: exploration enabled, discount factor 0.99,
    # learning rate 3e-4.
    explore: true
    gamma: 0.99
    lr: 0.0003
    # Disabled option, kept for reference: observation normalisation filter.
#    observation_filter: MeanStdFilter
    # Weight of the value-function term in the loss.
    vf_loss_coeff: 0.01
    # Disabled model override, kept for reference. No `model` key is set in
    # this file, so the consumer's default model settings presumably apply.
#    model:
#      fcnet_hiddens: [32]
#      fcnet_activation: linear
#      vf_share_layers: true

    # Multi-agent setup with a single policy id, "shared_policy", which is also
    # the only policy listed for training.
    multiagent:
      policies: ["shared_policy"]
      # The mapping function is given as a callable class (dotted import path
      # under `type`) so that it can be expressed in YAML.
      # NOTE(review): judging by its name, the class maps every agent to the
      # shared policy - confirm in env.gfootball.utils.
      policy_mapping_fn:
        type: env.gfootball.utils.MultiAgentParameterSharingPolicyMappingFn
      policies_to_train: ["shared_policy"]
