# Defines the `pipeline` anchor; the same value is reused through the
# `*pipeline` alias under `agent.pipeline`.
var1: &pipeline 'serial_ppo'
# Experiment-wide settings.
common:
  # Quoted like the other string values in this file.
  experiment_name: 'gb_serial_ppo'
  # NOTE(review): presumably the random seed -- confirm against the consumer.
  seed: 0
# Environment configuration for the 'gobigger' environment.
env:
  name: 'gobigger'
  team_num: 2
  player_num_per_team: 2
  step_mul: 2
  map_width: 64
  map_height: 64
  frame_limit: 3600
  # Per-manager settings. The food and thorns managers each take an
  # initial / minimum / maximum count.
  manager_settings:
    food_manager:
      num_init: 260
      num_min: 260
      num_max: 300
    thorns_manager:
      num_init: 3
      num_min: 3
      num_max: 4
    player_manager:
      ball_settings:
        # Same value as agent.start_player_score.
        # NOTE(review): presumably the two must stay in sync -- confirm.
        score_init: 13000
  playback_settings:
    playback_type: 'by_frame'
    by_frame:
      save_frame: false
# Settings for the collect stage.
collect:
  # NOTE(review): presumably the number of environments used while
  # collecting -- confirm against the consumer.
  env_num: 2
# Settings for the evaluate stage.
evaluate:
  env_num: 2
  # NOTE(review): presumably a score threshold at which training stops --
  # confirm against the consumer.
  stop_value: 1000000
  eval_episodes_num: 2
  # NOTE(review): the unit (steps vs. iterations) is not visible in this
  # file -- confirm against the consumer.
  eval_freq: 10000
# Agent / policy configuration.
agent:
  use_cuda: true
  # Resolves to the value anchored as `&pipeline` on `var1` ('serial_ppo').
  pipeline: *pipeline
  rollout_nstep: 20
  update_per_collect: 5
  batch_size: 64
  # Empty string as written; NOTE(review): presumably means "no checkpoint
  # to load" -- confirm against the consumer.
  load_checkpoint_path: ''
  print_eval_result: true
  print_collect_result: true
  # Reward shaping: divisors and clip switches per reward component.
  spore_reward_div_value: 10
  opponent_reward_div_value: 1
  clone_reward_div_value: 1
  clip_opponent_reward: false
  clip_clone_reward: true
  clip_spore_reward: true
  use_action_mask: true
  dist_reward_div_value: 10
  reward_div_value: 0.01
  reward_type: 'log_reward'
  spirit: 1
  start_spirit_progress: 0.2
  end_spirit_progress: 0.8
  # Same value as
  # env.manager_settings.player_manager.ball_settings.score_init.
  start_player_score: 13000
  features:
    max_ball_num: 80
    # NOTE(review): env.manager_settings.food_manager.num_max is 300, which
    # is larger than this cap -- confirm that is intended.
    max_food_num: 256
    max_spore_num: 64
    direction_num: 12
    # spatial_x / spatial_y carry the same values as env.map_width /
    # env.map_height.
    spatial_x: 64
    spatial_y: 64
  loss_parameters:
    gamma: 0.99
    gae_lambda: 0.95
    # Alternative value noted by the original author: 'lin_0.1'.
    clip_range: 0.2
    clip_range_vf: -1
    reward_normalization: false
    advantage_normalization: false
  loss_weights:
    policy: 1
    value: 0.5
    entropy: 0.01
  optimizer:
    # Chosen from ['adam', 'rmsprop'].
    type: 'rmsprop'
    learning_rate: 0.0001
    weight_decay: 0.0
    eps: 0.00001
    # Author's hint: 0.999 when type is 'adam', 0 when type is 'rmsprop'.
    momentum: 0
    # Author's hint: 0.9 when type is 'adam', 0.99 when type is 'rmsprop'.
    decay: 0.99
  grad_clip:
    type: 'clip_norm'
    threshold: 0.5
    norm_type: 2
# Learner loop settings.
learner:
  # Written with an explicit decimal point and a signed exponent: YAML 1.1
  # resolvers (e.g. PyYAML) load a bare `1e11` as the string '1e11' rather
  # than as a float, whereas `1.0e+11` is a float under YAML 1.1 and 1.2.
  n_timesteps: 1.0e+11
  log_show_freq: 100
  save_checkpoint_freq: 1000