# Name identifying this environment configuration.
# NOTE(review): the suffix appears to mirror values set under `env_configs`
# (team_reward_type 'const', team_reward_multiplier 5, view_size 3) — confirm,
# and keep the name in sync when editing those settings.
env_name: MarlGridFindGoal_TRType_Const_TRM5_VS3
# Step budget, written as a float literal (4e7), so YAML loaders yield a float.
# NOTE(review): presumably the total number of environment steps for the run —
# verify against the consumer before changing it to an integer.
num_env_steps: 40000000.0
# Settings grouped under `env_configs`.
# NOTE(review): presumably forwarded to the MarlGrid environment constructor —
# confirm against the code that loads this file.
env_configs:
  # NOTE(review): `time_limit` and `max_steps` carry the same value (512);
  # confirm whether the consumer requires them to stay equal.
  time_limit: 512
  seed: 1
  env_type: 'c'
  num_agents: 3
  num_adversaries: 0
  max_steps: 512
  grid_size: 15

  # observation settings
  observation_style: 'dict'
  observe_position: false
  observe_self_position: true
  observe_self_env_act: false
  observe_t: false
  observe_done: false
  neutral_shape: true
  can_overlap: false
  active_after_done: false

  discrete_position: true

  view_size: 3
  view_tile_size: 6
  clutter_density: 0.15

  # if `num_blind_agents` is b, the FIRST b agents do not get image observations
  num_blind_agents: 0

  # agent communication length
  # IMPORTANT NOTE: this is set to 0 because this codebase does not need
  # messages to go through the environment
  comm_len: 0

  # if false, use continuous communication
  discrete_comm: false

  # team reward settings
  team_reward_type: 'const'
  team_reward_freq: 'none'
  team_reward_multiplier: 5

  info_gain_rew: false

  # update the env policy using only the env reward and the comm policy using
  # only the team reward
  separate_rew_more: false
