# Environment identifier string.
# NOTE(review): presumably looked up in an environment registry — confirm
# against the loader.
env: "mt_grid_mpe"

# Settings nested under the environment key above.
# NOTE(review): the per-key meanings are inferred from the key names only;
# confirm units and semantics against the environment implementation.
env_args:
  n_agents: 3
  n_landmarks: 6
  # Two-element list; presumably the grid dimensions — TODO confirm axis order.
  field_size: [15, 15]
  sight: 14
  episode_limit: 70
  reach_range: 2
  task_id: 0

# Evaluation, logging, checkpoint and training-length settings.
# NOTE(review): the units of the *_interval keys and of t_max are not stated
# in this file — confirm with the code that consumes them.
test_greedy: true
test_nepisode: 32
test_interval: 500
log_interval: 500
runner_log_interval: 500
learner_log_interval: 500
save_model_interval: 10000
# Original note: "40000 training gradient steps".
# NOTE(review): that figure differs from the configured value (50000);
# confirm whether the difference is intentional.
t_max: 50000
encoder_train_episode: 20000

# Size limits (id_length, max_*) followed by loss weights and a reward scale.
# NOTE(review): what each limit bounds is inferred from the key names only;
# confirm against the model/encoder code that reads them.
id_length: 3
max_agent: 5
max_ally_num: 6
max_enemy_num: 6
max_step_num: 100
# Of the four *_loss_weight keys, only reward_loss_weight is non-zero here;
# the own/ally/enemy weights are all 0.0.
# NOTE(review): presumably a zero weight disables that loss term — verify.
reward_loss_weight: 0.01
own_loss_weight: 0.0
ally_loss_weight: 0.0
enemy_loss_weight: 0.0
encoder_reward_scale: 0.1

# NOTE(review): -1 looks like a "none selected" sentinel — confirm with the
# code that reads pretrain_id.
pretrain_id: -1
# Both role-related switches are off in this configuration.
prior_role_use_history: false
separate_role_encoding: false

# Task split. As written in this file:
#   - total_task_ls has num_tasks (4) entries;
#   - train_task_ls has num_train_tasks (2) entries;
#   - train_task_ls and test_task_ls are disjoint and together contain
#     exactly the ids in total_task_ls.
# Keep these in agreement when editing any one of them.
num_tasks: 4
num_train_tasks: 2
train_task_ls: [56, 58]
test_task_ls: [57, 59]
total_task_ls: [56, 57, 58, 59]

# role2task has one entry per element of role_ls (4 entries), and every task
# id it lists also appears in train_task_ls.
# NOTE(review): presumably entry i lists the tasks for role_ls[i] — verify
# against the consumer.
role_ls: [0, 1, 2, 3]
role2task:
  - [56, 58]
  - [56, 58]
  - [58]
  - [58]

# Path lists. Every list below is an empty placeholder in this file, apart
# from pretrain_path_ls. Add entries inside the brackets, or switch to a block
# sequence (one "- <path>" per line) when filling them in.

# A list containing a single empty list.
# NOTE(review): presumably one inner list per task or role — confirm the
# expected nesting with the consumer.
policy_path_ls: [[]]

role_encoder_path_ls: []

prior_role_encoder_path_ls: []

encoder_path_ls: []

# YAML has no `None` keyword: this entry loads as the string "None", not as
# null. It is quoted so that the value is explicit; the parsed result is the
# same as the unquoted form.
# NOTE(review): confirm the consumer expects the string "None" rather than a
# real null.
pretrain_path_ls: ["None"]

# Offline dataset settings; used only in the single-task offline setting.
# NOTE(review): how offline_data_folder and role_data_root are combined into
# actual file paths is not visible in this file — confirm with the data loader.
offline_data_folder: "dataset"
role_data_root: ""

# Empty list in this configuration.
offline_data_ls: []
offline_data_quality: "expert"
offline_max_buffer_size: 10000
offline_data_shuffle: false
offline_data_type: "h5"