# Environment identifier, followed by the arguments grouped under env_args.
env: 'mt_grid_mpe'

env_args:
  n_agents: 3
  n_landmarks: 6
  # Two-element list; both entries are 15.
  field_size: [15, 15]
  # NOTE(review): 14 is one less than each field_size entry; presumably this
  # lets an agent observe the whole field — confirm against the environment.
  sight: 14
  episode_limit: 70
  reach_range: 2
  task_id: 0

# Test, logging and model-saving intervals.
# NOTE(review): the unit of these intervals (environment steps vs. episodes)
# is not visible in this file — confirm against the runner.
test_greedy: true
test_nepisode: 32
test_interval: 500
log_interval: 500
runner_log_interval: 500
learner_log_interval: 500
save_model_interval: 2500
# NOTE(review): the earlier inline note read "40000 training gradient steps",
# which does not match the configured value of 50000 — confirm which is intended.
t_max: 50000
# NOTE(review): numerically larger than t_max; presumably counted in a
# different unit — confirm.
encoder_train_episode: 60000

# Size limits and loss weights.
# NOTE(review): the names suggest these configure an encoder and its auxiliary
# losses — confirm against the consuming code.
id_length: 2
# Same value as env_args.n_agents (3) in this file.
max_agent: 3
max_ally_num: 6
max_enemy_num: 6
# NOTE(review): larger than env_args.episode_limit (70); presumably an upper
# bound shared across tasks — confirm.
max_step_num: 100
# Only the reward loss carries a non-zero weight; own/ally/enemy are all 0.0.
reward_loss_weight: 0.01
own_loss_weight: 0.0
ally_loss_weight: 0.0
enemy_loss_weight: 0.0
encoder_reward_scale: 0.1


# NOTE(review): -1 presumably means "no pretrained run selected" — confirm
# against the code that reads pretrain_id.
pretrain_id: -1
vae_id: 2
prior_role_use_history: false
separate_role_encoding: false

# Task split: total_task_ls holds the 20 ids 0-19. train_task_ls and
# test_task_ls each hold 10 ids, are disjoint, and together cover every id.
num_tasks: 20
num_train_tasks: 10
train_task_ls: [0, 3, 5, 7, 10, 11, 13, 17, 18, 19]
test_task_ls: [1, 2, 4, 6, 8, 9, 12, 14, 15, 16]
total_task_ls: [
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
  10, 11, 12, 13, 14, 15, 16, 17, 18, 19
]
role_ls: [0, 1, 2, 3, 4, 5]
# Six task-id lists, the same count as role_ls; every id used here also appears
# in train_task_ls.
# NOTE(review): presumably entry i lists the tasks for role i — confirm.
role2task:
  - [0, 3, 5, 7]
  - [0, 3, 10, 11, 13]
  - [0, 5, 10, 11, 17, 18]
  - [7, 10, 13, 17, 19]
  - [5, 7, 11, 13, 18, 19]
  - [3, 17, 18, 19]

# Path lists; all of them are empty in this configuration.
vae_path_ls: []

policy_path_ls: []

role_encoder_path_ls: []

prior_role_encoder_path_ls: []

encoder_path_ls: []

encoder_path_ls_old: []

pretrain_path_ls: []

# Offline dataset settings; used only in the single-task offline setting.
offline_data_folder: 'dataset'
role_data_root: ''

# No offline datasets are listed in this configuration.
offline_data_ls: []
offline_data_quality: 'expert'
offline_max_buffer_size: 10000
offline_data_shuffle: false
offline_data_type: 'h5'