#!/usr/bin/env bash
#
# Launches main.py (run from the current directory, with PYTHONPATH=.) using
# the "hy_moore" setup and the "hybrid_sac" agent, passing the configuration
# below as key=value command-line overrides.
#
# Usage:
#   <script> ENV MAP REPLAY_BUFFER_CAPACITY REPLAY_BUFFER_BATCH_SIZE NUM_TRAIN_STEPS
#
# Arguments:
#   $1  ENV                       - environment family; also used as experiment.name
#   $2  MAP                       - joined with ENV as "ENV-MAP" to form the env override
#   $3  REPLAY_BUFFER_CAPACITY    - forwarded to replay_buffer.capacity
#   $4  REPLAY_BUFFER_BATCH_SIZE  - forwarded to replay_buffer.batch_size
#   $5  NUM_TRAIN_STEPS           - forwarded to experiment.num_train_steps
#
# Exit status: 2 on a usage error, otherwise the exit status of python3.

set -eu

# Fail fast instead of starting a run with empty overrides (e.g. "env=-").
# Extra arguments beyond the fifth are ignored, as before.
if [ "$#" -lt 5 ]; then
  printf 'Usage: %s ENV MAP REPLAY_BUFFER_CAPACITY REPLAY_BUFFER_BATCH_SIZE NUM_TRAIN_STEPS\n' "$0" >&2
  exit 2
fi

env=$1
map=$2
replay_buffer_capacity=$3
replay_buffer_batch_size=$4
num_train_steps=$5

env_name="${env}-${map}"

# Every expansion is quoted so each override reaches main.py as exactly one
# argument, with no word splitting or pathname expansion. The two list-valued
# overrides at the end are single-quoted so the brackets and embedded spaces
# are passed through literally.
PYTHONPATH=. python3 -u main.py \
  setup.alg=hy_moore \
  "env=${env_name}" \
  agent=hybrid_sac \
  "experiment.name=${env}" \
  "experiment.num_train_steps=${num_train_steps}" \
  experiment.add_sample_in_traj=True \
  "replay_buffer.capacity=${replay_buffer_capacity}" \
  "replay_buffer.batch_size=${replay_buffer_batch_size}" \
  agent.encoder.type_to_select=identity \
  agent.multitask.should_use_disentangled_alpha=True \
  agent.multitask.should_use_task_encoder=False \
  agent.multitask.should_use_task_onehot=True \
  agent.multitask.should_use_multi_head_policy=True \
  agent.multitask.actor_cfg.moe_should_use=True \
  agent.moe_component.hidden_layers=1 \
  agent.moe_component.emb_hidden_layers=1 \
  agent.moe_component.num_layers=3 \
  agent.moe_component.num_experts=4 \
  agent.moe_component.module_hidden_features=400 \
  agent.moe_component.gating_hidden_features=256 \
  agent.moe_component.gating_hidden_layers=0 \
  agent.moe_component.cond_obs=False \
  agent.moe_component.use_moore=True \
  agent.multitask.actor_cfg.should_condition_model_on_task_info=False \
  agent.multitask.actor_cfg.should_condition_encoder_on_task_info=False \
  agent.multitask.actor_cfg.should_concatenate_task_info_with_encoder=False \
  agent.multitask.hybrid_mtrl=True \
  'experiment.reset_network_names=[critic]' \
  'experiment.reset_network_steps=[300000, 600000, 900000, 1200000]'