#!/usr/bin/env bash
#
# Launches `python -m train` on MultiGrid-GoalLastVariableBlocksAdversarialEnv-v0
# with --algo=ppo, --ued_algo=domain_randomization and --use_plr=True.
#
# Usage:
#   bash <this-script> [SEED]
#
#   SEED  Optional first argument; defaults to 1. It is forwarded as --seed
#         and appended to the log directory (logs/MG_PLR_<SEED>).
#
# Notes for maintainers:
#   * --xpid is a hand-written literal that spells out several of the values
#     passed below (lr0.0001, epoch5, mb1, v0.5, gc0.5, plr0.5, rho0.5, n4000,
#     st0.3, positive_value_loss, rank, t0.3). Nothing keeps it in sync
#     automatically, so update it by hand when changing those flags.
#   * --xpid does not contain the seed; only --log_dir does.
#   * NOTE(review): --use_lstm=False is passed alongside --recurrent_arch=lstm
#     and --recurrent_agent=True. Presumably these control different
#     components of the trainer; confirm against train's argument parser.
#   * `python` is the final command, so its exit status is this script's
#     exit status.

# Seed for this run: first positional argument, or 1 when none is given.
SEED="${1:-1}"

# Per-seed log directory, so runs with different seeds do not share logs.
LOG_DIR="logs/MG_PLR_${SEED}"

python -m train \
    --xpid=ued-MultiGrid-GoalLastVariableBlocksAdversarialEnv-v0-domain_randomization-noexpgrad-lstm256a-lr0.0001-epoch5-mb1-v0.5-gc0.5-henv0.0-ha0.0-plr0.5-rho0.5-n4000-st0.3-positive_value_loss-rank-t0.3-tl_0 \
    --env_name=MultiGrid-GoalLastVariableBlocksAdversarialEnv-v0 \
    --use_gae=True \
    --gamma=0.995 \
    --gae_lambda=0.95 \
    --seed="${SEED}" \
    --num_control_points=12 \
    --recurrent_arch=lstm \
    --recurrent_agent=True \
    --recurrent_adversary_env=False \
    --recurrent_hidden_size=256 \
    --use_global_critic=False \
    --lr=0.0001 \
    --num_steps=256 \
    --num_processes=32 \
    --num_env_steps=500000000 \
    --ppo_epoch=5 \
    --num_mini_batch=1 \
    --entropy_coef=0.0 \
    --value_loss_coef=0.5 \
    --clip_param=0.2 \
    --clip_value_loss=True \
    --adv_entropy_coef=0.0 \
    --max_grad_norm=0.5 \
    --algo=ppo \
    --ued_algo=domain_randomization \
    --use_plr=True \
    --level_replay_prob=0.5 \
    --level_replay_rho=0.5 \
    --level_replay_seed_buffer_size=4000 \
    --level_replay_score_transform=rank \
    --level_replay_temperature=0.3 \
    --staleness_coef=0.3 \
    --no_exploratory_grad_updates=True \
    --use_editor=False \
    --level_editor_prob=0 \
    --level_editor_method=random \
    --num_edits=0 \
    --base_levels=batch \
    --use_accel_paired=False \
    --accel_paired_score_function=paired \
    --use_lstm=False \
    --use_behavioural_cloning=False \
    --kl_loss_coef=0.0 \
    --kl_update_step=1 \
    --use_kl_only_agent=False \
    --log_interval=100 \
    --screenshot_interval=1000 \
    --log_grad_norm=False \
    --handle_timelimits=True \
    --checkpoint_basis=student_grad_updates \
    --archive_interval=1000 \
    --level_replay_strategy=positive_value_loss \
    --test_env_names=MultiGrid-SixteenRooms-v0,MultiGrid-SixteenRoomsFewerDoors-v0,MultiGrid-Labyrinth-v0,MultiGrid-Labyrinth2-v0,MultiGrid-Maze-v0,MultiGrid-Maze2-v0,MultiGrid-Maze3-v0,MiniGrid-SimpleCrossingS11N5-v0,MiniGrid-FourRooms-v0,MultiGrid-SmallCorridor-v0,MultiGrid-LargeCorridor-v0,MultiGrid-PerfectMazeMedium-v0 \
    --test_interval=1000 \
    --test_num_episodes=100 \
    --log_dir="${LOG_DIR}" \
    --log_action_complexity=True \
    --log_plr_buffer_stats=True \
    --log_replay_complexity=True \
    --reject_unsolvable_seeds=False \
    --checkpoint=True