# When not running in debug mode, specify the wandb config manually in 'config/train_QPA.yaml' and set 'wandb=true'.

# quadruped_walk:
# Launch one train_DPB2.py run per entry in the list below.
# Each entry is a single string of the form "<seed> <device>" (exactly two
# space-separated fields). Every other argument is a fixed key=value override
# passed straight through to train_DPB2.py.
for params in "12345 0"; do
  # Split with parameter expansion rather than `echo $params | cut ...`:
  # no extra processes are spawned, and the entry is never exposed to
  # word splitting or pathname expansion.
  seed=${params%% *}    # text before the first space
  device=${params##* }  # text after the last space
  python train_DPB2.py \
    env=quadruped_walk \
    experiment=DPB2-Debug \
    agent.params.actor_lr=0.0001 \
    agent.params.critic_lr=0.0001 \
    gradient_update=1 \
    activation=tanh \
    num_unsup_steps=900 \
    num_train_steps=75000 \
    num_interact=3000 \
    max_feedback=1000 \
    reward_batch=100 \
    reward_update=2000 \
    teacher_beta=-1 \
    teacher_gamma=1 \
    teacher_eps_mistake=0 \
    teacher_eps_skip=0 \
    teacher_eps_equal=0.1 \
    segment=50 \
    max_reward_buffer_size=10 \
    data_aug_ratio=20 \
    ensemble_size=1 \
    explore=false \
    her_ratio=0.5 \
    population_size=3 \
    tpa=true \
    copy_agent=false \
    agent.params.beta_init=0.25 \
    disc.lr=0.00001 \
    disc.layernorm=false \
    wandb=true \
    device="$device" \
    seed="$seed"
done
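# Optional: to run in the background with a per-seed log, prefix the python
# command with `nohup` and append `> logs/$seed.log &` after the last argument.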