#!/usr/bin/env bash
#
# Launch a single train_DPB2.py run (experiment "PB2-abl", env "cheetah_run")
# inside the ~/PB2/pebenv virtual environment.
#
# Usage:
#   <this script> SEED BETA_INIT DISC_LR EPS_EQUAL DISC_ON_POLICY COPY_AGENT
#
# Positional arguments and the override each one is forwarded to:
#   $1 -> seed
#   $2 -> agent.params.beta_init
#   $3 -> disc.lr
#   $4 -> teacher_eps_equal
#   $5 -> disc.on_policy
#   $6 -> copy_agent
#
# Every other setting is fixed below. Each key is passed exactly once so
# there is a single place to edit its value.
# NOTE(review): the key=value arguments look like Hydra-style overrides;
# confirm against train_DPB2.py before relying on Hydra-specific syntax.

# Refuse to start with missing arguments: an unset positional parameter
# would otherwise expand to an empty value (e.g. "seed=").
# `return` covers the case where this file is sourced; `exit` the normal case.
if [ "$#" -lt 6 ]; then
	printf 'Usage: %s SEED BETA_INIT DISC_LR EPS_EQUAL DISC_ON_POLICY COPY_AGENT\n' \
		"${0##*/}" >&2
	return 2 2>/dev/null || exit 2
fi

# Do not fall through to whatever `python` is on PATH if activation fails.
source ~/PB2/pebenv/bin/activate || {
	printf 'error: could not activate ~/PB2/pebenv\n' >&2
	return 1 2>/dev/null || exit 1
}

# Positional parameters are quoted so each override reaches Python as exactly
# one argument, with no word splitting or glob expansion of its value.
python ~/PB2/train_DPB2.py \
	env=cheetah_run \
	experiment=PB2-abl \
	agent.params.actor_lr=0.0005 \
	agent.params.critic_lr=0.0005 \
	gradient_update=1 \
	activation=tanh \
	num_unsup_steps=9000 \
	num_train_steps=500000 \
	num_interact=20000 \
	max_feedback=100 \
	reward_batch=10 \
	reward_update=2000 \
	teacher_beta=-1 \
	teacher_gamma=1 \
	teacher_eps_mistake=0 \
	teacher_eps_skip=0 \
	teacher_eps_equal="$4" \
	segment=50 \
	max_reward_buffer_size=10 \
	data_aug_ratio=20 \
	ensemble_size=1 \
	explore=false \
	her_ratio=0.5 \
	wandb=true \
	device='cuda' \
	seed="$1" \
	population_size=3 \
	tpa=true \
	agent.params.beta_init="$2" \
	disc.lr="$3" \
	disc.layernorm=false \
	disc.on_policy="$5" \
	copy_agent="$6"
