#!/usr/bin/env bash

# Train on Mujoco environments (default: 10 expert demos).
# Set expert.demos=1 to use a single expert demo.

# Set working directory to iq_learn.
# The directory is resolved from this script's own location rather than from
# the caller's current directory, so the script can be launched from anywhere.
# NOTE(review): assumes this script lives one level below iq_learn -- confirm.
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" || {
  printf 'error: cannot resolve the directory of %s\n' "${BASH_SOURCE[0]}" >&2
  exit 1
}
cd -- "${script_dir}/.." || {
  printf 'error: cannot cd to %s/..\n' "${script_dir}" >&2
  exit 1
}

# Activate python virtual environment.
# Abort if activation fails: otherwise every run below would silently use
# whichever python3 happens to be first on PATH.
# shellcheck disable=SC1091  # venv is created at setup time, not in the repo
source venv/bin/activate || {
  printf 'error: cannot activate venv/bin/activate under %s\n' "${PWD}" >&2
  exit 1
}

# Each section below launches train_rl.py once per preference vector, one run
# after another. The overrides shared by all runs of an environment are kept
# in an array; only agent.preference changes between runs.

# Two-weight preference vectors (cheetah, ant, walker, dst).
pair_weights=("[0.9, 0.1]" "[0.5, 0.5]" "[0.1, 0.9]")
# Three-weight preference vectors (hopper).
triple_weights=("[0.8, 0.1, 0.1]" "[0.1, 0.8, 0.1]" "[0.1, 0.1, 0.8]")

# Mo-HalfCheetah
cheetah_opts=(
  env=cheetah agent=sac method.loss=value method.regularize=True
  agent.actor_lr=3e-05 seed=0
)
for weights in "${pair_weights[@]}"; do
  python3 train_rl.py "${cheetah_opts[@]}" "agent.preference=${weights}"
done

# Mo-Hopper
hopper_opts=(
  env=hopper agent=sac method.loss=v0 method.regularize=True
  agent.actor_lr=3e-5 seed=0
)
for weights in "${triple_weights[@]}"; do
  python3 train_rl.py "${hopper_opts[@]}" "agent.preference=${weights}"
done

# Mo-ant-v4 (the only environment here that also overrides agent.init_temp)
ant_opts=(
  env=ant agent=sac method.loss=value method.regularize=True
  agent.actor_lr=3e-05 agent.init_temp=0.001 seed=0
)
for weights in "${pair_weights[@]}"; do
  python3 train_rl.py "${ant_opts[@]}" "agent.preference=${weights}"
done

# Mo-walker-v4
walker_opts=(
  env=walker agent=sac method.loss=v0 method.regularize=True
  agent.actor_lr=3e-05 seed=0
)
for weights in "${pair_weights[@]}"; do
  python3 train_rl.py "${walker_opts[@]}" "agent.preference=${weights}"
done

# Dst
dst_opts=(
  env=dst agent=sac method.loss=value method.regularize=True
  agent.actor_lr=3e-05 seed=0
)
for weights in "${pair_weights[@]}"; do
  python3 train_rl.py "${dst_opts[@]}" "agent.preference=${weights}"
done