#!/usr/bin/env bash

# Train on MuJoCo environments (default: 10 expert demos).
# Set expert.demos=1 to use a single expert demo.

# Set working directory to iq_learn.
# The path is resolved relative to this script's own location (the parent of
# the directory holding this file) instead of the caller's current directory,
# so the script can be launched from anywhere.
# NOTE(review): assumes this script lives one level below iq_learn — confirm.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." || {
  printf 'error: cannot change to the parent of the script directory\n' >&2
  exit 1
}

# Names of the environments whose training run exited non-zero.
failed_envs=()

#######################################
# Launch one train_moiq.py run with the SAC agent.
# A failing run is reported and recorded, but does not stop later runs.
# Globals:   failed_envs (appended to on failure)
# Arguments: $1   - value for the env= override
#            $2.. - remaining overrides, passed through verbatim
# Outputs:   writes an error line to stderr when the run fails
# Returns:   0 always
#######################################
run_training() {
  local env_name=$1
  shift
  if ! python3 train_moiq.py "env=${env_name}" agent=sac "$@"; then
    printf 'error: training failed for env=%s\n' "${env_name}" >&2
    failed_envs+=("${env_name}")
  fi
}

# HalfCheetah-v4
run_training cheetah \
  agent.preferences="[[0.9, 0.1],[0.5, 0.5],[0.1, 0.9]]" \
  expert.demos=10 method.loss=value method.regularize=True \
  agent.actor_lr=3e-05 seed=0 agent.constraint_coef=5

# Hopper-v4 (the only run here with three-component preference vectors)
run_training hopper \
  agent.preferences="[[0.8, 0.1, 0.1],[0.1, 0.8, 0.1],[0.1, 0.1, 0.8]]" \
  expert.demos=10 method.loss=v0 method.regularize=True \
  agent.actor_lr=3e-5 seed=0 agent.constraint_coef=5

# Ant-v4 (the only run here that overrides agent.init_temp)
run_training ant \
  agent.preferences="[[0.9, 0.1],[0.5, 0.5],[0.1, 0.9]]" \
  expert.demos=10 method.loss=v0 method.regularize=True \
  agent.actor_lr=3e-05 agent.init_temp=0.001 seed=0 agent.constraint_coef=5

# Walker-v4
run_training walker \
  agent.preferences="[[0.9, 0.1],[0.5, 0.5],[0.1, 0.9]]" \
  expert.demos=10 method.loss=v0 method.regularize=True \
  agent.actor_lr=3e-05 seed=0 agent.constraint_coef=5

# Dst
run_training dst \
  agent.preferences="[[0.9, 0.1],[0.5, 0.5],[0.1, 0.9]]" \
  expert.demos=10 method.loss=value method.regularize=True \
  agent.actor_lr=3e-05 seed=0 agent.constraint_coef=5

# Surface any failed run through the exit status so callers (CI, job
# schedulers) do not mistake a partially failed sweep for a success.
if (( ${#failed_envs[@]} > 0 )); then
  printf 'error: %d training run(s) failed: %s\n' \
    "${#failed_envs[@]}" "${failed_envs[*]}" >&2
  exit 1
fi