# Flat run configuration: one top-level mapping of scalar options.
# NOTE(review): the section headings below are inferred from key names
# only; confirm them against the code that consumes this file.

# Run identification
project: "offline-RL-init"
group: "msg-hopper-random-v2-multiseed-v2"
name: "MSG"

# Environment and runtime
env_name: "hopper-random-v2"
device: "cuda"
deterministic_torch: false
checkpoints_path: null

# Seeds
train_seed: 10
eval_seed: 42

# Training / evaluation schedule
# NOTE(review): units of eval_every and log_every are not visible here
# (epochs vs. update steps) -- TODO confirm.
num_epochs: 3000
num_updates_on_epoch: 1000
eval_every: 5
eval_episodes: 10
log_every: 100

# Data
buffer_size: 1000000
batch_size: 256
normalize_reward: false

# Model size and learning rates
hidden_dim: 256
num_critics: 64
actor_learning_rate: 0.0003
critic_learning_rate: 0.0003
alpha_learning_rate: 0.0003

# Algorithm coefficients
# NOTE(review): the roles of eta and beta are defined by the consuming
# code and are not documented in this file -- verify before tuning.
gamma: 0.99
tau: 0.005
eta: 1.0
beta: -2.0
max_action: 1.0