---
# Flat hyperparameter mapping for the run named 'SAC-N' on the
# 'maze2d-umaze-v1' environment.
# NOTE(review): the stanza headings below are inferred from key names
# only; the script that consumes this file is not visible here, so
# confirm each key's meaning against it before relying on a heading.

# Run identity (presumably experiment-tracker fields -- confirm).
project: 'offline-RL-init'
group: 'sac-n-maze2d-umaze-v1-multiseed-v0'
name: 'SAC-N'

# Environment and data.
env_name: 'maze2d-umaze-v1'
max_action: 1.0
normalize_reward: false
buffer_size: 1000000

# Network shape.
hidden_dim: 256
num_critics: 25

# Optimisation. Decimal notation is kept on purpose: exponent forms
# such as 3e-4 are read as strings by YAML 1.1 loaders like PyYAML.
actor_learning_rate: 0.0003
critic_learning_rate: 0.0003
alpha_learning_rate: 0.0003
batch_size: 256
gamma: 0.99
tau: 0.005

# Training length.
num_epochs: 3000
num_updates_on_epoch: 1000

# Evaluation and logging cadence (units not visible here -- confirm
# whether eval_every / log_every count epochs or update steps).
eval_every: 5
eval_episodes: 10
eval_seed: 42
log_every: 100

# Runtime.
train_seed: 10
device: 'cuda'
deterministic_torch: false
# Explicit null; presumably disables checkpoint saving -- confirm.
checkpoints_path: null