---
# Hyperparameters for a single "SAC-N" run on the "pen-human-v1" environment.
# The section headings below are inferred from the key names alone; the script
# that consumes this file is not visible here, so confirm exact semantics
# against it before relying on any grouping.

# Run identity (presumably used for experiment tracking -- TODO confirm).
project: 'CORL'
group: 'sac-n-pen-human-v1-multiseed-v0'
name: 'SAC-N'

# Environment and hardware.
env_name: 'pen-human-v1'
max_action: 1.0
device: cuda

# Seeds and reproducibility.
train_seed: 10
eval_seed: 42
deterministic_torch: false

# Model size.
hidden_dim: 256
num_critics: 100

# Optimisation.
actor_learning_rate: 0.0003
critic_learning_rate: 0.0003
alpha_learning_rate: 0.0003
gamma: 0.99
tau: 0.005

# Data.
batch_size: 256
buffer_size: 2000000
normalize_reward: false

# Training schedule.
num_epochs: 3000
num_updates_on_epoch: 1000

# Evaluation, logging and checkpointing.
# NOTE(review): the unit of eval_every / log_every (epochs vs. update steps)
# is not stated in this file -- verify against the training script.
eval_every: 5
eval_episodes: 10
log_every: 100
# Explicit null: no checkpoint path is configured.
checkpoints_path: null