# Run configuration: name "LB-SAC", environment "halfcheetah-random-v2".
# Keys are kept in alphabetical order.

# Optimizer step sizes (actor / alpha share the critic's value below).
actor_learning_rate: 0.0018
alpha_learning_rate: 0.0018
batch_size: 10000
buffer_size: 1000000
# null means no checkpoint path is configured.
checkpoints_path: null
critic_layernorm: false
critic_learning_rate: 0.0018
deterministic_torch: false
# Must be plain ASCII "cuda". A look-alike first letter (Cyrillic U+0441
# instead of Latin "c") renders identically but is a different string and
# will not be recognized as a device name.
device: "cuda"
edac_init: false
env_name: "halfcheetah-random-v2"
# Evaluation settings.
eval_episodes: 10
eval_every: 5
eval_seed: 42
gamma: 0.99
group: "lb-sac-halfcheetah-random-v2-multiseed-v0"
hidden_dim: 256
log_every: 100
max_action: 1.0
name: "LB-SAC"
# NOTE(review): only 2 critics are configured here — confirm this is the
# intended ensemble size for this run.
num_critics: 2
# Training length: epochs x updates per epoch.
num_epochs: 300
num_updates_on_epoch: 1000
project: "CORL"
tau: 0.005
train_seed: 10
