---
# Hyperparameters for a single training run (name: EDAC, env: pen-human-v1).
# Keys are kept in alphabetical order; every value parses to the same type
# and content as before (strings are quoted uniformly, nothing else changed).
# NOTE(review): per-key remarks below are inferred from the key names only;
# confirm against the code that loads this file.
actor_learning_rate: 0.0003
alpha_learning_rate: 0.0003
batch_size: 256
buffer_size: 1000000
# Explicit null: no checkpoint path is configured here.
checkpoints_path: null
critic_learning_rate: 0.0003
deterministic_torch: false
device: 'cuda'
env_name: 'pen-human-v1'
# NOTE(review): presumably the EDAC ensemble-diversity penalty weight; confirm.
eta: 1000.0
eval_episodes: 10
# NOTE(review): the unit (epochs vs. updates) is not visible here; confirm.
eval_every: 5
eval_seed: 42
gamma: 0.99
# Shares the env name and carries a "multiseed" tag; this file sets
# train_seed 10.
group: 'edac-pen-human-v1-multiseed-v2'
hidden_dim: 256
log_every: 100
max_action: 1.0
name: 'EDAC'
normalize_reward: false
num_critics: 20
# If both counts apply as named: 3000 * 1000 = 3,000,000 updates in total.
num_epochs: 3000
num_updates_on_epoch: 1000
project: 'offline-RL-init'
tau: 0.005
train_seed: 10