---
# Run configuration: algorithm "TD3-BC" on environment "pen-human-v1", seed 0.
# Every key/value below is a flat top-level scalar; the grouping is purely for
# readability and has no effect on the parsed mapping.
# NOTE(review): stanza headings are inferred from the key names only — confirm
# the exact meaning of each key against the training script that loads this
# file.

# Experiment identity / tracking labels
project: CORL
group: td3-bc-adroit-pen-human-v1-multiseed-v0
name: TD3-BC

# Environment and runtime
env: pen-human-v1
seed: 0
device: cuda

# Checkpoint paths (explicit null and explicit empty string; presumably both
# mean "not set" — verify with the consumer)
checkpoints_path: null
load_model: ''

# Run length and evaluation cadence
max_timesteps: 1000000
eval_freq: 5000
n_episodes: 10

# Buffer / batching / normalization switches
buffer_size: 10000000
batch_size: 256
normalize: true
normalize_reward: false

# Algorithm hyperparameters
alpha: 2.5
discount: 0.99
tau: 0.005
expl_noise: 0.1
policy_noise: 0.2
noise_clip: 0.5
policy_freq: 2
