# Run configuration: a flat mapping of hyperparameters and run metadata for
# the run named `rebrac-finetune` on dataset `antmaze-umaze-diverse-v2`.
# Keys are kept in alphabetical order; keep new entries sorted as well.
# NOTE(review): the per-key notes below are inferred from key names and values
# only; the consuming training script is not visible here. Confirm against it.

# Actor-side settings. Presumably `*_bc_coef` weights a behavior-cloning
# penalty and `*_ln` toggles layer normalization; TODO confirm.
actor_bc_coef: 0.003
actor_learning_rate: 0.0003
actor_ln: false
actor_n_hiddens: 3
batch_size: 256
# Critic-side counterparts of the actor settings above. Note the critic uses a
# smaller learning rate than the actor and has `critic_ln` enabled.
critic_bc_coef: 0.001
# NOTE(review): keep this in plain decimal form. YAML 1.1 loaders such as
# PyYAML resolve a dot-less exponent literal (e.g. `5e-5`) as a string.
critic_learning_rate: 0.00005
critic_ln: true
critic_n_hiddens: 3
dataset_name: antmaze-umaze-diverse-v2
# Evaluation schedule and seed. Presumably `eval_every` is measured in update
# steps; verify against the training loop.
eval_episodes: 100
eval_every: 50000
eval_seed: 42
# Set to 0.0 here; presumably this disables exploration noise. TODO confirm.
expl_noise: 0.0
gamma: 0.999
# NOTE(review): this value embeds the `dataset_name` string; keep the two in
# sync if the dataset is changed.
group: rebrac-finetune-antmaze-umaze-diverse-v2
hidden_dim: 256
min_decay_coef: 0.5
mixing_ratio: 0.5
# `name`, `group` and `project` look like experiment-tracker labels; confirm
# how the logger consumes them.
name: rebrac-finetune
noise_clip: 0.5
normalize_q: true
normalize_reward: true
normalize_states: false
# The offline and online phases are given the same number of updates here.
num_offline_updates: 1000000
num_online_updates: 1000000
num_warmup_steps: 0
policy_freq: 2
policy_noise: 0.2
project: CORL
replay_buffer_size: 2000000
reset_opts: false
tau: 0.005
train_seed: 0
use_calibration: false
