##################################################################################
# Common
##################################################################################

# Run identification. `group_name`, `exp_name` and `seed` are also interpolated
# into `save_path` below.
project_name: 'SimbaV2'
group_name: 'test'
exp_name: 'test'
seed: 0
# NOTE(review): presumably a label for the machine/cluster running the job —
# confirm against the code that reads it.
server: 'local'
# Output path template, built by interpolation from the keys above plus
# `env.env_name`, which is expected to come from the `env` config group
# selected in `defaults`.
save_path: 'models/${group_name}/${exp_name}/${env.env_name}/${seed}'
# Null by default. NOTE(review): presumably a path to load a saved model
# from — confirm against the consumer.
load_path: null

##################################################################################
# Training
##################################################################################

# Discount factor, using the heuristic borrowed from TD-MPCv2:
#   gamma = clip((T / 5 - 1) / (T / 5), 0.95, 0.995)
# where T = eff_episode_len = env.max_episode_steps / env.action_repeat.
# Both values go through the `eval` resolver, so they are quoted to make it
# explicit that YAML sees plain strings here.
eff_episode_len: "${eval:'${env.max_episode_steps} / ${env.action_repeat}'}"
gamma: "${eval:'max(min((${eff_episode_len} / 5 - 1) / (${eff_episode_len} / 5), 0.995), 0.95)'}"
n_step: 1

num_epochs: 100
# Mirrors the action repeat configured on the selected env.
action_repeat: '${env.action_repeat}'

# Schedule. The `*_per_interaction_step` keys below are intervals counted in
# interaction steps.
num_interaction_steps: null              # number of update steps; left null in this file
updates_per_interaction_step: 1          # fixed at 1
evaluation_per_interaction_step: 50_000  # evaluation interval
metrics_per_interaction_step: 50_000     # metrics-logging interval
recording_per_interaction_step: -1       # -1: recording is not supported for offline RL yet
logging_per_interaction_step: 10_000     # logging interval
save_checkpoint_per_interaction_step: null
num_eval_episodes: 100
num_record_episodes: 1

# Hydra defaults list. `_self_` is listed first, so this file is composed
# before the config groups below; on conflicting keys, later entries in the
# list take precedence.
defaults:
  - _self_
  - agent: simbaV2_bc
  - buffer: numpy_uniform
  - env: d4rl
