# @package _group_
# Experiment identity: which environment to run, how episodes terminate,
# the names the run is filed under, and the random seed.
# NOTE(review): the "gym___" prefix and the "no_termination" name are resolved
# by the consuming code, which is not visible here — confirm accepted values.
env: "gym___HalfCheetah-v4"
term_fn: "no_termination"
# The name mentions "3_mil_steps"; keep it in sync with num_steps if that changes.
exp_name: "sac_halfcheetah_3_mil_steps_test"
# Presumably the project name used by the experiment logger — verify.
project_name: "optimistic_rl_cheetah"
seed: 0

# Training / rollout schedule.
# 0 presumably disables any penalty on actions — TODO confirm in the consumer.
action_penalty: 0
# If both are counted in environment steps, 3000000 / 1000 = 3000 epochs
# — TODO confirm the units.
num_steps: 3000000
epoch_length: 1000
rollout_batch_size: 1000
# NOTE(review): looks like the MBPO-style
# [start_epoch, end_epoch, start_length, end_length] layout; if so, both
# lengths being 1 would keep the rollout length constant — confirm.
rollout_schedule: [20, 150, 1, 1]
# Presumably 10 SAC gradient updates every 1 step — verify against the trainer.
num_sac_updates_per_step: 10
sac_updates_every_steps: 1
num_epochs_to_retain_sac_buffer: 1

# Model params
# Settings for the learned model: ensemble size, output type, how predictions
# are propagated, and the reward-percentile options.
ensemble_size: 7
deterministic: false
propagation_method: expectation
use_optimism: false # for optimistic reward selection
# NOTE(review): the three percentile values below are presumably only read
# when use_optimism is true, with the low percentile moving from the initial
# to the final value over training — confirm against the consumer.
initial_low_reward_percentile: 0.1
final_low_reward_percentile: 0.5
# Written as the integer 1, while the other percentiles are floats; verify the
# consumer accepts an int here.
high_reward_percentile: 1
clamp_max_logvar: true

# SAC agent hyperparameters (all keys share the "sac_" prefix).
sac_gamma: 0.99
sac_tau: 0.005
# NOTE(review): with sac_automatic_entropy_tuning set to true below, this fixed
# alpha is presumably only an initial value or is ignored — confirm.
sac_alpha: 0.2
sac_policy: "Gaussian"
sac_target_update_interval: 1
sac_automatic_entropy_tuning: true
# Presumably only used when automatic entropy tuning is enabled — verify.
sac_target_entropy: -1
sac_hidden_size: 512
sac_lr: 0.0003
sac_batch_size: 256
