# Shared hyperparameter defaults. The `&common` anchor allows other mappings
# in this document to reuse the block through a `*common` alias.
common: &common
  # Integers are written without `_` digit separators: `100_000` is an int
  # only under YAML 1.1 resolvers, while YAML 1.2 core-schema parsers load
  # it as a string.
  buffer_size: 100000
  batch_size: 16
  learning_starts: 800
  total_timesteps: 1000000
  # Canonical lowercase booleans (same value under YAML 1.1 and 1.2).
  norm_obs: false
  norm_reward: false
  # Kept quoted so the value is always loaded as a string.
  act_fn: "SiLU"
  train_freq: 1
  gradient_steps: 1
  # NOTE(review): presumably sequence lengths measured in environment steps
  # for the two training phases -- confirm against the consuming code.
  rssm_sequence_length: 64
  actor_critic_sequence_length: 15
  # Nested mapping; presumably forwarded to the policy constructor as keyword
  # arguments -- verify against the loader.
  policy_kwargs:
    net_depth: 2
    net_width: 512
    recurrent_state_size: 1024
