# Identifier of this configuration.
# NOTE(review): presumably used as the experiment/run name - confirm where it is consumed.
name: ft_l2
# Global random seed; reused further down through the `${seed}` interpolation.
seed: 0

# Hydra defaults list: the configs composed, in order, to build this experiment.
defaults:
  - logger: wandb
  - scenario: halfcheetah/forgetting
  - hydra: hydra
  # Generic baseline parameters for the algorithm, both agents and evaluation.
  - framework/params/algorithm/generic_sac
  - framework/params/policy_agent/generic_policy
  - framework/params/critic_agent/generic_critic
  - framework/params/evaluation/generic_evaluation
  # Scenario-specific parameters, selected by the `scenario` option chosen
  # above. Entries marked `optional` are skipped when no matching config exists.
  # Best hyperparameters for SAC, loaded if available.
  - optional framework/params/algorithm: ${scenario}
  # Layer norm and random policy warmup if CW.
  # NOTE(review): "CW" presumably means Continual World - confirm.
  - optional framework/params/policy_agent: ${scenario}
  # Layer norm if CW.
  - optional framework/params/critic_agent: ${scenario}
  # Replace Hydra's launcher with the submitit SLURM launcher.
  - override hydra/launcher: submitit_slurm

# Framework to instantiate, given as a dotted class path plus its parameters.
framework:
  classname: crl.frameworks.TwoSteps
  # Resolves to the top-level `seed` value of this file.
  seed: ${seed}
  params:
    checkpoint: false

    # NOTE(review): presumably the second stage run by TwoSteps - confirm.
    algorithm2:
      classname: crl.algorithms.L2
      params:
        # NOTE(review): assumes a CUDA device with index 0 is available.
        device: cuda:0
        # NOTE(review): what is sampled 2048 times is not visible here.
        n_samples: 2048

    # Policy agent class and its L2 coefficient.
    policy_agent:
      classname: crl.agents.L2ActionAgent
      # Other values listed by the author: 0.01, 1., 100, 10000.
      # NOTE(review): presumably the candidates of a sweep - confirm.
      l2_coeff: 1.0