# Grid sweep `TOReL_MOReL` over torel/orl_algos/torel_morel.py.
# NOTE(review): the `${program}` / `${args_no_boolean_flags}` macros match the
# Weights & Biases sweep format; confirm that is the consumer of this file.
name: TOReL_MOReL
method: grid
# NOTE(review): `team` looks like a placeholder entity; confirm before launch.
entity: team
program: torel/orl_algos/torel_morel.py
# Launch line for each run: interpreter, then the program path, then the
# sweep-generated arguments.
command:
  - python3.10
  - ${program}
  - ${args_no_boolean_flags}

parameters:
  # --- logging ---
  # Every key here carries a single `value`, so none of them is swept.
  wandb_project:
    value: sorel-torel-test
  # NOTE(review): `team` and `group` read like placeholders; confirm.
  wandb_team:
    value: team
  wandb_group:
    value: group
  log:
    value: true

  # --- run identification ---
  seed:
    value: 0
  algo:
    value: morel
  num_updates:
    # Plain digits on purpose: the `3_000_000` spelling is an integer only
    # under YAML 1.1 resolvers (e.g. PyYAML); YAML 1.2 core-schema parsers
    # read it as a string.
    value: 3000000

  # --- environment and offline dataset ---
  task:
    value: brax-halfcheetah-full-replay
  n_value:
    value: 200000
  # Reward bounds; both are single values, so neither is swept.
  max_reward:
    value: 3.5
  min_reward:
    value: -0.5
  # NOTE(review): separate from `gamma` (0.99) in the morel section; confirm
  # which discount each key controls.
  discount_factor:
    value: 0.998

  # --- morel ---
  lr:
    # Written with a decimal point on purpose: PyYAML's YAML 1.1 float
    # resolver requires one, so a bare `1e-4` is loaded as the string "1e-4"
    # instead of a float.
    value: 1.0e-4
  batch_size:
    value: 256
  gamma:
    value: 0.99
  polyak_step_size:
    value: 0.005
  model_retain_epochs:
    value: 5
  num_critics:
    value: 10
  rollout_batch_size:
    value: 50000
  rollout_interval:
    value: 1000
  rollout_length:
    value: 5
  dataset_sample_ratio:
    value: 0.01
  # The only swept axes in this file: with `method: grid` the sweep covers
  # every combination of the two lists below (6 x 4 = 24 runs).
  threshold_coef:
    values: [0, 5, 10, 15, 20, 25]
  term_penalty_offset:
    values: [-30, -50, -100, -200]

  # --- evaluation ---
  num_eval_workers:
    value: 20
