# Algorithm section: `class` names the algorithm and the remaining keys are its
# hyperparameters.
# NOTE(review): the meaning of each value is defined by the MismatchedRPL
# implementation, which is not visible from this file. The per-key notes below
# are assumptions to confirm against it.
algorithm:
  class: MismatchedRPL
  beta: 0.3333
  # presumably the expectile level of an expectile-regression loss -- confirm
  expectile: 0.7
  # presumably 0.0 turns the reward regularization term off -- confirm
  reward_reg: 0.0
  # The three *_replay_weight keys below all carry the same value (0.5).
  reg_replay_weight: 0.5
  actor_replay_weight: 0.5
  value_replay_weight: 0.5
  # presumably the soft (Polyak) target-update rate -- confirm
  tau: 0.005
  # presumably an upper clip applied to an exponentiated weight -- confirm
  max_exp_clip: 100.0
  discount: 0.99
  # presumably the target update interval, in training steps -- confirm
  target_freq: 1

# Run-level settings. Keys written as `null` are left unset by this file.
checkpoint: null
seed: 0
name: default
debug: false
# NOTE(review): null presumably lets the runner pick the device -- confirm.
device: null
# Experiment-tracking settings (assumed from the key name to be Weights &
# Biases -- confirm). Here `activate` is false and no entity/project is set.
wandb:
  activate: false
  entity: null
  project: null

# Environment: HalfCheetah-v3 wrapped by MujocoParamOverWrite.
env: HalfCheetah-v3
# No extra environment kwargs. Written as an explicit null (the same parsed
# value as a bare `env_kwargs:`) to match the other unset keys in this file.
env_kwargs: null
env_wrapper: MujocoParamOverWrite
env_wrapper_kwargs:
  # Parameters the wrapper overwrites; only gravity is set here.
  overwrite_args:
    gravity: 0.8
  # NOTE(review): with do_scale true, 0.8 is presumably a multiplier on the
  # default gravity rather than an absolute value -- confirm in the wrapper.
  do_scale: true

# Optimizer settings. `default` is the only entry defined in this file.
# NOTE(review): presumably used for every network that has no entry of its
# own -- confirm against the consumer.
optim:
  default:
    class: Adam
    lr: 0.0003

# Network definitions: one entry per network (actor, reward, pref_value,
# rep_value), each naming a `class` plus its settings. All four use
# hidden_dims [256, 256].
network:
  actor:
    class: SquashedGaussianActor
    hidden_dims: [256, 256]
    reparameterize: false
    conditioned_logstd: false
    # NOTE(review): presumably the range the actor clamps its log-std to --
    # confirm against SquashedGaussianActor.
    logstd_min: -5
    logstd_max: 2
  reward:
    class: MLP
    # Disabled option kept for reference; no ensemble_size is set for the
    # reward network.
    # ensemble_size: 1
    hidden_dims: [256, 256]
  # pref_value and rep_value carry identical settings (Critic, ensemble_size 2).
  pref_value:
    class: Critic
    ensemble_size: 2
    hidden_dims: [256, 256]
  rep_value:
    class: Critic
    ensemble_size: 2
    hidden_dims: [256, 256]

# Two dataset entries, in this order:
#   1. MismatchedComparisonDataset in trajectory mode (segment_length 64,
#      batch_size 96) on HalfCheetah-gravity0.8 -- the same gravity value
#      that is set under env_wrapper_kwargs.overwrite_args.
#   2. MismatchedOfflineDataset in transition mode (batch_size 512) on
#      HalfCheetah-gravity1.2.
# NOTE(review): the differing environments look intentional given the
# "Mismatched" class names -- confirm. The consumer may rely on list order.
dataset:
  - class: MismatchedComparisonDataset
    env: HalfCheetah-gravity0.8
    batch_size: 96
    mode: trajectory
    segment_length: 64
  - class: MismatchedOfflineDataset
    env: HalfCheetah-gravity1.2
    batch_size: 512
    mode: transition


# Data loader settings. Each dataset entry sets its own batch_size, so the
# loader-level batch_size is left null.
dataloader:
  num_workers: 0  # sample data in the main thread
  batch_size: null  # do not merge the data along the batch axis

# Training loop settings.
# NOTE(review): the *_freq values are presumably intervals measured in
# training steps -- confirm against the trainer.
trainer:
  # NOTE(review): null presumably means no environment interaction during
  # training (offline setting) -- confirm.
  env_freq: null
  total_steps: 1000000
  log_freq: 500
  profile_freq: 500
  eval_freq: 5000

# Evaluation settings; `function` names the evaluation routine to run.
eval:
  function: eval_offline
  # NOTE(review): presumably the number of evaluation episodes -- confirm.
  num_ep: 25
  deterministic: true

# No schedulers configured. The explicit null parses to the same value as a
# bare `schedulers:` and matches the `processor: null` spelling below.
schedulers: null

# No processor configured.
processor: null
