# Data section: which dataset to use and how it is loaded.
data:
  dataset_type: preference_based
  # `???` is intentionally unquoted. NOTE(review): this looks like the
  # OmegaConf/Hydra "mandatory value" marker, so both paths presumably have
  # to be supplied by the caller (CLI override or parent config) -- confirm.
  train_dataset_filepath: ???
  val_dataset_filepath: ???
  env_name: imitation/PointMazeLeftVel-v0
  segment_length: 5
  max_num_pairs: 10
  # Explicitly unset; how the consumer treats a null debug_size is not
  # visible from this file.
  debug_size: null
  debug_size_mode: "shuffled"
  batch_size: 10000
  num_workers: 8
  # Canonical lowercase boolean: `True` is only a bool under loaders that
  # accept the capitalised YAML 1.1 forms, `true` is a bool everywhere.
  shuffle: true
# Model section: reward-model architecture and optimisation settings.
model:
  reward_type: preference_based
  model_type: fully_connected
  hidden_sizes: [32, 32]
  # Exponent-notation floats carry an explicit mantissa dot: YAML 1.1 loaders
  # (e.g. plain PyYAML) read `1e-2` as the string "1e-2", whereas `1.0e-2`
  # is parsed as a float by every loader.
  learning_rate: 1.0e-2
  reward_reg_weight: 1.0e-4
  target_reward_model_class: "PointMazeEnvRewardModel"
  # Explicit null: no keyword arguments are configured here.
  # NOTE(review): if the consumer expands this with `**kwargs`, an empty
  # mapping (`{}`) may be required instead -- confirm against the code that
  # instantiates the target reward model.
  target_reward_model_kwargs: null
# Training section: output location and trainer arguments.
training:
  # NOTE(review): `???` looks like the OmegaConf/Hydra "mandatory value"
  # marker, so the output directory presumably must be supplied by the
  # caller -- confirm.
  output_dir: ???
  # NOTE(review): the keys below match PyTorch Lightning `Trainer` argument
  # names, so they are presumably forwarded to the trainer -- confirm.
  trainer_args:
    gradient_clip_val: 1.0
    # NOTE(review): if this is the Lightning `Trainer` flag, `gpus` was
    # removed in Lightning 2.0 in favour of `accelerator`/`devices` --
    # verify against the pinned version before upgrading.
    gpus: 1
    max_epochs: 50
