# Dataset and data-loading settings.
# NOTE(review): `???` looks like the OmegaConf/Hydra "mandatory value" marker,
# i.e. a value that is expected to be supplied at launch time — confirm
# against the code that loads this file.
data:
  dataset_type: preference_based
  train_dataset_filepath: ???
  val_dataset_filepath: ???
  env_name: BouncingBallsEnv-v0
  segment_length: 25
  max_num_pairs: 20
  # NOTE(review): null presumably means "no debug subsetting" — TODO confirm.
  debug_size: null
  debug_size_mode: "shuffled"
  batch_size: 2048
  num_workers: 8
  shuffle: true
# Reward-model settings.
model:
  reward_type: preference_based
  model_type: learned_bouncing_balls_env
  hidden_sizes: [256, 256]
  # Exponent floats are written with an explicit decimal point so that
  # YAML 1.1 loaders (which require the ".") resolve them as floats rather
  # than as strings.
  learning_rate: 2.0e-4
  reward_reg_weight: 1.0e-2
  target_reward_model_class: "BouncingBallsEnvRewardModel"
  # NOTE(review): presumably forwarded as keyword arguments to the class named
  # in target_reward_model_class — confirm against the consuming code.
  target_reward_model_kwargs:
    reaching_goal_reward: 1
    obstacle_collision_reward: 0
    action_magnitude_reward: 0
    distance_from_goal_reward: 0
    shaping_toward_goal_factor: 1.0
    shaping_discount: 0.95
# Training-run settings.
training:
  # NOTE(review): `???` looks like the OmegaConf/Hydra "mandatory value"
  # marker, i.e. the output directory must be supplied at launch time —
  # confirm against the code that loads this file.
  output_dir: ???
  # NOTE(review): the key names below match PyTorch Lightning `Trainer`
  # arguments, so this mapping is presumably forwarded to it — confirm.
  # If so, be aware that `gpus` was deprecated in Lightning 1.7 and removed
  # in 2.0 in favour of `accelerator` / `devices`.
  trainer_args:
    gradient_clip_val: 1.0
    gpus: 1
    max_epochs: 20
