# Dataset and data-loading settings.
data:
  dataset_type: direct_regression
  # NOTE(review): `???` looks like the OmegaConf/Hydra mandatory-value marker,
  # i.e. these paths must be supplied by the caller — confirm against the loader.
  train_dataset_filepath: ???
  val_dataset_filepath: ???
  env_name: imitation/PointMazeLeftVel-v0
  debug_size: null
  debug_size_mode: "shuffled"
  batch_size: 4096  # alternative value noted by the author: 2048
  num_workers: 8
  # Canonical lowercase boolean; parses to the same value as `True`.
  shuffle: true
# Reward-model settings.
model:
  reward_type: direct_regression
  model_type: fully_connected
  hidden_sizes: [32, 32]  # alternative value noted by the author: [256, 256]
  # Written with an explicit decimal point: YAML 1.1 resolvers (e.g. stock
  # PyYAML) load `2e-2` as the string "2e-2" rather than a float.
  learning_rate: 2.0e-2  # alternative value noted by the author: 2.0e-4
  label_overwriting_reward_model_class: "PointMazeEnvRewardModel"
  # Explicit null; parses the same as the bare key it replaces.
  # NOTE(review): if the consumer expects a mapping here, use `{}` — confirm.
  label_overwriting_reward_model_kwargs: null
# Training-run settings.
training:
  # NOTE(review): `???` looks like the OmegaConf/Hydra mandatory-value marker,
  # i.e. the output directory must be supplied by the caller — confirm.
  output_dir: ???
  # NOTE(review): presumably forwarded as keyword arguments to the trainer;
  # the key names resemble PyTorch Lightning `Trainer` options — verify against the caller.
  trainer_args:
    gradient_clip_val: 1.0
    gpus: 1
    max_epochs: 100
