# Algorithm identifier and the keyword arguments supplied with it.
alg: BehaviorCloning
alg_kwargs:
  # Configure offline steps. These are not strictly required, but it is good
  # to set them explicitly.
  # NOTE(review): -1 looks like a sentinel value; confirm its meaning against
  # the algorithm implementation.
  offline_steps: -1
  random_steps: 0

# Optimizer identifier and the keyword arguments supplied with it.
optim: Adam
optim_kwargs:
  # Learning rate.
  lr: 0.0003

# Network identifier and the keyword arguments supplied with it.
network: ActorPolicy
network_kwargs:
  actor_class: ContinuousMLPActor
  # Presumably the width of each hidden layer; confirm against the actor class.
  hidden_layers: [512, 512]
  # Canonical lowercase boolean, read identically by YAML 1.1 and 1.2 loaders.
  ortho_init: true

# No checkpoint is specified (null).
checkpoint: null

# Evaluation environment identifier and the keyword arguments supplied with it.
eval_env: ManiSkillStateEnv
eval_env_kwargs:
  env_name: PushLargerCube-v1

# Dataset identifier and the keyword arguments supplied with it.
dataset: OfflineDictDataset
dataset_kwargs:
  # Relative path to the .npz data file.
  # NOTE(review): presumably resolved against the working directory; confirm.
  path: maniskill_dataset/data_offlinerl_push_larger_1000_only_preferred_sac.npz
  # NOTE(review): trainer_kwargs.train_dataloader_kwargs also sets
  # batch_size: 256; confirm that batching is not applied twice.
  batch_size: 256

# No processor is configured (null).
processor: null

trainer_kwargs: # Arguments given to Algorithm.train
  total_steps: 100000 # The total number of steps to train
  log_freq: 500 # How often to log values
  profile_freq: 500 # NOTE(review): presumably how often to profile; confirm
  eval_freq: 5000 # How often to run evals
  eval_fn: eval_policy # NOTE(review): presumably the name of the evaluation function; confirm
  eval_kwargs:
    num_ep: 20 # Number of environment episodes to run for evaluation, or -1 if none should be run.
  loss_metric: reward # The validation metric that determines when to save the "best_checkpoint"
  train_dataloader_kwargs:
    num_workers: 0 # Number of dataloader workers.
    batch_size: 256

# Random seed; null means no seed is specified.
seed: null