# Experiment configuration: one entry per environment under `envs`.
# Each entry names the environment and, per stage, lists the config file(s)
# and dataset sizes to use. Config paths are relative; the base directory is
# not specified in this file (TODO confirm against the loader).
envs:
  # Entry for BouncingBallsEnv-v0.
  -
    name: "BouncingBallsEnv-v0"
    expert_policy_learning:
      config: "expert_policy_learning/expert.yaml"
    data_generation:
      random_config: "data_generation/random.yaml"
      # Dataset sizes; the unit (e.g. transitions vs. trajectories) is not
      # stated here -- TODO confirm against the data generation code.
      reward_learning_train_size: 2000000
      reward_learning_val_size: 500000
      reward_evaluation_size: 500000
    reward_learning:
      # Each algorithm has a unique `name`, the dataset type it uses, and its
      # own config file. The same names reappear in the `rewards` and
      # `policies` lists further down in this entry.
      algorithms:
        -
          name: regression_random_dataset
          dataset_type: "random"
          config: "reward_learning/regression/defaults.yaml"
        -
          name: regression_random_dataset_ood
          dataset_type: "random"
          config: "reward_learning/regression/ood.yaml"
        -
          name: preference_random_dataset
          dataset_type: "random"
          config: "reward_learning/preference/defaults.yaml"
    reward_evaluation:
      # Every evaluation config below is listed twice: once with the "random"
      # dataset type and once with the "expert" dataset type.
      algorithms:
        -
          name: no_canonicalization_random_dataset
          dataset_type: "random"
          config: "reward_evaluation/no_canonicalization.yaml"
        -
          name: no_canonicalization_expert_dataset
          dataset_type: "expert"
          config: "reward_evaluation/no_canonicalization.yaml"
        -
          name: EPIC_random_dataset
          dataset_type: "random"
          config: "reward_evaluation/epic.yaml"
        -
          name: EPIC_expert_dataset
          dataset_type: "expert"
          config: "reward_evaluation/epic.yaml"
        # The out-of-distribution variants for this environment use the
        # "linear actions" config file.
        -
          name: EPIC_random_dataset_out_of_dist
          dataset_type: "random"
          config: "reward_evaluation/epic_linear_actions_out_of_dist.yaml"
        -
          name: EPIC_expert_dataset_out_of_dist
          dataset_type: "expert"
          config: "reward_evaluation/epic_linear_actions_out_of_dist.yaml"
    arbitrary_reward_policy_learning:
      # The last three entries match the `name`s under
      # reward_learning.algorithms above. The first three are not defined in
      # this file -- presumably rewards known to the consumer; TODO confirm.
      rewards:
        - goal_+1
        - goal_+1_shaping_+1
        - feas_random_goal_+1_shaping_+1
        - regression_random_dataset
        - regression_random_dataset_ood
        - preference_random_dataset
    policy_evaluation:
      config: "policy_evaluation/defaults.yaml"
      # Same identifiers, in the same order, as the `rewards` list above.
      policies:
        - goal_+1
        - goal_+1_shaping_+1
        - feas_random_goal_+1_shaping_+1
        - regression_random_dataset
        - regression_random_dataset_ood
        - preference_random_dataset
  # Entry for CustomReacherEnv-v0.
  -
    name: "CustomReacherEnv-v0"
    expert_policy_learning:
      config: "expert_policy_learning/expert.yaml"
    data_generation:
      random_config: "data_generation/random.yaml"
      # Dataset sizes; the unit (e.g. transitions vs. trajectories) is not
      # stated here -- TODO confirm against the data generation code.
      reward_learning_train_size: 4000000
      reward_learning_val_size: 1000000
      reward_evaluation_size: 1000000
    reward_learning:
      # Each algorithm has a unique `name`, the dataset type it uses, and its
      # own config file. The same names reappear in the `rewards` and
      # `policies` lists further down in this entry.
      algorithms:
        -
          name: regression_random_dataset
          dataset_type: "random"
          config: "reward_learning/regression/defaults.yaml"
        -
          name: regression_random_dataset_ood
          dataset_type: "random"
          config: "reward_learning/regression/ood.yaml"
        -
          name: preference_random_dataset
          dataset_type: "random"
          config: "reward_learning/preference/defaults.yaml"
    reward_evaluation:
      # Every evaluation config below is listed twice: once with the "random"
      # dataset type and once with the "expert" dataset type.
      algorithms:
        -
          name: no_canonicalization_random_dataset
          dataset_type: "random"
          config: "reward_evaluation/no_canonicalization.yaml"
        -
          name: no_canonicalization_expert_dataset
          dataset_type: "expert"
          config: "reward_evaluation/no_canonicalization.yaml"
        -
          name: EPIC_random_dataset
          dataset_type: "random"
          config: "reward_evaluation/epic.yaml"
        -
          name: EPIC_expert_dataset
          dataset_type: "expert"
          config: "reward_evaluation/epic.yaml"
        # The out-of-distribution variants for this environment use the
        # "w_dyn" config file.
        -
          name: EPIC_random_dataset_out_of_dist
          dataset_type: "random"
          config: "reward_evaluation/epic_w_dyn_out_of_dist.yaml"
        -
          name: EPIC_expert_dataset_out_of_dist
          dataset_type: "expert"
          config: "reward_evaluation/epic_w_dyn_out_of_dist.yaml"
    arbitrary_reward_policy_learning:
      # The last three entries match the `name`s under
      # reward_learning.algorithms above. `ground_truth` and
      # `ground_truth_shaping` are not defined in this file -- presumably
      # rewards known to the consumer; TODO confirm.
      rewards:
        - ground_truth
        - ground_truth_shaping
        - regression_random_dataset
        - regression_random_dataset_ood
        - preference_random_dataset
    policy_evaluation:
      config: "policy_evaluation/defaults.yaml"
      # Same identifiers, in the same order, as the `rewards` list above.
      policies:
        - ground_truth
        - ground_truth_shaping
        - regression_random_dataset
        - regression_random_dataset_ood
        - preference_random_dataset
  # Entry for PointMazeLeftVel-v0.
  -
    name: "PointMazeLeftVel-v0"
    expert_policy_learning:
      config: "expert_policy_learning/expert.yaml"
    data_generation:
      random_config: "data_generation/random.yaml"
      # Dataset sizes; the unit (e.g. transitions vs. trajectories) is not
      # stated here -- TODO confirm against the data generation code.
      reward_learning_train_size: 4000000
      reward_learning_val_size: 1000000
      reward_evaluation_size: 1000000
    reward_learning:
      # Only one reward learning algorithm is configured for this environment.
      # Its `name` reappears in the `rewards` and `policies` lists below.
      algorithms:
        -
          name: regression_random_dataset
          dataset_type: "random"
          config: "reward_learning/regression/defaults.yaml"
    reward_evaluation:
      # Every evaluation config below is listed twice: once with the "random"
      # dataset type and once with the "expert" dataset type.
      algorithms:
        -
          name: no_canonicalization_random_dataset
          dataset_type: "random"
          config: "reward_evaluation/no_canonicalization.yaml"
        -
          name: no_canonicalization_expert_dataset
          dataset_type: "expert"
          config: "reward_evaluation/no_canonicalization.yaml"
        -
          name: EPIC_random_dataset
          dataset_type: "random"
          config: "reward_evaluation/epic.yaml"
        -
          name: EPIC_expert_dataset
          dataset_type: "expert"
          config: "reward_evaluation/epic.yaml"
        # The out-of-distribution variants for this environment use the
        # "w_dyn" config file.
        -
          name: EPIC_random_dataset_out_of_dist
          dataset_type: "random"
          config: "reward_evaluation/epic_w_dyn_out_of_dist.yaml"
        -
          name: EPIC_expert_dataset_out_of_dist
          dataset_type: "expert"
          config: "reward_evaluation/epic_w_dyn_out_of_dist.yaml"
    arbitrary_reward_policy_learning:
      # `regression_random_dataset` matches the algorithm `name` under
      # reward_learning above. `ground_truth` is not defined in this file --
      # presumably a reward known to the consumer; TODO confirm.
      rewards:
        - ground_truth
        - regression_random_dataset
    policy_evaluation:
      config: "policy_evaluation/defaults.yaml"
      # Same identifiers, in the same order, as the `rewards` list above.
      policies:
        - ground_truth
        - regression_random_dataset
