# Registry of named environment presets. Each entry under `custom_envs` declares
# an `env_type`, a per-trajectory action cap (`max_actions_per_traj`), and an
# `env_config` mapping of settings for that environment.
custom_envs:
  # Baseline Sokoban preset: dim_x = dim_y = 6 with a single box.
  SimpleSokoban:
    env_type: sokoban
    max_actions_per_traj: 30 # used in environment state manager to control the actual max actions executed per trajectory
    # env_instruction: "You are solving the Sokoban puzzle. You are the player and you need to push all boxes to targets. When you are right next to a box, you can push it by moving in the same direction. You cannot push a box through a wall, and you cannot pull a box. The answer should be a sequence of actions, like <answer>Right || Right || Up</answer>"
    # max_tokens: 100 # used to curate llm prompt "max words", not used for rollout
    env_config: # keys should be a subset of SokobanConfig
      dim_x: 6
      dim_y: 6
      num_boxes: 1
      # NOTE(review): presumably the environment's own step limit, distinct from
      # max_actions_per_traj above — confirm against SokobanConfig.
      max_steps: 30
      # NOTE(review): meaning not visible here; presumably a level-generation
      # search bound — confirm against SokobanConfig.
      search_depth: 30
      
  # Sokoban preset (6x6 dims, one box) whose four action ids are labelled with
  # compass directions via `action_lookup`.
  SimpleSokobanCardinal:
    env_type: sokoban_cardinal
    max_actions_per_traj: 30
    # NOTE(review): the commented-out instruction below still uses Right/Up in
    # its example answer, while this preset's action_lookup uses
    # North/South/West/East — update the example before re-enabling it.
    # env_instruction: "You are solving the Sokoban puzzle. You are the player and you need to push all boxes to targets. When you are right next to a box, you can push it by moving in the same direction. You cannot push a box through a wall, and you cannot pull a box. The answer should be a sequence of actions, like <answer>Right || Right || Up</answer>"
    # max_tokens: 100
    env_config:
      dim_x: 6
      dim_y: 6
      num_boxes: 1
      max_steps: 30
      search_depth: 30
      # Integer action id -> label string used for this preset.
      action_lookup:
        1: "North"
        2: "South"
        3: "West"
        4: "East"

  # Uses env_type `sokoban_cardinal` but, unlike SimpleSokobanCardinal, defines
  # no `action_lookup` in env_config.
  # NOTE(review): presumably the environment then falls back to its built-in
  # action labels — confirm against the environment implementation.
  FakeSokobanCardinal:
    env_type: sokoban_cardinal
    max_actions_per_traj: 30
    # env_instruction: "You are solving the Sokoban puzzle. You are the player and you need to push all boxes to targets. When you are right next to a box, you can push it by moving in the same direction. You cannot push a box through a wall, and you cannot pull a box. The answer should be a sequence of actions, like <answer>Right || Right || Up</answer>"
    # max_tokens: 100
    env_config:
      dim_x: 6
      dim_y: 6
      num_boxes: 1
      max_steps: 30
      search_depth: 30

  # Sokoban preset (6x6 dims, one box) whose four action ids are labelled with
  # arrow emoji via `action_lookup`.
  SimpleSokobanEmoji:
    env_type: sokoban_emoji
    max_actions_per_traj: 30
    # NOTE(review): the commented-out instruction below uses Right/Up in its
    # example answer, while this preset's action_lookup uses arrow emoji —
    # update the example before re-enabling it.
    # env_instruction: "You are solving the Sokoban puzzle. You are the player and you need to push all boxes to targets. When you are right next to a box, you can push it by moving in the same direction. You cannot push a box through a wall, and you cannot pull a box. The answer should be a sequence of actions, like <answer>Right || Right || Up</answer>"
    # max_tokens: 100
    env_config:
      dim_x: 6
      dim_y: 6
      num_boxes: 1
      max_steps: 30
      search_depth: 30
      # Integer action id -> label string used for this preset.
      action_lookup:
        1: "⬆️"
        2: "⬇️"
        3: "⬅️"
        4: "➡️"
  
  # Uses env_type `sokoban_emoji` but, unlike SimpleSokobanEmoji, defines no
  # `action_lookup` in env_config.
  # NOTE(review): presumably the environment then falls back to its built-in
  # action labels — confirm against the environment implementation.
  FakeSokobanEmoji:
    env_type: sokoban_emoji
    max_actions_per_traj: 30
    # env_instruction: "You are solving the Sokoban puzzle. You are the player and you need to push all boxes to targets. When you are right next to a box, you can push it by moving in the same direction. You cannot push a box through a wall, and you cannot pull a box. The answer should be a sequence of actions, like <answer>Right || Right || Up</answer>"
    # max_tokens: 100
    env_config:
      dim_x: 6
      dim_y: 6
      num_boxes: 1
      max_steps: 30
      search_depth: 30
  
  # Uses env_type `sokoban_numerical` but, unlike SimpleSokobanNumerical,
  # defines no `action_lookup` in env_config.
  # NOTE(review): presumably the environment then falls back to its built-in
  # action labels — confirm against the environment implementation.
  FakeSokobanNumerical:
    env_type: sokoban_numerical
    max_actions_per_traj: 30
    env_config:
      dim_x: 6
      dim_y: 6
      num_boxes: 1
      max_steps: 30
      search_depth: 30

  # Sokoban preset (6x6 dims, one box) whose four action ids are labelled with
  # the digit strings "1".."4".
  SimpleSokobanNumerical:
    env_type: sokoban_numerical
    max_actions_per_traj: 30
    env_config:
      dim_x: 6
      dim_y: 6
      num_boxes: 1
      max_steps: 30
      search_depth: 30
      # Keys are integers; values are quoted so they load as strings, not ints.
      # Keep the quotes.
      action_lookup:
        1: "1"
        2: "2"
        3: "3"
        4: "4"

  # Sokoban preset (6x6 dims, one box) whose four action ids are labelled with
  # the letters "A".."D".
  SimpleSokobanAlphabetical:
    env_type: sokoban_alphabetical
    max_actions_per_traj: 30
    env_config:
      dim_x: 6
      dim_y: 6
      num_boxes: 1
      max_steps: 30
      search_depth: 30
      # Integer action id -> label string used for this preset.
      action_lookup:
        1: "A"
        2: "B"
        3: "C"
        4: "D"

  # Sokoban preset (6x6 dims, one box) whose four action ids are labelled with
  # unrelated single-character symbols.
  SimpleSokobanRandom:
    env_type: sokoban_random
    max_actions_per_traj: 30
    env_config:
      dim_x: 6
      dim_y: 6
      num_boxes: 1
      max_steps: 30
      search_depth: 30
      # Values must stay quoted: a bare `*` or `&` is YAML alias/anchor syntax,
      # and a bare `1` would load as an integer instead of a string.
      action_lookup:
        1: "*"
        2: "&"
        3: "1"
        4: "M"

  # Sokoban preset (6x6 dims, one box) for env_type `sokoban_empty`; no
  # `action_lookup` is defined here.
  # NOTE(review): what "empty" refers to is not visible in this file — confirm
  # against the `sokoban_empty` environment implementation.
  SimpleSokobanEmpty:
    env_type: sokoban_empty
    max_actions_per_traj: 30
    env_config:
      dim_x: 6
      dim_y: 6
      num_boxes: 1
      max_steps: 30
      search_depth: 30

  # Same env_type as SimpleSokoban, with dim_x = dim_y = 10 and a single box.
  LargerSokoban:
    env_type: sokoban
    max_actions_per_traj: 30
    # env_instruction: "You are solving the Sokoban puzzle. You are the player and you need to push all boxes to targets. When you are right next to a box, you can push it by moving in the same direction. You cannot push a box through a wall, and you cannot pull a box. The answer should be a sequence of actions, like <answer>Right || Right || Up</answer>"
    # max_tokens: 100
    env_config:
      dim_x: 10
      dim_y: 10
      num_boxes: 1
      # NOTE(review): max_steps and search_depth are the same values used by the
      # 6x6 presets — confirm they are still adequate at 10x10.
      max_steps: 30
      search_depth: 30

  # Same env_type and 6x6 dims as SimpleSokoban, but with two boxes.
  TwoBoxesSokoban:
    env_type: sokoban
    max_actions_per_traj: 30
    env_config:
      dim_x: 6
      dim_y: 6
      num_boxes: 2
      # NOTE(review): max_steps and search_depth are the same values used by the
      # one-box presets — confirm they are still adequate with two boxes.
      max_steps: 30
      search_depth: 30

  # Combines the larger dims (dim_x = dim_y = 10) with two boxes.
  ComplexSokoban:
    env_type: sokoban
    max_actions_per_traj: 30
    # env_instruction: "You are solving the Sokoban puzzle. You are the player and you need to push all boxes to targets. When you are right next to a box, you can push it by moving in the same direction. You cannot push a box through a wall, and you cannot pull a box. The answer should be a sequence of actions, like <answer>Right || Right || Up</answer>"
    # max_tokens: 100
    env_config:
      dim_x: 10
      dim_y: 10
      num_boxes: 2
      # NOTE(review): max_steps and search_depth are the same values used by the
      # 6x6 one-box presets — confirm they are still adequate here.
      max_steps: 30
      search_depth: 30


  # Base GP-L preset: 4 cards, target 24, a single action per trajectory.
  GP-L:
    env_type: gp_l
    max_actions_per_traj: 1
    env_config:
      target: 24
      num_cards: 4
      # Booleans are written in canonical lowercase form; the loaded values are
      # unchanged.
      treat_face_cards_as_10: true
      ood: false
  
  # GP-L preset with 5 cards; target 24, a single action per trajectory.
  MediumGP-L:
    env_type: gp_l
    max_actions_per_traj: 1
    env_config:
      target: 24
      num_cards: 5
      # Booleans are written in canonical lowercase form; the loaded values are
      # unchanged.
      treat_face_cards_as_10: true
      ood: false

  # GP-L preset with 6 cards; target 24, a single action per trajectory.
  HardGP-L:
    env_type: gp_l
    max_actions_per_traj: 1
    env_config:
      target: 24
      num_cards: 6
      # Booleans are written in canonical lowercase form; the loaded values are
      # unchanged.
      treat_face_cards_as_10: true
      ood: false

  # GP-L preset (4 cards, target 24) with `ood` enabled and
  # `treat_face_cards_as_10` disabled.
  GP-L-FACE-CARDS-AS-REGULAR:
    env_type: gp_l
    max_actions_per_traj: 1
    env_config:
      target: 24
      num_cards: 4
      # Booleans are written in canonical lowercase form, and the keys follow
      # the same order as the base GP-L preset; the loaded values are unchanged.
      treat_face_cards_as_10: false
      ood: true

  GP-L-FACE-CARDS-AS-10:
    # GP-L environment with face cards always treated as 10 when calculating rewards, but rendered as 11, 12, and 13 respectively
    # to test if there is any shortcut learning.
    env_type: gp_l_face_cards_as_10
    max_actions_per_traj: 1
    env_config:
      target: 24
      num_cards: 4
      # Booleans are written in canonical lowercase form, and the keys follow
      # the same order as the base GP-L preset; the loaded values are unchanged.
      treat_face_cards_as_10: true
      ood: true
      

  # ALFWorld preset; capped at 30 actions per trajectory.
  ALFWorld:
    env_type: alfworld
    max_actions_per_traj: 30
    env_config:
      # Placeholder value — replace with your ALFWorld config path before use.
      config_path: "YOUR_ALFWORLD_CONFIG"
      # Alternative `train_eval` values, kept for reference:
      # train_eval: "eval_out_of_distribution"
      # train_eval: "eval_in_distribution"
      train_eval: "eval"
    