custom_envs:
  # Sokoban entry with dim_x = dim_y = 6 and a single box.
  SimpleSokoban:
    env_type: sokoban
    # Read by the environment state manager: caps the number of actions that
    # are actually executed per trajectory.
    max_actions_per_traj: 10
    env_instruction: >-
      You are solving the Sokoban puzzle. You are the player and you need to
      push all boxes to targets. When you are right next to a box, you can push
      it by moving in the same direction. You cannot push a box through a wall,
      and you cannot pull a box. The answer should be a sequence of actions,
      like <answer>Right || Right || Up</answer>
    # Only used to build the "max words" hint in the LLM prompt; it is not
    # used for rollout.
    max_tokens: 100
    # Keys here should be a subset of SokobanConfig.
    env_config:
      dim_x: 6
      dim_y: 6
      num_boxes: 1
      max_steps: 100

  # Sokoban entry with dim_x = dim_y = 8 and two boxes.
  LargerSokoban:
    env_type: sokoban
    max_actions_per_traj: 10
    env_instruction: >-
      You are solving the Sokoban puzzle. You are the player and you need to
      push all boxes to targets. When you are right next to a box, you can push
      it by moving in the same direction. You cannot push a box through a wall,
      and you cannot pull a box. The answer should be a sequence of actions,
      like <answer>Right || Right || Up</answer>
    max_tokens: 100
    # Unlike SimpleSokoban, this entry also sets search_depth.
    env_config:
      dim_x: 8
      dim_y: 8
      num_boxes: 2
      max_steps: 100
      search_depth: 10
  
  # Sokoban entry (6 x 6, one box) that supplies its own grid symbols.
  SokobanDifferentGridVocab:
    env_type: sokoban
    max_actions_per_traj: 10
    env_instruction: >-
      You are solving the Sokoban puzzle. You are the player and you need to
      push all boxes to targets. When you are right next to a box, you can push
      it by moving in the same direction. You cannot push a box through a wall,
      and you cannot pull a box. The answer should be a sequence of actions,
      like <answer>Right || Right || Up</answer>
    max_tokens: 100
    # Keys here should be a subset of SokobanConfig.
    env_config:
      search_depth: 30
      dim_x: 6
      dim_y: 6
      num_boxes: 1
      max_steps: 100
      # Integer cell code -> symbol (presumably the character rendered for
      # that cell; confirm against SokobanConfig).
      grid_lookup:
        0: 'W'
        1: '.'
        2: 'G'
        3: 'C'
        4: 'B'
        5: 'A'
        6: '@'
      # Symbol -> human-readable description of that symbol.
      grid_vocab:
        'W': 'wall'
        '.': 'empty'
        'G': 'target'
        'C': 'box on target'
        'B': 'box'
        'A': 'player'
        '@': 'player on target'

  # Same board settings as SimpleSokoban, plus render_mode set to rgb_array.
  VisualSimpleSokoban:
    env_type: sokoban
    max_actions_per_traj: 10
    env_instruction: >-
      You are solving the Sokoban puzzle. You are the player and you need to
      push all boxes to targets. When you are right next to a box, you can push
      it by moving in the same direction. You cannot push a box through a wall,
      and you cannot pull a box. The answer should be a sequence of actions,
      like <answer>Right || Right || Up</answer>
    max_tokens: 100
    # Keys here should be a subset of SokobanConfig.
    env_config:
      dim_x: 6
      dim_y: 6
      num_boxes: 1
      max_steps: 100
      render_mode: 'rgb_array'

  # Alfworld:
  #   env_type: alfworld
  #   env_instruction: "You are solving the Alfworld puzzle. "
  #   env_config: null

  # Countdown entry: one action per trajectory, no env_config mapping.
  Countdown:
    env_type: countdown
    max_actions_per_traj: 1
    env_instruction: >-
      You are solving the Countdown puzzle. You should use the num list to
      create an equation that equals the target. Example answer format:
      <think> To find an equation using [3, 5, 2] to get 4. Let's check
      2 + 5 = 7, 7 - 3 = 4. So the answer is 2 + 5 - 3 = 4.
      </think><answer>2 + 5 - 3</answer>
    max_tokens: 100
    env_config: null

  # Bandit entry whose lo/hi arms are named Phoenix and Dragon.
  Bandit:
    env_type: bandit
    max_actions_per_traj: 1
    # Empty string, not null.
    env_instruction: ''
    max_tokens: 100
    env_config:
      lo_arm_name: 'Phoenix'
      hi_arm_name: 'Dragon'

  # Bandit entry with a different pair of arm names (Trader / Librarian).
  BanditTest:
    env_type: bandit
    max_actions_per_traj: 1
    # Empty string, not null.
    env_instruction: ''
    max_tokens: 100
    env_config:
      lo_arm_name: 'Trader'
      hi_arm_name: 'Librarian'

  # FrozenLake entry: up to 10 actions per trajectory, no env_config mapping.
  FrozenLake:
    env_type: frozen_lake
    max_actions_per_traj: 10
    # Prompt text shown to the model; the task wording and the example both
    # say "avoid the hole" so they agree with each other.
    env_instruction: >-
      You are solving the FrozenLake puzzle. Avoid the hole and go to the
      target. You may move to the unintended direction due to the slippery ice.
      Example answer format: <think>To avoid the hole and go to the target, I
      should go left then go up.</think><answer>Left || Up</answer>
    max_tokens: 100
    env_config: null

  # Math QA entry allowing up to 5 actions per trajectory.
  MetamathQA:
    env_type: metamathqa
    max_actions_per_traj: 5
    # The trailing space inside the quotes is part of the instruction string.
    env_instruction: 'You are solving Math problems. '
    max_tokens: 100
    # No env_config mapping is supplied for this entry.
    env_config: null

  # Math QA entry limited to a single action per trajectory.
  MetamathQA_1Turn:
    env_type: metamathqa
    max_actions_per_traj: 1
    # The trailing space inside the quotes is part of the instruction string.
    env_instruction: 'You are solving Math problems. '
    max_tokens: 100
    # No env_config mapping is supplied for this entry.
    env_config: null

  # Math QA entry allowing up to 2 actions per trajectory.
  MetamathQA_2Turn:
    env_type: metamathqa
    max_actions_per_traj: 2
    # The trailing space inside the quotes is part of the instruction string.
    env_instruction: 'You are solving Math problems. '
    max_tokens: 100
    # No env_config mapping is supplied for this entry.
    env_config: null

  # Math QA entry allowing up to 4 actions per trajectory.
  MetamathQA_4Turn:
    env_type: metamathqa
    max_actions_per_traj: 4
    # The trailing space inside the quotes is part of the instruction string.
    env_instruction: 'You are solving Math problems. '
    max_tokens: 100
    # No env_config mapping is supplied for this entry.
    env_config: null

  # Math QA entry allowing up to 6 actions per trajectory.
  MetamathQA_6Turn:
    env_type: metamathqa
    max_actions_per_traj: 6
    # The trailing space inside the quotes is part of the instruction string.
    env_instruction: 'You are solving Math problems. '
    max_tokens: 100
    # No env_config mapping is supplied for this entry.
    env_config: null

  # Math QA entry allowing up to 8 actions per trajectory.
  MetamathQA_8Turn:
    env_type: metamathqa
    max_actions_per_traj: 8
    # The trailing space inside the quotes is part of the instruction string.
    env_instruction: 'You are solving Math problems. '
    max_tokens: 100
    # No env_config mapping is supplied for this entry.
    env_config: null

  # Math QA entry allowing up to 10 actions per trajectory.
  MetamathQA_10Turn:
    env_type: metamathqa
    max_actions_per_traj: 10
    # The trailing space inside the quotes is part of the instruction string.
    env_instruction: 'You are solving Math problems. '
    max_tokens: 100
    # No env_config mapping is supplied for this entry.
    env_config: null

  # WebShop entry: up to 10 actions per trajectory.
  WebShop:
    env_type: webshop
    max_actions_per_traj: 10
    # Active prompt. It is folded into a single line when parsed; the double
    # space before "Example process" is kept exactly as written.
    env_instruction: >-
      You are browsing an online shop. Based on the instruction, find the
      product that is close to the product description. You need to read the
      website and decide what action to take next until buying a product.
      Available actions depend on the page: in the search page you can search
      keywords, in the search result page you can click an item url or
      click[next >] to navigate to next page, in the product page you can
      click[description] or click[features] to see the details, click[blue] or
      click[x-large] to choose size and colors, click[buy now] when you decided
      to buy the product, click[back to search] to return to search page. You
      should only choose action from the available actions list.  Example
      process: I need a gingko light and 20x20 pillow cover that is hand
      painted. First search[gingko light 20x20 pillow cover hand painted],
      answer format: <answer>search[blanket with fleece throw]</answer>. Valid
      answer is search[<keywords>] or click[<clickable>].
    # env_instruction: >
    #   You are browsing an online shop. Based on the instruction, find the product 
    #   that best matches the product description. You need to iteratively take
    #   actions(search or click) in the browser and buy the chosen product. Example 
    #   process: 
    #   Instruction: Find me machine wash men's t-shirts with long sleeve with color: black, and size: xx-large big tall, and price lower than 50.00 dollars
    #   WebShop [SEP] Instruction: [SEP] Find me machine wash men's t-shirts with long sleeve with color: black, and size: xx-large big tall, and price lower than 50.00 dollars [SEP] Search
    #   Available actions: ['search[<content>]']
    #   <think>First search for the big category: machine wash men's t-shirts with long sleeve. Do not search for color, size or price, because they will be on the search result or product page</think><answer>search[machine wash men's t-shirts with long sleeve]</answer>
    #   Instruction: [SEP] Find me machine wash men's t-shirts with long sleeve with color: black, and size: xx-large big tall, and price lower than 50.00 dollars [SEP] Back to Search [SEP] Page 1 (Total results: 50) [SEP] Next > [SEP] B09QQP3356 [SEP] HAUKLIE Men's Sports Waffle Ribbed Polo Shirts Summer Short Sleeve Cotton Muscle Quarter-Zip Henley T-Shirt Tunics Tops [SEP] $10.99 [SEP] B09Q8RD8YN [SEP] Bungo Stray Anime Dogs Anime Character, Long Sleeve, Sweatshirt, Hoodie, T shirt [SEP] $19.99 [SEP] B09QGK5XHZ [SEP] WENKOMG1 Men's Long Sleeve Undershirt with Mask Turtleneck Hooded T-Shirt Solid Color Workout Tops Zipper Side Slit Shirts Slim Fit Sweatshirt Spring/Summer Tee Shirts(Gray,) [SEP] $8.39 [SEP] B09QQJJ3KM [SEP] One Lucky Teacher St Patrick Day Shamrock Tee Teachers Custom Personalized Unisex T-Shirts Long Sleeve Hoodie Sweatshirt Gifts [SEP] $100.0 [SEP] B09S3BN15C [SEP] Mens Linen Shirt,Men's Striped Shirts Casual Short Sleeve Button Down Shirts Regular Fit Hawaiian Shirts Beach Tees Tops [SEP] $3.78 to $11.38 [SEP] B09ND9DP7J [SEP] InterestPrint Gold Horse Pattern Men's 2-Piece Sleepwear Set, Long Sleeve Shirt with Pants Loungewear [SEP] $43.59 [SEP] B09T756KQ5 [SEP] WENKOMG1 Men's Crewneck Goth Tee Shirts Skull Print Tops Spring/Summer Long Sleeve Sports T-Shirt Baggy Y2K Soft Streetwear [SEP] $4.09 to $8.09 [SEP] B09Q67H373 [SEP] JSPOYOU 2022 Newly T-Shirt for Mens Funny 3D Graphics Pattern Crewneck Short Sleeve Tees Big and Tall Summer Casual Comfy Top [SEP] $1.99 to $8.99 [SEP] B09R9YCM6R [SEP] XXBR Summer T-shirts for Mens, Soldier Short Sleeve 3D Street Vintage Printed Shirt Slim Fit Muscle Casual Tee Tops [SEP] $8.98 to $11.99 [SEP] B09KLQLLT2 [SEP] Long Sleeve Superhero T Shirt Tank Top Mens Compression Shirt Men Workout Fitness Gym Shirt [SEP] $19.99
    #   Available actions: ['click[back to search]', 'click[next >]', 'click[b09qqp3356]', 'click[b09q8rd8yn]', 'click[b09qgk5xhz]', 'click[b09qqjj3km]', 'click[b09s3bn15c]', 'click[b09nd9dp7j]', 'click[b09t756kq5]', 'click[b09q67h373]', 'click[b09r9ycm6r]', 'click[b09klqllt2]']
    #   <think>Product b09klqllt2 is a Men's Long Sleeve Superhero T Shirt priced 19.99$, which satisfies my need</think><answer>click[b09klqllt2]</answer>
    #   Instruction: [SEP] Find me machine wash men's t-shirts with long sleeve with color: black, and size: xx-large big tall, and price lower than 50.00 dollars [SEP] Back to Search [SEP] < Prev [SEP] color [SEP] black | blue [SEP] blue [SEP] blue | red [SEP] bronze [SEP] brown [SEP] gold [SEP] green [SEP] red [SEP] red | blue [SEP] yellow [SEP] grey | red [SEP] size [SEP] small [SEP] medium [SEP] large [SEP] x-large [SEP] xx-large [SEP] Long Sleeve Superhero T Shirt Tank Top Mens Compression Shirt Men Workout Fitness Gym Shirt [SEP] Price: $19.99 [SEP] Rating: N.A. [SEP] Description [SEP] Features [SEP] Reviews [SEP] Buy Now
    #   Available actions: ['click[back to search]', 'click[< prev]', 'click[description]', 'click[features]', 'click[reviews]', 'click[buy now]', 'click[black | blue]', 'click[blue]', 'click[blue | red]', 'click[bronze]', 'click[brown]', 'click[gold]', 'click[green]', 'click[red]', 'click[red | blue]', 'click[yellow]', 'click[grey | red]', 'click[small]', 'click[medium]', 'click[large]', 'click[x-large]', 'click[xx-large]']
    #   <think>Product description and price look good, now select the right color</think><answer>click[black | blue]</answer>
    #   Instruction: [SEP] Find me machine wash men's t-shirts with long sleeve with color: black, and size: xx-large big tall, and price lower than 50.00 dollars [SEP] Back to Search [SEP] < Prev [SEP] color [SEP] black | blue [SEP] blue [SEP] blue | red [SEP] bronze [SEP] brown [SEP] gold [SEP] green [SEP] red [SEP] red | blue [SEP] yellow [SEP] grey | red [SEP] size [SEP] small [SEP] medium [SEP] large [SEP] x-large [SEP] xx-large [SEP] Long Sleeve Superhero T Shirt Tank Top Mens Compression Shirt Men Workout Fitness Gym Shirt [SEP] Price: $19.99 [SEP] Rating: N.A. [SEP] Description [SEP] Features [SEP] Reviews [SEP] Buy Now
    #   Available actions: ['click[back to search]', 'click[< prev]', 'click[description]', 'click[features]', 'click[reviews]', 'click[buy now]', 'click[black | blue]', 'click[blue]', 'click[blue | red]', 'click[bronze]', 'click[brown]', 'click[gold]', 'click[green]', 'click[red]', 'click[red | blue]', 'click[yellow]', 'click[grey | red]', 'click[small]', 'click[medium]', 'click[large]', 'click[x-large]', 'click[xx-large]']
    #   <think>I already selected color, now click size xx-large</think><answer>click[xx-large]</answer>
    #   Instruction: [SEP] Find me machine wash men's t-shirts with long sleeve with color: black, and size: xx-large big tall, and price lower than 50.00 dollars [SEP] Back to Search [SEP] < Prev [SEP] color [SEP] black | blue [SEP] blue [SEP] blue | red [SEP] bronze [SEP] brown [SEP] gold [SEP] green [SEP] red [SEP] red | blue [SEP] yellow [SEP] grey | red [SEP] size [SEP] small [SEP] medium [SEP] large [SEP] x-large [SEP] xx-large [SEP] Long Sleeve Superhero T Shirt Tank Top Mens Compression Shirt Men Workout Fitness Gym Shirt [SEP] Price: $19.99 [SEP] Rating: N.A. [SEP] Description [SEP] Features [SEP] Reviews [SEP] Buy Now
    #   Available actions: ['click[back to search]', 'click[< prev]', 'click[description]', 'click[features]', 'click[reviews]', 'click[buy now]', 'click[black | blue]', 'click[blue]', 'click[blue | red]', 'click[bronze]', 'click[brown]', 'click[gold]', 'click[green]', 'click[red]', 'click[red | blue]', 'click[yellow]', 'click[grey | red]', 'click[small]', 'click[medium]', 'click[large]', 'click[x-large]', 'click[xx-large]']
    #   <think>I already selected color and size, now click buy now</think><answer>click[buy now]</answer>
    # Prompt "max words" budget; this entry uses 200, while the other entries
    # visible in this file use 100.
    max_tokens: 200
    env_config: null
    # To use the full dataset, replace `env_config: null` above with the
    # mapping below (a second env_config key would be a duplicate):
    # env_config:
    #   data_path: /root/RAGEN-Dev/external/webshop-minimal/webshop_minimal
    #   file_path: /root/RAGEN-Dev/external/webshop-minimal/webshop_minimal/data/items_shuffle.json

