agent:
  type: naive             # Agent implementation to run (e.g., 'naive', 'cot', etc.)
  remember_cot: true      # Keep the agent's reasoning available across turns
  max_text_history: 16    # Cap on text messages retained in the history
  max_image_history: 0    # Cap on images retained in the history
  max_cot_history: 1      # Cap on chain-of-thought steps retained in the history (applies to the 'cot' agent type)
  max_icl_history: 1000   # Cap on ICL steps retained in the history (applies to the 'few_shot' agent type)
  cache_icl: false        # NOTE(review): undocumented; presumably toggles caching of the ICL context - confirm against the agent code

eval:
  # NOTE(review): despite the `_dir` suffix this value is a single .txt file, and the
  # absolute path is specific to one user's machine - override it when running elsewhere.
  prompt_dir: "/u/wchen11/incontext_RL/collect/reflect3.txt"
  output_dir: "results"  # Directory where evaluation results will be saved
  data_dir: "results"    # NOTE(review): presumably the directory input data (e.g. the ICL dataset) is read from; currently identical to output_dir - confirm
  resume_from: null      # Path to the incomplete results file to resume an incomplete run
  num_workers: 16        # Number of parallel workers. Increase for faster evaluation if you have enough resources
  num_episodes:          # Minimum number of episodes to run for each environment. You can optionally increase this to get more reliable results
    nle: 5               # Number of episodes for the 'nle' environment
    minihack: 5          # Number of episodes for each 'minihack' task
    babyai: 64           # Number of episodes for each 'babyai' task
    crafter: 10          # Number of episodes for the 'crafter' environment
    babaisai: 64         # Number of episodes for each 'babaisai' task
    textworld: 10        # Number of episodes for each 'textworld' task
  max_steps_per_episode: null   # Max steps per episode; null uses the environment default
  save_trajectories: true       # Whether to save agent trajectories (text only)
  save_images: false            # Whether to save images from the environment
  icl_episodes: 1               # NOTE(review): undocumented; presumably the number of in-context-learning demonstration episodes - confirm
  icl_dataset: records          # NOTE(review): undocumented; presumably the name/location of the ICL demonstration dataset - confirm
  feedback_on_invalid_action: true   # Whether to provide feedback on invalid actions
  batch_size: 512               # NOTE(review): undocumented; confirm what is being batched

client:
  client_name: openai                    # Which LLM client to use (e.g., 'openai', 'gemini', 'claude')
  model_id: gpt-4o                       # Identifier of the model to query (e.g., 'gpt-4', 'gpt-3.5-turbo')
  base_url: "http://localhost:8080/v1"   # API base URL (if using a local server)
  generate_kwargs:
    temperature: 1.0                     # Sampling temperature; null falls back to the API default temperature
    max_tokens: 4096                     # Upper bound on tokens generated in the response
  timeout: 600                           # API request timeout, in seconds
  max_retries: 5                         # How many times a failed API call is retried
  delay: 2                               # Exponential backoff factor between retries, in seconds
  alternate_roles: false                 # Whether the client requires alternating between the agent and the environment

envs:
  # Environments to evaluate, separated by hyphens
  # (e.g. babyai-babaisai-textworld-crafter-nle-minihack). Only babyai is enabled here.
  names: babyai
  env_kwargs:
    seed: null                  # Random seed; null means a random seed is used
  nle_kwargs:
    character: "@"              # Character representing the agent in NLE (quoted: '@' is a reserved YAML indicator)
    max_episode_steps: 100000   # Max steps per episode in NLE (no digit separators, so every YAML parser reads an integer)
    no_progress_timeout: 150    # Timeout for no progress in NLE
    savedir: null               # Directory to save NLE data; null disables saving
    save_ttyrec_every: 0        # Frequency of saving TTY recordings
    skip_more: true             # Whether to skip the 'more' prompt in NLE
  # The keys below mirror nle_kwargs; the penalty_* and autopickup settings are
  # undocumented here - NOTE(review): confirm their meaning against the MiniHack environment.
  minihack_kwargs:
    character: "@"
    max_episode_steps: 100
    penalty_step: -0.01
    penalty_time: 0.0
    penalty_mode: constant
    savedir: null
    save_ttyrec_every: 0
    autopickup: false
    skip_more: true
  babyai_kwargs:
    num_dists: 0                # NOTE(review): presumably the number of distractor objects - confirm
    max_steps: 20
  crafter_kwargs:
    area: [64, 64]
    view: [9, 9]
    size: [256, 256]            # Image size in Crafter
    reward: true
    seed: null
    max_episode_steps: 2000
  textworld_kwargs:
    objective: true
    description: true
    score: true
    max_score: true
    won: true
    max_episode_steps: 80
    textworld_games_path: tw_games
  babaisai_kwargs:
    add_ruleset: true
    max_steps: 32

tasks:
  # Tasks for the NLE environment
  nle_tasks:
    - "NetHackChallenge-v0"

  # Tasks for the MiniHack environment
  minihack_tasks:
    - "MiniHack-Boxoban-Hard-v0"
    - "MiniHack-Boxoban-Medium-v0"
    - "MiniHack-MazeWalk-9x9-v0"
    - "MiniHack-MazeWalk-15x15-v0"
    - "MiniHack-Corridor-R3-v0"
    - "MiniHack-CorridorBattle-Dark-v0"
    - "MiniHack-Quest-Easy-v0"
    - "MiniHack-Quest-Medium-v0"

  # Tasks for Crafter
  crafter_tasks:
    - "default"

  # Tasks for BabyAI; only "pickup" is enabled, uncomment entries to run the others
  babyai_tasks:
    # - "BabyAI-MixedTrainLocal-v0/goto"
    - "BabyAI-MixedTrainLocal-v0/pickup"
    # - "BabyAI-MixedTrainLocal-v0/open"
    # - "BabyAI-MixedTrainLocal-v0/putnext"
    # - "BabyAI-MixedTrainLocal-v0/pick_up_seq_go_to"

  # Tasks for TextWorld
  textworld_tasks:
    - "treasure_hunter"
    - "the_cooking_game"
    - "coin_collector"

  # Tasks for Baba Is AI
  babaisai_tasks:
    - "env/make_win-distr_obj_rule"
    - "env/goto_win-distr_obj_rule"
    - "env/goto_win"
    - "env/goto_win-distr_obj"
    - "env/goto_win-distr_rule"
    - "env/goto_win-distr_obj-irrelevant_rule"
    - "env/make_win-distr_obj"
    - "env/make_win-distr_rule"
    - "env/make_win"
    - "env/make_win-distr_obj-irrelevant_rule"
    - "env/two_room-goto_win"
    - "env/two_room-goto_win-distr_obj_rule"
    - "env/two_room-goto_win-distr_rule"
    - "env/two_room-goto_win-distr_obj"
    - "env/two_room-goto_win-distr_obj-irrelevant_rule"
    - "env/two_room-goto_win-distr_win_rule"
    - "env/two_room-break_stop-goto_win-distr_obj_rule"
    - "env/two_room-break_stop-goto_win-distr_obj"
    - "env/two_room-break_stop-goto_win-distr_rule"
    - "env/two_room-break_stop-goto_win-distr_obj-irrelevant_rule"
    - "env/two_room-break_stop-goto_win"
    - "env/two_room-maybe_break_stop-goto_win-distr_obj_rule"
    - "env/two_room-maybe_break_stop-goto_win"
    - "env/two_room-maybe_break_stop-goto_win-distr_obj"
    - "env/two_room-maybe_break_stop-goto_win-distr_rule"
    - "env/two_room-maybe_break_stop-goto_win-distr_obj-irrelevant_rule"
    - "env/two_room-make_win-distr_obj_rule"
    - "env/two_room-make_win-distr_rule"
    - "env/two_room-make_win"
    - "env/two_room-make_win-distr_obj-irrelevant_rule"
    - "env/two_room-make_win-distr_obj"
    - "env/two_room-make_win-distr_win_rule"
    - "env/two_room-break_stop-make_win-distr_obj_rule"
    - "env/two_room-break_stop-make_win-distr_rule"
    - "env/two_room-break_stop-make_win"
    - "env/two_room-break_stop-make_win-distr_obj-irrelevant_rule"
    - "env/two_room-break_stop-make_win-distr_obj"
    - "env/two_room-make_you"
    - "env/two_room-make_you-make_win"
    - "env/two_room-make_wall_win"

hydra:
  run:
    dir: "."            # Use the current directory as the working directory
  output_subdir: null   # Do not use an output subdirectory
