# Config composition order: `default` is listed first, then this file (`_self_`).
# NOTE(review): this looks like a Hydra defaults list, in which case the keys
# below override same-named keys coming from `default` — confirm.
defaults:
  - default
  - _self_

# Path under which the model checkpoint and trajectory outputs are saved.
save_path: '/home/save_path'

# Training batch settings. `batch_size` here is the training batch size; it is
# a separate key from `env_config.batch_size`.
# Micro-batch size per GPU.
batch_size: 4
# Number of gradient accumulation steps.
grad_accum_steps: 2

# Number of epochs used to update the actor.
actor_epochs: 1
# NOTE(review): how `epochs` relates to `actor_epochs` is not visible in this
# file — confirm against the training loop.
epochs: 1
# Learning rate (presumably for the policy LM — confirm). Written with an
# explicit decimal point on purpose: YAML 1.1 loaders such as PyYAML resolve a
# bare `4e-6` to the string "4e-6" instead of a float.
lm_lr: 4.0e-6

# Train / test task files, given as relative paths.
# NOTE(review): the directory these paths are resolved against is not visible
# in this file — confirm.
train_tasks: "../tasks/webvoyager_train_data.jsonl"
test_tasks: "../tasks/webvoyager_test_data.jsonl"

# Prompt resources, given as relative paths: the agent prompt (JSON) and the
# evaluator prompt (plain text).
prompt_path: 'prompts/webvoyager.json'
evaluator_prompt_path: 'prompts/evaluator_prompt.txt'

# Algorithm, agent and policy-model selection.
# NOTE(review): the set of accepted values for these string options is defined
# by the consuming code, not by this file.
parallel_option: 'single'
algorithm: 'filteredbc'
agent_name: 'gemma-vllm'
policy_lm: 'google/gemma-3-12b-it'

# Generation / serving limits.
max_new_tokens: 1024
vllm_tensor_parallel_size: 4
max_attached_imgs: 3

# Run-mode switches (canonical lowercase YAML booleans).
eval_at_start: false
eval_during_training: true
online: true

# Checkpoint-saving and evaluation cadence.
# NOTE(review): the unit (epochs vs. iterations) is not visible in this file —
# confirm against the training loop.
save_freq: 1
eval_freq: 1

# Rollout / trajectory sizing.
# NOTE(review): rollout_size and safe_batch_size share the value 64 with
# env_config.batch_size; whether they must be kept in sync is not visible
# here — confirm before changing one of them.
rollout_size: 64
actor_trajectories: 512
safe_batch_size: 64

# Environment settings (a separate `test_env_config` block exists for the
# test environment).
env_config:
  batch_size: 64
  max_iter: 20
  use_webarena_eval: false
  do_eval: true
  verbose: false
  use_rich_actree: false
  memory: false

# Test-environment settings. Same keys as `env_config`; only `batch_size` (26)
# and `max_iter` (30) differ from it.
test_env_config:
  batch_size: 26
  max_iter: 30
  use_webarena_eval: false
  do_eval: true
  verbose: false
  use_rich_actree: false
  memory: false

# wandb logging
use_wandb: true
# The key, entity and project are left empty in this file. Avoid committing a
# real API key here; supply it at run time instead.
wandb_key: ''
entity_name: ''
project_name: ''
# NOTE(review): the original inline comment read "actually online" — presumably
# a reminder that this run uses the online setting (`online: true`); confirm.
run_name: 'webvoyager_rl'
