#!/bin/bash
#==============================================================================
# Auxiliary functions
#==============================================================================
# Turn a `true`/`false` setting into a command-line switch.
#   $1 - flag name, without the leading dashes
#   $2 - the literal string `true` enables the flag; any other value disables it
# Prints `--<flag>` when enabled and an empty line otherwise, so callers can
# capture the result with $(...) and splice it into a command line.
get_flag() {
    local name=$1 enabled=$2
    case "$enabled" in
        true) echo "--$name" ;;
        *)    echo "" ;;
    esac
}

#===============================================================================
# Set environment variables
#===============================================================================
# Suppress some tensorflow warnings.
export TF_CPP_MIN_LOG_LEVEL=2

# Assure correct relative imports (useful if running on slurm): Python's import
# path is set to the directory this script is launched from.
PYTHONPATH=$(pwd)
export PYTHONPATH

# Export the website URLs.
## MODE: use `local_<benchmark>` if deploying sites locally, `demo_<benchmark>` if using the author's hosted websites.
##       One of: 'demo_vwebarena', 'demo_webarena', 'local_vwebarena', 'local_webarena'.
MODE='local_vwebarena'
## endpoint: e.g. website accessible at http://localhost.com => endpoint=localhost.
##           If running on machineA and accessing a website hosted on machineB, use machineB's IP.
endpoint='localhost'
# Sourced (not executed) so that whatever the helper sets is visible in this shell.
source ./scripts/environments/set_env_variables.sh "${MODE}" "${endpoint}"

#===============================================================================
# Parametrization for end to end eval
#===============================================================================

#-------------------------------------------------------------------------------
# Tasks
#-------------------------------------------------------------------------------
# Start ID of tasks to evaluate. If smaller than the initial ID, defaults to the first task.
test_start_idx=-1
# End ID of tasks to evaluate. If greater than the number of tasks, defaults to the last task.
test_end_idx=1000

# Path to a file containing a list of task IDs to evaluate. Overwrites `test_start_idx` and `test_end_idx`.
task_list=''
# Overwrite the number of tasks to evaluate. If <0, or none, will evaluate all tasks.
max_tasks=-1

shuffle_tasks=$(get_flag 'shuffle_tasks' true)
# Path to the directory containing the json files with the test config for each task.
test_config_base_dir=""

# Path to a directory of HTML files from previous executions; used for reproducing
# the executions without API calls ("TeacherForcingAgent"). Examples:
# trajectory_html_path='/home/mashalimay/webarena/WebGUIAgents/experiments/gemini-2.0-exp-flash/base_prev_utterances_critique_not_vague/reddit/htmls'
# trajectory_html_path="/home/mashalimay/webarena/WebGUIAgents/experiments/gemini-2.0-exp-flash/base_prev_utterances_critique_not_vague/shopping/htmls/"
# The variable is forwarded to run.py further down, so define it explicitly here:
# a value already present in the environment is kept, otherwise it is empty.
trajectory_html_path=${trajectory_html_path:-}

#-------------------------------------------------------------------------------
# Models and Agent Configuration
#-------------------------------------------------------------------------------
# Path to the YAML configuration for the Agent.
agent_config_file=''

# If true, does not caption the text observations of the webpage, even if a captioner is provided.
no_caption_text_obs=$(get_flag 'no_caption_text_obs' false)

# Device to hold the captioner model. Options: cuda, server-cuda, cpu
agent_captioning_model_device='server-cuda'

#-------------------------------------------------------------------------------
# Evaluation
#-------------------------------------------------------------------------------
# Default: 30. Max number of environment steps allowed. If exceeded, FAIL.
max_steps=30
# Default: 3. Number of parsing failures allowed. If exceeded, FAIL.
parsing_failure_th=3
# Default: 5 (set to 10 here). Max number of repeated actions allowed. If exceeded, FAIL.
repeating_action_failure_th=10
# Default: OpenAI. Defines the provider for fuzzy match evals.
fuzzy_match_provider='google'

#-------------------------------------------------------------------------------
# Prompting
#-------------------------------------------------------------------------------
# Path to raw files to build json prompts.
path_raw_prompts=agent/prompts/raw/base

#-------------------------------------------------------------------------------
# Observation and action config
#-------------------------------------------------------------------------------
# Options: accessibility_tree, accessibility_tree_with_captioner, image_som
observation_type=image_som
# Define sizes for the viewport of the browser.
# Width default: 1280
viewport_width=1280
# Height default: 720 for small context window models | 2048 for large context window models
viewport_height=2048
# Default: true
current_viewport_only=$(get_flag 'current_viewport_only' true)

#-------------------------------------------------------------------------------
# Execution config
#-------------------------------------------------------------------------------
# If true, skip the creation of autologin cookies.
SKIP_COOKIES=true
# If > 0, hard sleep after execution; else, automatically wait for the page to stabilize.
sleep_after_execution=0.0


#-------------------------------------------------------------------------------
# Rendering and outputs config
#-------------------------------------------------------------------------------
# Save HTML files with trajectories.
render_screenshot=$(get_flag 'render_screenshot' true)
# Save traces in the result directory.
save_trace_enabled=$(get_flag 'save_trace_enabled' false)
# Displays the browser.
render=$(get_flag 'render' false)
# Display the browser in slow motion: 100 if rendering the browser, else 0.
if [ "$render" = '--render' ]; then
    slow_mo=100
else
    slow_mo=0
fi
# Logs observation and prompt lengths; note: token counting delays execution; turn it off to accelerate.
log_obs_lens=$(get_flag 'log_obs_lens' false)

show_scroll_bar=$(get_flag 'show_scroll_bar' true)

#-------------------------------------------------------------------------------
# Results
#-------------------------------------------------------------------------------
# Save execution traces to this directory; if empty, automatically creates a directory with the current date and time.
result_dir=''

#-------------------------------------------------------------------------------
# LLM deployment config
#-------------------------------------------------------------------------------
# HuggingFace deployment params
# HuggingFace only. Engine used for deployment. Options: 'tgi', 'automodel', 'vllm'
deployment_mode='automodel'
# automodel only: if true, uses flash attention.
flash_attn=$(get_flag 'flash_attn' true)

# TGI only deployment params
# Endpoint for hosting the model. Example: 'http://127.0.0.1:8080'
tgi_model_endpoint='http://127.0.0.1:8080'
# If true, will deploy a local tgi server.
local=$(get_flag 'local' false)

# VLLM deployment params
# vllm only. Activates Eager mode when using `Transformers`. True uses less memory, but slower.
eager=$(get_flag 'eager' false)
# vllm only. If -1, use the default max model length for the model.
max_model_len=-1

# If true, actions are inputted manually by the user.
manual_input=$(get_flag 'manual_input' false)


#===============================================================================
# Command line arguments - overwrite parameters above if given in the terminal
#===============================================================================
# usage: ./run.sh [-s <start_idx>] [-e <end_idx>] [-t <path_to_task_list>] [-c <config_base_dir>]
#                 [-k <api_key>] [-d <result_dir>] [-m <captioner_device>] [-a <agent_config_file>] [-r]
#   -r forces the autologin cookies to be recreated (sets SKIP_COOKIES=false).

# Parse the command-line options and overwrite the matching global settings.
#   Arguments: the script's positional parameters ("$@").
#   Globals written: test_start_idx, test_end_idx, task_list, test_config_base_dir,
#                    SKIP_COOKIES, API_KEY, result_dir, agent_captioning_model_device,
#                    agent_config_file.
#   Outputs: a warning on stderr for an unknown option or a missing option argument;
#            parsing then continues with the remaining options.
parse_cli_args() {
    # OPTIND is local so the function can be called more than once.
    local opt OPTARG OPTIND=1
    # Leading ':' selects getopts' silent mode: errors are reported through the
    # ':' and '?' cases below instead of getopts' own messages.
    while getopts ":s:e:t:c:k:d:m:a:r" opt; do
        case "$opt" in
            s) test_start_idx=$OPTARG ;;        # Start index for the test
            e) test_end_idx=$OPTARG ;;          # End index for the test
            t) task_list=$OPTARG ;;             # Path to list of tasks to evaluate
            c) test_config_base_dir=$OPTARG ;;  # Path to the test config directory
            r) SKIP_COOKIES=false ;;            # Reset cookies: do not skip their creation
            k) API_KEY=$OPTARG ;;               # API Key
            d) result_dir=$OPTARG ;;            # Path to the results directory
            m) agent_captioning_model_device=$OPTARG ;;  # Device to hold the captioner model
            a) agent_config_file=$OPTARG ;;     # Path to the YAML configuration for the Agent
            :) echo "Option -$OPTARG requires an argument" >&2 ;;
            \?) echo "Invalid option -$OPTARG" >&2 ;;
        esac
    done
}
parse_cli_args "$@"

# NOTE(review): `agent_config` is not assigned by the option parsing above (-a sets
# `agent_config_file`), so this only fires if the variable comes from the
# environment or a sourced file. `agents_configs` does not appear to be read
# anywhere else in this script - confirm whether this is still needed.
if [[ -n "$agent_config" ]]; then
    agents_configs="$agent_config"
fi


#===============================================================================
# Evaluation
#===============================================================================
# source ./scripts/set_api_keys.sh $API_KEY

# Create autologin cookies (needs to run only one time).
# Runs when the `.auth` directory does not exist, or when SKIP_COOKIES is `false`.
if [[ ! -d .auth || "$SKIP_COOKIES" == false ]]; then
    echo "Creating autologin cookies"
    ./scripts/environments/autologin_cookies.sh
fi

# Build the argument list for run.py as an array so that values containing
# spaces or glob characters reach Python as a single argument.
#
# `${var:+"$var"}` expands to the quoted value when `var` is non-empty and to
# nothing at all otherwise. For value options this keeps the option name on the
# command line even when the value is empty (exactly what the unquoted
# expansion did before); for the boolean switches produced by get_flag it
# simply drops disabled switches.
# NOTE(review): whether run.py accepts a value option without a value cannot be
# seen from this script - confirm against run.py's argument parser.
run_args=(
    --agent_config_file ${agent_config_file:+"$agent_config_file"}
    --result_dir ${result_dir:+"$result_dir"}
    --test_start_idx ${test_start_idx:+"$test_start_idx"}
    --test_end_idx ${test_end_idx:+"$test_end_idx"}
    --test_config_base_dir ${test_config_base_dir:+"$test_config_base_dir"}
    --viewport_width ${viewport_width:+"$viewport_width"}
    --viewport_height ${viewport_height:+"$viewport_height"}
    --max_steps ${max_steps:+"$max_steps"}
    --parsing_failure_th ${parsing_failure_th:+"$parsing_failure_th"}
    --repeating_action_failure_th ${repeating_action_failure_th:+"$repeating_action_failure_th"}
    --deployment_mode ${deployment_mode:+"$deployment_mode"}
    --tgi_model_endpoint ${tgi_model_endpoint:+"$tgi_model_endpoint"}
    --max_model_len ${max_model_len:+"$max_model_len"}
    # `flash_attn` already holds the complete switch (`--flash_attn` or empty).
    # Prefixing it with a literal `--flash_attn` would enable the switch even
    # when the setting is false.
    # NOTE(review): assumes run.py treats --flash_attn as a valueless switch - confirm.
    ${flash_attn:+"$flash_attn"}
    --fuzzy_match_provider ${fuzzy_match_provider:+"$fuzzy_match_provider"}
    --observation_type ${observation_type:+"$observation_type"}
    --path_raw_prompts ${path_raw_prompts:+"$path_raw_prompts"}
    --task_list ${task_list:+"$task_list"}
    --agent_captioning_model_device ${agent_captioning_model_device:+"$agent_captioning_model_device"}
    --max_tasks ${max_tasks:+"$max_tasks"}
    --sleep_after_execution ${sleep_after_execution:+"$sleep_after_execution"}
    ${render:+"$render"}
    ${render_screenshot:+"$render_screenshot"}
    ${save_trace_enabled:+"$save_trace_enabled"}
    ${current_viewport_only:+"$current_viewport_only"}
    ${local:+"$local"}
    ${eager:+"$eager"}
    ${log_obs_lens:+"$log_obs_lens"}
    ${no_caption_text_obs:+"$no_caption_text_obs"}
    --trajectory_html_path ${trajectory_html_path:+"$trajectory_html_path"}
    ${manual_input:+"$manual_input"}
    ${show_scroll_bar:+"$show_scroll_bar"}
    ${shuffle_tasks:+"$shuffle_tasks"}
)

echo -e "\n==================================\nStart of evaluation\n=================================="
# Last command of the script: its exit status becomes the script's exit status.
python3 run.py "${run_args[@]}"

