#!/bin/bash
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Models to benchmark; the execution loop runs once per array element.
TARGET_MODELS=("claude-3-7-sonnet-latest")

# Optional flags appended to the benchmark command line. Each one is off
# while its assignment stays commented out; remove the leading "# " to enable.

# Cap the number of prompts processed (handy for a quick test run).
# LIMIT="--limit 1"

# Skip the article cleaning step.
# SKIP_CLEANING="--skip_cleaning"

# Restrict the run to a single language.
# ONLY_ZH="--only_zh"  # Only process Chinese data
# ONLY_EN="--only_en"  # Only process English data

# Single file that receives the output of every benchmark run.
OUTPUT_LOG_FILE="output.log"

# Truncate the log from any previous run and write the banner as its first line.
printf '%s\n' "Starting benchmark tests, log output to: $OUTPUT_LOG_FILE" \
  > "$OUTPUT_LOG_FILE"

# --- Execution section ---

#######################################
# Assemble the benchmark command line for one model as an argv array.
#
# The command is kept as an array (never a string passed to eval) so the
# model name reaches Python as exactly one argument, whatever characters it
# contains, and is never re-parsed by the shell.
#
# Globals:
#   LIMIT, SKIP_CLEANING, ONLY_ZH, ONLY_EN (read) - optional flag strings;
#     unset or empty means "not enabled". Each value is split on whitespace
#     into separate arguments (e.g. "--limit 1" becomes two words). Shell
#     quoting inside a value is NOT interpreted.
#   BENCHMARK_CMD (written) - resulting argv array.
# Arguments:
#   $1 - target model name
#######################################
build_benchmark_cmd() {
  local model=$1
  local option_string
  local -a option_words

  BENCHMARK_CMD=(python -u deepresearch_bench.py "$model")

  for option_string in "${LIMIT:-}" "${SKIP_CLEANING:-}" "${ONLY_ZH:-}" "${ONLY_EN:-}"; do
    [[ -n "$option_string" ]] || continue
    # read -a splits on IFS only: no globbing, no command substitution.
    read -r -a option_words <<< "$option_string"
    BENCHMARK_CMD+=("${option_words[@]}")
  done
}

# Models whose benchmark run exited with a non-zero status.
failed_models=()

# Loop through each model in the target models list
for TARGET_MODEL in "${TARGET_MODELS[@]}"; do
  printf 'Running benchmark test for target model: %s\n' "$TARGET_MODEL"
  # printf with %s keeps backslashes in the model name literal (echo -e would
  # interpret them as escape sequences).
  printf '\n\n========== Starting test for %s ==========\n\n' "$TARGET_MODEL" \
    >> "$OUTPUT_LOG_FILE"

  build_benchmark_cmd "$TARGET_MODEL"

  # Log a shell-quoted rendering of the exact argv that is about to run.
  printf -v cmd_display '%q ' "${BENCHMARK_CMD[@]}"
  printf 'Executing command: %s\n' "${cmd_display% }" | tee -a "$OUTPUT_LOG_FILE"

  # Execute command and append stdout and stderr to single log file.
  # Capture the exit status instead of discarding it so failures are reported.
  status=0
  "${BENCHMARK_CMD[@]}" >> "$OUTPUT_LOG_FILE" 2>&1 || status=$?

  if (( status == 0 )); then
    printf 'Completed benchmark test for target model: %s\n' "$TARGET_MODEL"
  else
    # Keep going with the remaining models, but remember and report the failure.
    failed_models+=("$TARGET_MODEL")
    printf 'Benchmark test FAILED for target model: %s (exit status %d)\n' \
      "$TARGET_MODEL" "$status" | tee -a "$OUTPUT_LOG_FILE" >&2
  fi
  printf '\n========== Test completed for %s ==========\n\n' "$TARGET_MODEL" \
    >> "$OUTPUT_LOG_FILE"
  echo "--------------------------------------------------"
done

echo "All benchmark tests completed. Logs saved in $OUTPUT_LOG_FILE"

# Surface failures to the caller (CI, wrapper scripts) via the exit status.
if (( ${#failed_models[@]} > 0 )); then
  printf 'Benchmark failed for model: %s\n' "${failed_models[@]}" >&2
  exit 1
fi
