#!/bin/bash

# Default settings; the command-line parsing further down may replace them.

# Input selection
DATASET='anthropic'            # for safety, use "anthropic" or "multijail"
INPUT_DIR=''                   # fill in your input directory

# Thresholds
THRESHOLD='0.6'
CSRT_THRESHOLD='0.5'

# Toggles (kept as the strings "true"/"false")
SELF_CONSISTENCY='false'
ASYNC_MODE='true'

# Rate limits
MAX_REQ_PER_MIN='3000'
MAX_TOKENS_PER_MIN='250000'

# Evaluation selection
MODE='multi'                   # 'multi' or 'self'
COORDINATOR_NUM='1'            # which coordinator to use

# Parse command line arguments.
#
# Options that take a value (the value is the next argument):
#   --dataset          -> DATASET
#   --input_dir        -> INPUT_DIR
#   --threshold        -> THRESHOLD
#   --max_requests     -> MAX_REQ_PER_MIN
#   --max_tokens       -> MAX_TOKENS_PER_MIN
#   --coordinator_num  -> COORDINATOR_NUM
#   --csrt_threshold   -> CSRT_THRESHOLD
#   --mode             -> MODE (must be "multi" or "self")
# Switches (no value):
#   --self_consistency    sets SELF_CONSISTENCY="true"
#   --sync                sets ASYNC_MODE="false"
#
# Any other argument, a value option without a value, or an invalid mode is
# reported on stderr and terminates the script with status 1.
parse_args() {
  local opt

  while (( $# > 0 )); do
    opt="$1"

    # `shift 2` with a single argument left fails WITHOUT shifting, so the
    # loop would never terminate; make sure the value is there first.
    case "$opt" in
      --dataset | --input_dir | --threshold | --max_requests | --max_tokens | \
      --coordinator_num | --csrt_threshold | --mode)
        if (( $# < 2 )); then
          echo "Option $opt requires a value" >&2
          exit 1
        fi
        ;;
    esac

    case "$opt" in
      --dataset)
        DATASET="$2"
        shift 2
        ;;
      --input_dir)
        INPUT_DIR="$2"
        shift 2
        ;;
      --threshold)
        THRESHOLD="$2"
        shift 2
        ;;
      --self_consistency)
        SELF_CONSISTENCY="true"
        shift
        ;;
      --sync)
        ASYNC_MODE="false"
        shift
        ;;
      --max_requests)
        MAX_REQ_PER_MIN="$2"
        shift 2
        ;;
      --max_tokens)
        MAX_TOKENS_PER_MIN="$2"
        shift 2
        ;;
      --coordinator_num)
        COORDINATOR_NUM="$2"
        shift 2
        ;;
      --csrt_threshold)
        CSRT_THRESHOLD="$2"
        shift 2
        ;;
      --mode)
        # Reject anything the run step would not recognise instead of
        # letting the script finish without running an evaluation.
        case "$2" in
          multi | self)
            MODE="$2"
            ;;
          *)
            echo "Invalid mode: $2 (expected 'multi' or 'self')" >&2
            exit 1
            ;;
        esac
        shift 2
        ;;
      *)
        echo "Unknown option: $opt" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# Translate the "true"/"false" settings into the optional flags handed to
# the evaluation scripts; any value other than "true" yields an empty flag.

# Self-consistency flag
case "$SELF_CONSISTENCY" in
  true) SC_FLAG="--self_consistency" ;;
  *)    SC_FLAG="" ;;
esac

# Async flag
case "$ASYNC_MODE" in
  true) ASYNC_FLAG="--async_mode" ;;
  *)    ASYNC_FLAG="" ;;
esac

# Print configuration.
#
# Writes a summary of the effective settings to stdout, followed by an empty
# line. COORDINATOR_NUM and CSRT_THRESHOLD are included because they are
# forwarded to the multi-agent evaluation as well.
print_config() {
  # printf '%s\n' emits each argument on its own line, verbatim.
  printf '%s\n' \
    "Running evaluation with the following settings:" \
    "  Dataset: $DATASET" \
    "  Input directory: $INPUT_DIR" \
    "  Threshold: $THRESHOLD" \
    "  Self consistency: $SELF_CONSISTENCY" \
    "  Async mode: $ASYNC_MODE" \
    "  Max requests per minute: $MAX_REQ_PER_MIN" \
    "  Max tokens per minute: $MAX_TOKENS_PER_MIN" \
    "  Mode: $MODE" \
    "  Coordinator number: $COORDINATOR_NUM" \
    "  CSRT threshold: $CSRT_THRESHOLD" \
    ""
}

print_config

# Run the evaluation script selected by MODE.
#
#   multi -> src/evaluation_multi_safety_api.py
#            (additionally receives --coordinator_num and --csrt_threshold)
#   self  -> src/evaluation_self_safety_api.py
#            (additionally receives SC_FLAG when it is non-empty)
#
# Returns the exit status of the python process, or 1 for an unrecognised
# MODE. The optional flags use ${VAR:+"$VAR"} so that an empty flag
# contributes no argument at all and a non-empty one stays a single word.
run_evaluation() {
  case "$MODE" in
    multi)
      echo "========================================="
      echo "Running multi-agent evaluation..."
      echo "========================================="
      python src/evaluation_multi_safety_api.py \
        --dataset "$DATASET" \
        --input_dir "$INPUT_DIR" \
        --threshold "$THRESHOLD" \
        ${ASYNC_FLAG:+"$ASYNC_FLAG"} \
        --max_requests_per_minute "$MAX_REQ_PER_MIN" \
        --max_tokens_per_minute "$MAX_TOKENS_PER_MIN" \
        --coordinator_num "$COORDINATOR_NUM" \
        --csrt_threshold "$CSRT_THRESHOLD"
      ;;
    self)
      echo "========================================="
      echo "Running self-consistency evaluation..."
      echo "========================================="
      python src/evaluation_self_safety_api.py \
        --dataset "$DATASET" \
        --input_dir "$INPUT_DIR" \
        --threshold "$THRESHOLD" \
        ${SC_FLAG:+"$SC_FLAG"} \
        ${ASYNC_FLAG:+"$ASYNC_FLAG"} \
        --max_requests_per_minute "$MAX_REQ_PER_MIN" \
        --max_tokens_per_minute "$MAX_TOKENS_PER_MIN"
      ;;
    *)
      # Previously an unexpected mode ran nothing and still exited 0.
      echo "Unknown mode: $MODE (expected 'multi' or 'self')" >&2
      return 1
      ;;
  esac
}

# Propagate a failed evaluation to the caller. With two independent `if`
# blocks, a failing multi-agent run was followed by a false `if`, whose
# status is 0, so the script reported success.
run_evaluation || exit $?