#!/bin/bash

# ANSI color escape sequences. They are stored as literal backslash text and
# only become real escapes when printed through `echo -e`.
NC="\033[0m"        # resets the terminal back to its default color
CYAN="\033[0;36m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
GREEN="\033[0;32m"

# Help function
#
# Prints the usage summary for this script to stdout.
# Globals:   CYAN, NC (read) - color escapes wrapped around the title line
# Arguments: none
# Outputs:   usage text on stdout
show_help() {
  echo -e "${CYAN}===== Sequential Normalization LLM Run Script =====${NC}"
  # Quoted delimiter: the text below is printed literally, with no expansion.
  # Every option accepted by the argument parser is listed here.
  cat <<'EOF'
Usage: ./llm_run.sh [options]

Options:
  --help             Display this help message
  --run_batch        Add batch processing flag (passes --batch_process)
  --model [name]     Directly specify model name
  --model_run        Add model run flag for initial processing
  --process_missing  Add process missing items flag
  --all_eval         Run on all evaluations (not just subset)

Note: If model_run is disabled, process_missing will be automatically enabled

Examples:
  ./llm_run.sh --run_batch --model gpt-4o-mini-batchapi
  ./llm_run.sh --model o3-mini --model_run
  ./llm_run.sh --model claude-3-7-sonnet-20250219 --process_missing

EOF
}

# Default settings. The boolean switches hold the strings "true"/"false" and
# all start off; the two text settings start empty.
run_batch=false model_run=false process_missing=false all_eval=false
model="" input_path=""

# Process command line arguments
#
# parse_args: walks the given arguments and records recognised options in the
# global setting variables. Unknown arguments only produce a warning.
# Globals:   run_batch, model, model_run, process_missing, all_eval (written)
#            YELLOW, NC (read)
# Arguments: the script's command line ("$@")
# Exits:     0 after printing help for --help; 1 when --model has no value
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --help)
        show_help
        exit 0
        ;;
      --run_batch)
        run_batch=true
        shift
        ;;
      --model)
        # `shift 2` fails WITHOUT shifting when only one argument is left,
        # which would make this loop spin forever; reject a missing value up
        # front. A following "--flag" is treated as a missing value too, so it
        # is not silently swallowed as the model name.
        if [[ $# -lt 2 || $2 == --* ]]; then
          printf '%bError: --model requires a model name%b\n' "$YELLOW" "$NC" >&2
          exit 1
        fi
        model="$2"
        shift 2
        ;;
      --model_run)
        model_run=true
        shift
        ;;
      --process_missing)
        process_missing=true
        shift
        ;;
      --all_eval)
        all_eval=true
        shift
        ;;
      *)
        # The argument is passed as data (%s) so backslashes in it are not
        # interpreted as escape sequences.
        printf '%bWarning: Unknown parameter: %s%b\n' "$YELLOW" "$1" "$NC"
        shift
        ;;
    esac
  done
}

parse_args "$@"

# Display banner: a three-line cyan box around the script title.
# printf reuses the format once per argument, so each argument is one row.
printf '%b\n' \
  "${CYAN}┌────────────────────────────────────────────────┐${NC}" \
  "${CYAN}│    Sequential Normalization LLM Run Script     │${NC}" \
  "${CYAN}└────────────────────────────────────────────────┘${NC}"

# Find and list available SR files
#
# discover_sr_files: (re)builds two parallel global arrays:
#   sr_files[i] - label shown in the menu
#   sr_paths[i] - path of the file for that label
# Entry 0 is always the gold standard file, so menu choice 1 maps to it.
# Paths are read NUL-delimited so names containing spaces or glob characters
# stay intact as single entries.
# Globals:   sr_files, sr_paths (written)
# Arguments: none
discover_sr_files() {
  local file

  # Default choice
  sr_files=("gold_SR.csv (Gold Standard)")
  sr_paths=("../dataset/gold_SR.csv")

  # SR files in the benchmark directory, labelled by file name only.
  # The parentheses make -print0 apply to both -name alternatives.
  # find errors are discarded because the directory may not exist.
  while IFS= read -r -d '' file; do
    sr_files+=("${file##*/}")
    sr_paths+=("$file")
  done < <(find ../benchmark \( -name "*_SR.csv" -o -name "*_generated_reports_v*.csv" \) -print0 2>/dev/null)

  # SR files in the eval directory, labelled by their path below ./eval/
  while IFS= read -r -d '' file; do
    sr_files+=("${file#./eval/}")
    sr_paths+=("$file")
  done < <(find ./eval -name "pred_SR_df.csv" -print0 2>/dev/null)
}

echo -e "\n${BLUE}Available SR Files:${NC}"
discover_sr_files

# Display available SR files as a 1-based numbered menu
for i in "${!sr_files[@]}"; do
  printf '  %d) %s\n' "$((i + 1))" "${sr_files[$i]}"
done

# sr_choice_to_index CHOICE COUNT
# Converts a 1-based menu answer into a 0-based array index.
# Arguments: $1 - text typed by the user (empty means choice 1)
#            $2 - number of menu entries
# Outputs:   the index on stdout when CHOICE is a number in 1..COUNT
# Returns:   0 on success, 1 when CHOICE is not a valid entry number
sr_choice_to_index() {
  local choice=${1:-1}
  local count=${2:-0}

  # Accept plain digits only. The input is checked BEFORE any arithmetic so
  # arbitrary text is never evaluated as an arithmetic expression; the length
  # cap keeps the value far away from integer overflow.
  [[ $choice =~ ^[0-9]{1,9}$ ]] || return 1

  # 10# forces base 10 so answers such as "08" are not parsed as octal.
  (( 10#$choice >= 1 && 10#$choice <= count )) || return 1

  printf '%d\n' "$(( 10#$choice - 1 ))"
}

# Ask user to select SR file
read -r -p "Select SR file number (default: 1): " sr_choice

if selected_index=$(sr_choice_to_index "$sr_choice" "${#sr_files[@]}"); then
  input_path="${sr_paths[$selected_index]}"
  # The label is passed as data (%s) so backslashes in file names print as-is.
  printf '%bSelected: %s%b\n' "$GREEN" "${sr_files[$selected_index]}" "$NC"
else
  echo -e "${YELLOW}Invalid choice. Using default.${NC}"
  input_path="${sr_paths[0]}"
fi

# answer_is_yes ANSWER DEFAULT
# Interprets a reply to a yes/no prompt.
# Arguments: $1 - text typed by the user (may be empty)
#            $2 - "y" or "n": the meaning of an empty reply
# Returns:   0 when the reply (or the default, for an empty reply) is
#            y/yes in any letter case; 1 for anything else
answer_is_yes() {
  local answer=${1:-$2}
  [[ $answer =~ ^[Yy]([Ee][Ss])?$ ]]
}

# Then ask about model run (skipped when --model_run was given)
if [ "$model_run" = false ]; then
  echo -e "\n${BLUE}Processing Type Selection:${NC}"
  read -r -p "Enable LLM run option for initial processing? (Y/n): " enable_model_run
  if answer_is_yes "$enable_model_run" y; then
    model_run=true
  else
    # Automatically enable process_missing if model_run is disabled
    process_missing=true
  fi
fi

# Only ask about process_missing if model_run is true and it is not already on
if [ "$model_run" = true ] && [ "$process_missing" = false ]; then
  read -r -p "Enable process missing items option? (Y/n): " enable_process_missing
  if answer_is_yes "$enable_process_missing" y; then
    process_missing=true
  fi
fi

# Ask about all_eval. Like the two prompts above, the question is skipped
# when the setting is already enabled (e.g. by --all_eval).
if [ "$all_eval" = false ]; then
  read -r -p "Run on all evaluations (not just subset)? (y/N): " enable_all_eval
  if answer_is_yes "$enable_all_eval" n; then
    all_eval=true
  fi
fi

# Regular processing model options (always use these regardless of batch setting).
# Single source for both the menu and the number-to-name lookup; the first
# entry is the default.
available_models=(
  "gpt-4.1"
  "claude-3-7-sonnet-20250219"
  "qwen3-235b-a22b"
  "deepseek-v3-0324"
  "llama4-maverick-instruct-basic"
  "gemini-2.5-flash-preview-04-17"
)

# model_from_choice CHOICE
# Maps a 1-based menu answer to a model name.
# Globals:   available_models (read)
# Arguments: $1 - text typed by the user (empty selects entry 1)
# Outputs:   the chosen model name on stdout; the default (first) model when
#            CHOICE is not a valid entry number
# Returns:   0 for a valid choice, 1 when the default was substituted
model_from_choice() {
  local choice=${1:-1}

  # Digits only, no leading zero, bounded length: checked before arithmetic.
  if [[ $choice =~ ^[1-9][0-9]{0,8}$ ]] && (( choice <= ${#available_models[@]} )); then
    printf '%s\n' "${available_models[choice - 1]}"
    return 0
  fi

  printf '%s\n' "${available_models[0]}"
  return 1
}

# Only prompt when no model was given with --model
if [ -z "$model" ]; then
  echo -e "\n${BLUE}Model Selection:${NC}"
  for model_idx in "${!available_models[@]}"; do
    echo "  $((model_idx + 1))) ${available_models[$model_idx]}"
  done
  read -r -p "Select model number (default: 1): " model_choice

  # model receives the function's output either way; the status only decides
  # whether the fallback warning is shown.
  if ! model=$(model_from_choice "$model_choice"); then
    echo -e "${YELLOW}Invalid choice. Setting to default (${available_models[0]}).${NC}"
  fi
fi

# Build command
#
# build_cmd: assembles the llm_run.py invocation in the global array `cmd`,
# one argument per element. The array is later executed directly (no eval),
# so the model name and input path reach python exactly as stored, even when
# they contain spaces, quotes or other shell metacharacters.
# Globals:   model, input_path, run_batch, model_run, process_missing,
#            all_eval (read); cmd (written)
build_cmd() {
  cmd=(python llm_run.py --LLM_name "$model" --input_path "$input_path")

  if [[ $run_batch == true ]]; then
    cmd+=(--batch_process)
  fi
  if [[ $model_run == true ]]; then
    cmd+=(--model_run)
  fi
  if [[ $process_missing == true ]]; then
    cmd+=(--process_missing)
  fi
  if [[ $all_eval == true ]]; then
    cmd+=(--all_eval)
  fi
}

# on_off FLAG: prints "Enabled" when FLAG is the string "true", else "Disabled".
on_off() {
  if [[ $1 == true ]]; then
    echo "Enabled"
  else
    echo "Disabled"
  fi
}

# confirm_and_run: prints the settings summary and the command, asks for
# confirmation, then runs the command stored in `cmd`.
# Globals:   cmd, input_path, model, model_run, process_missing, all_eval,
#            color variables (read)
# Exits:     0 when the user answers n/N at the confirmation prompt
# Returns:   0 when the command succeeded, otherwise the command's exit status
confirm_and_run() {
  local confirm cmd_display rc

  # Display execution summary (values are printed as data, not as escapes)
  echo -e "\n${GREEN}=== Execution Settings ====${NC}"
  printf '%b %s\n' "${BLUE}Input Path:${NC}" "$input_path"
  printf '%b %s\n' "${BLUE}Model:${NC}" "$model"
  printf '%b %s\n' "${BLUE}Initial Processing:${NC}" "$(on_off "$model_run")"
  printf '%b %s\n' "${BLUE}Process Missing:${NC}" "$(on_off "$process_missing")"
  printf '%b %s\n' "${BLUE}All Evaluations:${NC}" "$(on_off "$all_eval")"

  # Ask for confirmation; %q renders each argument in shell-quoted form so the
  # displayed line is unambiguous.
  printf -v cmd_display '%q ' "${cmd[@]}"
  echo -e "\n${YELLOW}Executing the following command:${NC}"
  printf '%b%s%b\n' "$CYAN" "${cmd_display% }" "$NC"
  read -r -p "Continue? (Y/n): " confirm

  if [[ $confirm =~ ^[Nn]$ ]]; then
    echo -e "\n${YELLOW}Operation cancelled.${NC}"
    exit 0
  fi

  # Execute command
  echo -e "\n${GREEN}Executing command...${NC}"
  "${cmd[@]}"
  rc=$?
  if (( rc != 0 )); then
    # Report failure instead of claiming completion, and hand the status back.
    echo -e "\n${YELLOW}Command failed with exit status ${rc}.${NC}" >&2
    return "$rc"
  fi

  # Completion message
  echo -e "\n${GREEN}Operation completed!${NC}"
}

build_cmd
# Keep this as the last command of the script: its return status becomes the
# script's exit status, so a failed run is visible to the caller.
confirm_and_run