#!/bin/bash
#==============================================================================
# SimpleQA Multiple Run Script
# Run SimpleQA with multiple configurations
#==============================================================================

# Python entry point of the benchmark. The path is relative, so this script
# has to be launched from the KGoT repository root folder.
PYTHON_SCRIPT='benchmarks/baselines/gptswarm/gptswarm_simpleqa.py'

#------------------------------------------------------------------------------
# Configuration
#------------------------------------------------------------------------------

# SimpleQA JSON datasets to evaluate; every entry is processed once per run.
simpleqa_files=()
# simpleqa_files+=('SimpleQA/testtt.json')
simpleqa_files+=('benchmarks/datasets/SimpleQA/test_set_10_percent_seed123456.json')

# How many times the whole list of datasets is evaluated (any positive integer).
num_runs=1


#######################################
# Print the path of the getopt executable to use.
# Prefers GNU getopt installed through Homebrew (macOS ships a getopt without
# long-option support); otherwise falls back to the first getopt in PATH.
# Outputs: the chosen path on stdout, or nothing when no getopt exists.
# Returns: 0 if a getopt was found, non-zero otherwise.
#######################################
find_getopt() {
    local brew_prefix
    # The prefix is looked up on its own: appending "/bin/getopt" directly to
    # the command substitution would turn a failed lookup into the unrelated
    # path "/bin/getopt" instead of an empty string.
    if brew_prefix="$(brew --prefix gnu-getopt 2>/dev/null)" \
        && [[ -n "$brew_prefix" && -x "$brew_prefix/bin/getopt" ]]; then
        printf '%s\n' "$brew_prefix/bin/getopt"
    else
        command -v getopt
    fi
}

GETOPT="$(find_getopt)"

if [[ -z "$GETOPT" ]]; then
    echo "Error: no getopt executable found (install util-linux or Homebrew gnu-getopt)." >&2
    exit 1
fi

# Enhanced (util-linux) getopt reports exit status 4 for -T; other versions
# do not understand the long options used by this script.
"$GETOPT" -T >/dev/null 2>&1
if [[ $? -ne 4 ]]; then
    echo "Warning: $GETOPT does not look like enhanced getopt; long options may not be parsed correctly." >&2
fi

echo "Using getopt: $GETOPT"

#------------------------------------------------------------------------------
# Handle arguments
#------------------------------------------------------------------------------

#######################################
# Print the usage text of this script.
# Outputs: usage text on stdout.
#######################################
print_usage() {
    # Quoted delimiter: the text is emitted literally, without expansions.
    cat <<'EOF'
Usage: ./run_multiple_gptswarm_simpleqa.sh [OPTIONS]

Optional arguments:
  --log_folder_base              Directory where logs will be stored (default: logs/gptswarm_simpleqa_[LLM_MODEL])
  --config_llm_path              Path to LLM configuration file (default: kgot/config_llms.json)
  --logger_file_mode             Log file mode (default: a)

  --max_iterations               Max iterations for KGoT (default: 7)
  --llm_model                    LLM model to use (default: gpt-4o-mini)
  -h, --help                     Show this help message and exit

EOF
}

# Handle the help flag wherever it appears among the options. It is checked
# here, before option parsing, because -h/--help are not registered with
# getopt and would otherwise be rejected as unknown options.
for help_candidate in "$@"; do
    case "$help_candidate" in
        --)
            # Everything after "--" is an operand, not an option.
            break
            ;;
        -h|--help)
            print_usage
            exit 0
            ;;
    esac
done
unset help_candidate


# Defaults used when an option is not given on the command line
# (log_folder_base gets its default later, once the model name is known).
MAX_ITERATIONS_DEFAULT=7
LLM_MODEL_DEFAULT="gpt-4o-mini"

# Values that can be overridden on the command line start out empty.
LOG_FOLDER_BASE=""
MAX_ITERATIONS=""
# NOTE(review): LLM_MODEL is not reset here, so a non-empty LLM_MODEL inherited
# from the environment takes precedence over LLM_MODEL_DEFAULT - confirm that
# this is intended.

# All supported long options; the trailing ':' marks a required argument.
LONG_OPTS="log_folder_base:,config_llm_path:,logger_file_mode:,max_iterations:,llm_model:"

# Fail with a clear message instead of trying to execute an empty command.
: "${GETOPT:?getopt executable not set}"

# Let getopt validate and normalise the command line. GETOPT is quoted so a
# path containing whitespace still resolves to a single command word, and the
# exit status is tested directly on the assignment.
if ! OPTS=$("$GETOPT" -o "" --long "$LONG_OPTS" \
    -n 'run_multiple_gptswarm_simpleqa.sh' -- "$@"); then
    echo "Failed to parse options." >&2
    exit 1
fi

# getopt emits a shell-quoted word list; eval is required to turn it back into
# positional parameters without losing the quoting.
eval set -- "$OPTS"

# Options that this script does not interpret itself; forwarded verbatim to
# the Python script.
ARGS=()

#######################################
# Consume a getopt-normalised option list.
# Every option registered with getopt takes a required argument, so the list
# consists of "--name value" pairs followed by "--". The value is therefore
# always taken as-is, even when it starts with "--".
# Globals:   LOG_FOLDER_BASE, MAX_ITERATIONS, LLM_MODEL (written)
#            ARGS (appended with pass-through options)
# Arguments: the normalised option words
#######################################
consume_parsed_options() {
    # Bounded by the argument count so a malformed list cannot loop forever.
    while (( $# > 0 )); do
        case "$1" in
            --)
                break
                ;;
            --log_folder_base)
                LOG_FOLDER_BASE="$2"
                ;;
            --max_iterations)
                MAX_ITERATIONS="$2"
                ;;
            --llm_model)
                LLM_MODEL="$2"
                ;;
            --*)
                # Pass-through option: forward it only when a value was set,
                # so that an empty value keeps the Python-side default.
                if [[ -n "$2" ]]; then
                    ARGS+=("$1" "$2")
                fi
                ;;
            *)
                # Stray word without an option name; skip it.
                shift
                continue
                ;;
        esac
        # Drop the option together with its value; stop if the value is
        # missing (cannot happen with well-formed getopt output).
        shift 2 || break
    done
}

# The positional parameters of the script itself are left untouched; nothing
# after this point reads them.
consume_parsed_options "$@"




# Fall back to the built-in defaults for everything left empty by the CLI.
if [[ -z "$MAX_ITERATIONS" ]]; then
    MAX_ITERATIONS="$MAX_ITERATIONS_DEFAULT"
fi
if [[ -z "$LLM_MODEL" ]]; then
    LLM_MODEL="$LLM_MODEL_DEFAULT"
fi

# The default log folder embeds the model name, so it can only be derived
# once the model has been settled above.
LOG_FOLDER_BASE_DEFAULT="logs/gptswarm_simpleqa_${LLM_MODEL}"
if [[ -z "$LOG_FOLDER_BASE" ]]; then
    LOG_FOLDER_BASE="$LOG_FOLDER_BASE_DEFAULT"
fi

# Summary of the effective configuration, followed by a blank line.
printf '%s\n' \
    "KGoT Run Configuration:" \
    "  log_folder_base:     $LOG_FOLDER_BASE" \
    "  llm_model:           $LLM_MODEL" \
    ""

#------------------------------------------------------------------------------
# Main Script
#------------------------------------------------------------------------------

#######################################
# Assemble the argument vector for one invocation of the Python script.
# An array is used instead of a flat string so that paths and values
# containing whitespace or glob characters reach Python as single arguments.
# Globals:   PYTHON_SCRIPT, MAX_ITERATIONS, LLM_MODEL, ARGS (read)
#            run_args (written)
# Arguments: $1 - log folder for this dataset
#            $2 - SimpleQA JSON file
#######################################
build_run_args() {
    run_args=(
        "$PYTHON_SCRIPT"
        --log_folder_base "$1"
        --simpleqa_file "$2"
        --max_iterations "$MAX_ITERATIONS"
        --llm_model "$LLM_MODEL"
        "${ARGS[@]}"   # pass-through options collected during option parsing
    )
}

# Number of Python invocations that ended with a non-zero status.
failed_runs=0

# Outer loop for the number of runs
for ((run = 1; run <= num_runs; run++)); do
    echo "Iteration: $run/$num_runs"

    # With several runs each one gets its own sub-folder; a single run logs
    # directly into the base folder.
    if (( num_runs > 1 )); then
        run_log_folder="${LOG_FOLDER_BASE}/run_${run}"
    else
        run_log_folder="$LOG_FOLDER_BASE"
    fi

    # Inner loop over the configured SimpleQA files
    for i in "${!simpleqa_files[@]}"; do
        simpleqa_file="${simpleqa_files[$i]}"
        # One log folder per dataset, named after the file without ".json".
        log_folder="${run_log_folder}/$(basename "$simpleqa_file" .json)"

        echo "Running with SimpleQA file: $simpleqa_file and log folder: $log_folder and it is the [$i-th/${#simpleqa_files[@]}] elements"
        echo "Output will be saved into the log folder in the cmd_log.log file"
        echo

        build_run_args "$log_folder" "$simpleqa_file"

        # Show the command shell-quoted, so it can be copied and re-run as is.
        echo "Running script:"
        printf -v shown_cmd '%q ' "${run_args[@]}"
        echo "${shown_cmd% }"
        echo

        # Change the Python version if needed and run the script.
        # A failing run is reported but does not stop the remaining runs.
        if ! PYTHONPATH=. python3 "${run_args[@]}"; then
            echo "Warning: run $run failed for $simpleqa_file" >&2
            failed_runs=$((failed_runs + 1))
        fi
    done
done

# Report failure to the caller if any invocation failed, not only the last one.
if (( failed_runs > 0 )); then
    echo "$failed_runs run(s) failed." >&2
    exit 1
fi
