set -e

# Configuration
# MODEL_PATH is passed as the `pretrained=` model argument and MODEL_NAME as
# the `--model` value; TIMESTAMP is embedded in each run's log directory name.
MODEL_PATH="Qwen/Qwen2-VL-7B-Instruct"
MODEL_NAME="qwen2_vl_tasm"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)

# Setup environment
# Prepend the lmms-eval and transformers/src directories found under the
# current working directory. The previous PYTHONPATH is appended only when it
# is non-empty, so an unset/empty value does not leave a trailing ':' (which
# would add an empty search-path entry).
export PYTHONPATH="${PWD}/lmms-eval:${PWD}/transformers/src${PYTHONPATH:+:${PYTHONPATH}}"

# Parse command line arguments
BENCHMARK=${1:-"illusionvqa"}  # Default to illusionvqa if no argument

# Print the help text (synopsis, available benchmarks, examples) to stdout,
# then terminate the script with exit status 1.
usage() {
    # One printf call emits every line; each argument becomes one output line.
    printf '%s\n' \
        "Usage: $0 [BENCHMARK]" \
        "" \
        "Available benchmarks:" \
        "  illusionvqa  - IllusionVQA Soft Localization (default)" \
        "  okvqa        - OK-VQA Validation" \
        "  screenspot   - ScreenSpot Recognition Test" \
        "  youcook2     - YouCook2 Validation" \
        "  all          - Run all benchmarks sequentially" \
        "" \
        "Example:" \
        "  $0 illusionvqa" \
        "  $0 all"
    exit 1
}

# Run the IllusionVQA evaluation.
# Launches lmms_eval through `accelerate launch` with one process on the task
# illusionvqa_soft_localization_fewshot (batch size 4, samples logged).
# Globals:
#   MODEL_PATH, MODEL_NAME, TIMESTAMP (read)
#   OUTPUT_PATH, MODEL_ARGS (written)
# Outputs:
#   Banner and result location on stdout; creates the log directory.
run_illusionvqa() {
    echo "============================================"
    echo "TASM - IllusionVQA Evaluation"
    echo "============================================"

    OUTPUT_PATH="./logs/tasm_illusionvqa_${TIMESTAMP}"
    mkdir -p "${OUTPUT_PATH}"

    # Build the comma-separated model argument string piece by piece.
    MODEL_ARGS="pretrained=${MODEL_PATH}"
    MODEL_ARGS="${MODEL_ARGS},num_fewshot=20"
    MODEL_ARGS="${MODEL_ARGS},task_vector_method=combined"
    MODEL_ARGS="${MODEL_ARGS},target_compression_ratio=0.2"

    # Every expansion is quoted so a value containing whitespace or glob
    # characters is passed as exactly one argument.
    accelerate launch --num_processes 1 -m lmms_eval \
        --model "${MODEL_NAME}" \
        --model_args "${MODEL_ARGS}" \
        --tasks illusionvqa_soft_localization_fewshot \
        --batch_size 4 \
        --output_path "${OUTPUT_PATH}" \
        --log_samples

    echo "Results saved to: ${OUTPUT_PATH}"
}

# Run the OK-VQA evaluation.
# Invokes `python -m lmms_eval` on the task ok_vqa_val2014_fewshot with batch
# size 1, seed list 0,1234,1234,42, sample logging and INFO verbosity.
# Globals:
#   MODEL_PATH, MODEL_NAME, TIMESTAMP (read)
#   OUTPUT_PATH (written)
# Outputs:
#   Banner and result location on stdout; creates the log directory.
run_okvqa() {
    echo "============================================"
    echo "TASM - OK-VQA Evaluation"
    echo "============================================"

    OUTPUT_PATH="./logs/tasm_okvqa_${TIMESTAMP}"
    mkdir -p "${OUTPUT_PATH}"

    # Kept in a local so the whole comma-separated string can be passed as
    # one quoted argument even if MODEL_PATH contains whitespace.
    local model_args="pretrained=${MODEL_PATH},num_fewshot=20,task_vector_method=combined,target_compression_ratio=0.2"

    python -m lmms_eval \
        --model "${MODEL_NAME}" \
        --model_args "${model_args}" \
        --tasks ok_vqa_val2014_fewshot \
        --batch_size 1 \
        --log_samples \
        --seed 0,1234,1234,42 \
        --log_samples_suffix tasm_okvqa \
        --output_path "${OUTPUT_PATH}" \
        --verbosity=INFO

    echo "Results saved to: ${OUTPUT_PATH}"
}

# Run the ScreenSpot evaluation.
# Invokes `python -m lmms_eval` on the task screenspot_rec_test with batch
# size 1 and sample logging.
# Globals:
#   MODEL_PATH, MODEL_NAME, TIMESTAMP (read)
#   OUTPUT_PATH (written)
# Outputs:
#   Banner and result location on stdout; creates the log directory.
run_screenspot() {
    echo "============================================"
    echo "TASM - ScreenSpot Evaluation"
    echo "============================================"

    OUTPUT_PATH="./logs/tasm_screenspot_${TIMESTAMP}"
    mkdir -p "${OUTPUT_PATH}"

    # Kept in a local so the whole comma-separated string can be passed as
    # one quoted argument even if MODEL_PATH contains whitespace.
    local model_args="pretrained=${MODEL_PATH},max_pixels=602112,use_task_vector=True,use_token_merging=True,use_dynamic_memory=True"

    python -m lmms_eval \
        --model "${MODEL_NAME}" \
        --model_args "${model_args}" \
        --tasks screenspot_rec_test \
        --batch_size 1 \
        --log_samples \
        --log_samples_suffix tasm_screenspot \
        --output_path "${OUTPUT_PATH}"

    echo "Results saved to: ${OUTPUT_PATH}"
}

# Run the YouCook2 evaluation.
# Invokes `python -m lmms_eval` on the task youcook2_val with batch size 1,
# `--num_fewshot 20` and sample logging.
# Globals:
#   MODEL_PATH, MODEL_NAME, TIMESTAMP (read)
#   OUTPUT_PATH (written)
# Outputs:
#   Banner and result location on stdout; creates the log directory.
run_youcook2() {
    echo "============================================"
    echo "TASM - YouCook2 Evaluation"
    echo "============================================"

    OUTPUT_PATH="./logs/tasm_youcook2_${TIMESTAMP}"
    mkdir -p "${OUTPUT_PATH}"

    # Kept in a local so the whole comma-separated string can be passed as
    # one quoted argument even if MODEL_PATH contains whitespace.
    local model_args="pretrained=${MODEL_PATH},max_pixels=602112,use_task_vector=True,use_token_merging=True,use_dynamic_memory=True"

    python -m lmms_eval \
        --model "${MODEL_NAME}" \
        --model_args "${model_args}" \
        --tasks youcook2_val \
        --batch_size 1 \
        --num_fewshot 20 \
        --log_samples \
        --log_samples_suffix tasm_youcook2 \
        --output_path "${OUTPUT_PATH}"

    echo "Results saved to: ${OUTPUT_PATH}"
}

# Main execution: dispatch on the benchmark name chosen on the command line.
case "${BENCHMARK}" in
    illusionvqa)
        run_illusionvqa
        ;;
    okvqa)
        run_okvqa
        ;;
    screenspot)
        run_screenspot
        ;;
    youcook2)
        run_youcook2
        ;;
    all)
        # Run every benchmark in sequence, separated by a blank line.
        echo "Running all benchmarks..."
        run_illusionvqa
        echo ""
        run_okvqa
        echo ""
        run_screenspot
        echo ""
        run_youcook2
        echo ""
        echo "============================================"
        echo "All benchmarks completed!"
        echo "============================================"
        ;;
    -h|--help)
        # usage always exits with status 1. Run it in a subshell so that an
        # explicit help request prints the text but the script reports success.
        (usage) || true
        exit 0
        ;;
    *)
        # Invalid argument: send the diagnostic and the help text to stderr;
        # usage terminates the script with a non-zero status.
        echo "Error: Unknown benchmark '${BENCHMARK}'" >&2
        echo "" >&2
        usage >&2
        ;;
esac

echo "Done!"
