#!/bin/bash
#SBATCH --job-name=codescoring
#SBATCH --partition=a100                      # Use GPU partition "a100"
#SBATCH --gres gpu:2                          # Request 2 GPUs per job
#SBATCH -c 32                                 # Number of cores
#SBATCH -N 1                                  # Ensure that all cores are on one machine
#SBATCH -t 4-00:00                            # Maximum run-time in D-HH:MM
#SBATCH --mem=256G                            # Memory pool for all cores (see also --mem-per-cpu)
#SBATCH --output=%j.out                       # File to which STDOUT will be written
#SBATCH --error=%j.err                        # File to which STDERR will be written

# Print command-line usage to stdout and terminate the script.
# Arguments: none (uses $0 for the script name)
# Outputs:   usage text on stdout
# Returns:   never returns; exits the shell with status 1
usage() {
    # -p is mandatory (the script aborts without it), so it is shown without brackets.
    echo "Usage: $0 [-c <experiment_config_file>] -p <part>"
    echo "  -c <config>     Experiment-specific config file (optional)"
    echo "  -p <part>       Part number to process (required)"
    exit 1
}

# Parse command line arguments.
#   -c <file>  sets CONFIG_FILE
#   -p <part>  sets PART
# The leading ':' in the optstring puts getopts in silent mode: an unknown
# option yields opt='?', and an option given without its argument yields
# opt=':' (the option letter is in OPTARG). Both are reported explicitly so
# that e.g. a trailing "-c" is not silently ignored.
while getopts ":c:p:" opt; do
    case ${opt} in
        c )
            CONFIG_FILE=$OPTARG
            ;;
        p )
            PART=$OPTARG
            ;;
        : )
            echo "Error: Option -${OPTARG} requires an argument"
            usage
            ;;
        \? )
            echo "Error: Unknown option -${OPTARG}"
            usage
            ;;
    esac
done

# The part number is mandatory: report the problem and show usage when it is missing.
[[ -n "$PART" ]] || {
    echo "Error: Part number (-p) must be specified"
    usage
}

# Fall back to the default experiment config when none was supplied (unset or empty).
CONFIG_FILE="${CONFIG_FILE:-exps/datasetgen/configs/default_config.sh}"

# Abort unless the selected experiment config exists as a regular file.
[[ -f "$CONFIG_FILE" ]] || {
    echo "Error: Experiment config file '$CONFIG_FILE' not found."
    exit 1
}

# Load the experiment-specific settings first.
. "$CONFIG_FILE"

# The global config is mandatory; its path is relative to the current working directory.
# NOTE(review): the default experiment config lives under exps/datasetgen/configs/
# while this file is under exps/datasetgen/scripts/configs/ — confirm both are intended.
if [[ ! -f "exps/datasetgen/scripts/configs/global_config.sh" ]]; then
    echo "Error: Global config file 'exps/datasetgen/scripts/configs/global_config.sh' not found."
    exit 1
fi

# Sourced after the experiment config, so plain assignments in it take precedence.
. "exps/datasetgen/scripts/configs/global_config.sh"

# Replace the prompt/response paths with the per-part file names.
# Prompt file:    <prompts dir>/prompts_<prompt name>__<date>_part_<part>.json
printf -v CODESCORING_PROMPT_FILE '%s/prompts_%s__%s_part_%s.json' \
    "${CODESCORING_PROMPTS_DIR}" "${CODESCORING_PROMPT_NAME}" "${DATE}" "${PART}"
# Responses file: <responses dir>/<prompt name>__<short model name>__<date>_part_<part>.json
printf -v CODESCORING_RESPONSES_PATH '%s/%s__%s__%s_part_%s.json' \
    "${CODESCORING_RESPONSES_DIR}" "${CODESCORING_PROMPT_NAME}" \
    "${CODESCORING_MODEL_NAME_SHORT}" "${DATE}" "${PART}"

# Generate codescoring responses for this part unless the output file already exists.
# Reads: PYTHON_QWEN, CODESCORING_MODEL_NAME, CODESCORING_PROMPT_FILE,
#        CODESCORING_RESPONSES_PATH, VLLM_MAX_NUM_SEQS, TENSOR_PARALLEL_SIZE, PART
# Exits with the Python script's status when generation fails, so the batch job
# is not reported as successful after a failed run.
if [ ! -f "${CODESCORING_RESPONSES_PATH}" ]; then
    echo -e "\n\n---------CODESCORING: Generate Responses for Part ${PART}---------\n\n"
    echo "Executing Python script with the following parameters:"
    echo "Model name: ${CODESCORING_MODEL_NAME}"
    echo "Prompt file: ${CODESCORING_PROMPT_FILE}"
    echo "Output path: ${CODESCORING_RESPONSES_PATH}"
    echo "VLLM batch size: ${VLLM_MAX_NUM_SEQS}"
    echo "Tensor parallel size: ${TENSOR_PARALLEL_SIZE}"

    # PYTHON_QWEN is left unquoted on purpose so a multi-word launcher keeps working
    # (presumably it may hold more than a bare interpreter path — confirm in the config).
    # The numeric options are quoted so an empty value is passed as an empty argument
    # instead of letting the option swallow the following flag.
    # shellcheck disable=SC2086
    ${PYTHON_QWEN} src/turtlegfx_datagen/codescoring/chat_completion_vllm_codescoring_qwen2vl.py \
        --model_name "${CODESCORING_MODEL_NAME}" \
        --prompt_file "${CODESCORING_PROMPT_FILE}" \
        --max_new_tokens 1024 \
        --do_sample \
        --quantization \
        --top_p 0.001 \
        --temperature 0.1 \
        --vllm_batch_size "${VLLM_MAX_NUM_SEQS}" \
        --tensor_parallel_size "${TENSOR_PARALLEL_SIZE}" \
        --output_path "${CODESCORING_RESPONSES_PATH}"
    py_status=$?

    # Propagate failure instead of falling through to the completion banner.
    if [ "${py_status}" -ne 0 ]; then
        echo "Error: Response generation for part ${PART} failed (exit code ${py_status})." >&2
        exit "${py_status}"
    fi
else
    echo "File ${CODESCORING_RESPONSES_PATH} already exists. Skipping generation."
fi

echo "---------Response Generation Complete---------"


