#!/bin/bash
#SBATCH --job-name=coderelabel
#SBATCH --partition=a100                      # Use GPU partition "a100"
#SBATCH --gres gpu:2                          # Request 2 GPUs per job
#SBATCH -c 32                                 # Number of cores
#SBATCH -N 1                                  # Ensure that all cores are on one machine
#SBATCH -t 4-00:00                            # Maximum run-time in D-HH:MM
#SBATCH --mem=250G                             # Memory pool for all cores (see also --mem-per-cpu)
#SBATCH --output=%j.out                       # File to which STDOUT will be written
#SBATCH --error=%j.err                        # File to which STDERR will be written

# Print the command-line synopsis and terminate the script.
# Outputs: the usage text on stderr (this is only reached on a parsing
#          error, so it belongs with the diagnostics, not the job output).
# Returns: never returns; exits with status 1.
usage() {
    {
        echo "Usage: $0 [-c <experiment_config_file>] [-p <part>]"
        echo "  -c <config>     Experiment-specific config file (optional)"
        echo "  -p <part>       Part number to process"
    } >&2
    exit 1
}

# Parse command line arguments
#   -c <config>  stored in the global CONFIG_FILE
#   -p <part>    stored in the global PART
# The leading ':' in the optstring selects getopts' silent mode: a missing
# option argument is reported as opt=':' and an unknown option as opt='?',
# with the offending option letter in OPTARG. Both cases print a diagnostic
# to stderr and then call usage (defined earlier in this script).
parse_args() {
    local opt
    local OPTIND=1
    while getopts ":c:p:" opt; do
        case "${opt}" in
            c )
                CONFIG_FILE=$OPTARG
                ;;
            p )
                PART=$OPTARG
                ;;
            : )
                echo "Error: option -${OPTARG} requires an argument." >&2
                usage
                ;;
            \? )
                echo "Error: unknown option -${OPTARG}." >&2
                usage
                ;;
        esac
    done
}

parse_args "$@"

############################################################
# Source configuration files
############################################################
# Fall back to the default relabel config when -c was not given.
if [ -z "$CONFIG_FILE" ]; then
    CONFIG_FILE="exps/datasetgen/scripts/configs/dataset_relabel_config.sh"
fi

# Check if config file exists.
# Plain echo is used here: nothing has been sourced yet at this point, so no
# logging helper defined in a config file can be available.
if [ ! -f "$CONFIG_FILE" ]; then
    echo "Error: Experiment config file '$CONFIG_FILE' not found." >&2
    exit 1
fi

# shellcheck source=/dev/null
source "$CONFIG_FILE"

##############################
# ENV
##############################


# Variables
# PROMPTS_DIR, PROMPT_NAME, RESPONSES_DIR, MODEL_NAME, MODEL_NAME_SHORT, DATE,
# VLLM_MAX_NUM_SEQS, TENSOR_PARALLEL_SIZE and PYTHON_VLLM are not assigned in
# this script; presumably they are provided by the sourced config file
# (TODO confirm against the config).

# PART is embedded in both file names below; without it the paths would end
# in "_part_.json", so stop early with a clear message instead.
if [ -z "${PART:-}" ]; then
  echo "Error: no part number given; pass one with -p <part>." >&2
  exit 1
fi

PROMPT_FILE="${PROMPTS_DIR}/prompts_${PROMPT_NAME}_part_${PART}.json"

# RELABEL: Generate Responses
RESPONSES_PATH="${RESPONSES_DIR}/${PROMPT_NAME}__${MODEL_NAME_SHORT}__${DATE}_part_${PART}.json"

# The responses file doubles as a completion marker: generation only runs
# when it does not exist yet.
if [ ! -f "${RESPONSES_PATH}" ]; then
  printf '\n\n---------RELABEL: Generate Responses for Part %s---------\n\n\n' "${PART}"

  echo "Executing Python script with the following parameters:"
  echo "Model name: ${MODEL_NAME}"
  echo "Prompt file: ${PROMPT_FILE}"
  echo "Output path: ${RESPONSES_PATH}"
  echo "VLLM batch size: ${VLLM_MAX_NUM_SEQS}"
  echo "Tensor parallel size: ${TENSOR_PARALLEL_SIZE}"
  echo "Python VLLM: ${PYTHON_VLLM}"

  # PYTHON_VLLM is left unquoted so that a multi-word launcher command would
  # still be split into words (NOTE(review): confirm whether that is needed).
  # Every other expansion is quoted so an empty or space-containing value
  # cannot shift the option/value pairing.
  # shellcheck disable=SC2086
  if ! ${PYTHON_VLLM} src/turtlegfx_datagen/inference/build_responses_pixtral.py \
    --model_name "${MODEL_NAME}" \
    --prompt_file "${PROMPT_FILE}" \
    --max_new_tokens 4096 \
    --do_sample \
    --quantization \
    --top_p 1 \
    --temperature 0.01 \
    --vllm_batch_size "${VLLM_MAX_NUM_SEQS}" \
    --tensor_parallel_size "${TENSOR_PARALLEL_SIZE}" \
    --output_path "${RESPONSES_PATH}"; then
    echo "Error: Python script failed to execute properly." >&2
    exit 1
  fi
else
  echo "Responses file ${RESPONSES_PATH} already exists. Skipping RELABEL Response Generation."
fi

echo "---------Response Generation Complete---------"