#!/bin/bash
#SBATCH --job-name=datasetgen
#SBATCH --partition=a100                      # Use GPU partition "a100"
#SBATCH --gres gpu:2                          # Request 2 GPUs per job
#SBATCH -c 32                                 # Number of cores
#SBATCH -N 1                                  # Ensure that all cores are on one machine
#SBATCH -t 4-00:00                            # Maximum run-time in D-HH:MM
#SBATCH --mem=250G                             # Memory pool for all cores (see also --mem-per-cpu)
#SBATCH --output=%j.out                       # File to which STDOUT will be written
#SBATCH --error=%j.err                        # File to which STDERR will be written

# Print usage information and terminate the script.
#
# Arguments: none (reads $0 for the script name shown in the synopsis).
# Outputs:   the usage text on stdout.
# Exits:     always, with status 1.
usage() {
    # -p is shown without brackets because the script refuses to run without it.
    echo "Usage: $0 [-c <experiment_config_file>] -p <part>"
    echo "  -c <config>     Experiment-specific config file (optional)"
    echo "  -p <part>       Part number to process (required)"
    exit 1
}

# Parse command line arguments.
#
# Recognised options:
#   -c <config>   stored in the global CONFIG_FILE
#   -p <part>     stored in the global PART
#
# The optstring starts with ':' (getopts silent mode), so getopts reports a
# missing option argument as ':' and an unknown option as '?', with the
# offending option letter in OPTARG. Both cases print an error and then call
# usage, which terminates the script.
#
# Globals:   CONFIG_FILE, PART (written); OPTIND (advanced by getopts and not
#            reset here, so this is meant to be called once per run).
# Arguments: the script's positional parameters.
parse_args() {
    local opt
    while getopts ":c:p:" opt; do
        case "${opt}" in
            c)
                CONFIG_FILE=$OPTARG
                ;;
            p)
                PART=$OPTARG
                ;;
            :)
                # Without this arm a trailing "-c" or "-p" with no value is
                # silently ignored instead of being reported.
                echo "Error: Option -${OPTARG} requires an argument"
                usage
                ;;
            \?)
                echo "Error: Unknown option -${OPTARG}"
                usage
                ;;
        esac
    done
}

parse_args "$@"

# A part number is mandatory: when PART is empty, report it and show the
# usage text (usage terminates the script).
[ -n "$PART" ] || {
    echo "Error: Part number (-p) must be specified"
    usage
}

# Fall back to the default experiment config when CONFIG_FILE is unset or empty.
CONFIG_FILE="${CONFIG_FILE:-exps/datasetgen/scripts/configs/default_config.sh}"

# Abort when the selected experiment config is not an existing regular file.
if ! [ -f "$CONFIG_FILE" ]; then
    echo "Error: Experiment config file '$CONFIG_FILE' not found."
    exit 1
fi

# Load the experiment config first, then the shared global config.
# NOTE(review): because the global config is sourced second, any variable it
# assigns unconditionally replaces the experiment's value; confirm that this
# ordering is intended.
. "$CONFIG_FILE"

# The global config is required; stop if it is missing.
if [ ! -f "exps/datasetgen/scripts/configs/global_config.sh" ]; then
    echo "Error: Global config file 'exps/datasetgen/scripts/configs/global_config.sh' not found."
    exit 1
fi
. "exps/datasetgen/scripts/configs/global_config.sh"

##############################
# ENV
##############################


# Input prompt file for this part:
#   <CODEGEN_PROMPTS_DIR>/prompts_<CODEGEN_PROMPT_NAME>_part_<PART>.json
printf -v CODEGEN_PROMPT_FILE '%s/prompts_%s_part_%s.json' \
  "${CODEGEN_PROMPTS_DIR}" "${CODEGEN_PROMPT_NAME}" "${PART}"

# Output responses file for this part:
#   <CODEGEN_RESPONSES_DIR>/<CODEGEN_PROMPT_NAME>__<CODEGEN_MODEL_NAME_SHORT>__<DATE>_part_<PART>.json
printf -v CODEGEN_RESPONSES_PATH '%s/%s__%s__%s_part_%s.json' \
  "${CODEGEN_RESPONSES_DIR}" "${CODEGEN_PROMPT_NAME}" \
  "${CODEGEN_MODEL_NAME_SHORT}" "${DATE}" "${PART}"

# CODEGEN response generation for one part.
#
# If the responses file already exists the step is skipped. Otherwise the
# parameters are logged and the vLLM chat-completion script is run; the job
# exits with status 1 if that script returns a non-zero status.
#
# Reads: CODEGEN_RESPONSES_PATH, CODEGEN_PROMPT_FILE, CODEGEN_MODEL_NAME,
#        VLLM_MAX_NUM_SEQS, TENSOR_PARALLEL_SIZE, PYTHON_TURTLE, PART.
if [ ! -f "${CODEGEN_RESPONSES_PATH}" ]; then
  # Fail fast on a missing prompt file instead of discovering it only after
  # the inference script has been launched.
  if [ ! -f "${CODEGEN_PROMPT_FILE}" ]; then
    echo "Error: Prompt file '${CODEGEN_PROMPT_FILE}' not found."
    exit 1
  fi

  echo -e "\n\n---------CODEGEN: Generate Responses for Part ${PART}---------\n\n"
  
  echo "Executing Python script with the following parameters:"
  echo "Model name: ${CODEGEN_MODEL_NAME}"
  echo "Prompt file: ${CODEGEN_PROMPT_FILE}"
  echo "Output path: ${CODEGEN_RESPONSES_PATH}"
  echo "VLLM batch size: ${VLLM_MAX_NUM_SEQS}"
  echo "Tensor parallel size: ${TENSOR_PARALLEL_SIZE}"
  
  # PYTHON_TURTLE is left unquoted so that a multi-word launcher value still
  # word-splits into command + arguments.
  # NOTE(review): confirm whether PYTHON_TURTLE is ever more than one word;
  # if not, it can be quoted as well.
  # The numeric options are quoted so that an empty value is passed as an
  # explicit (empty) argument rather than vanishing and letting the next flag
  # be consumed as its value.
  # shellcheck disable=SC2086
  if ! ${PYTHON_TURTLE} src/turtlegfx_datagen/inference/chat_completion_vllm.py \
    --model_name "${CODEGEN_MODEL_NAME}" \
    --prompt_file "${CODEGEN_PROMPT_FILE}" \
    --max_new_tokens 1024 \
    --do_sample \
    --quantization \
    --top_p 1 \
    --temperature 0.5 \
    --vllm_batch_size "${VLLM_MAX_NUM_SEQS}" \
    --tensor_parallel_size "${TENSOR_PARALLEL_SIZE}" \
    --output_path "${CODEGEN_RESPONSES_PATH}"; then
    echo "Error: Python script failed to execute properly."
    exit 1
  fi
else
  echo "Responses file ${CODEGEN_RESPONSES_PATH} already exists. Skipping CODEGEN Response Generation."
fi

# Reached both when responses were generated and when the step was skipped.
echo "---------Response Generation Complete---------"