#!/bin/bash
#SBATCH --job-name=codescoring_post
#SBATCH --partition=a100                       # Partition to submit to (a100)
#SBATCH -c 8                                  # Number of cores
#SBATCH -N 1                                  # Ensure that all cores are on one machine
#SBATCH -t 1-00:00                           # Maximum run-time in D-HH:MM
#SBATCH --mem=64G                            # Memory pool for all cores
#SBATCH --output=%j.out                      # File to which STDOUT will be written
#SBATCH --error=%j.err                       # File to which STDERR will be written

# Function to display usage information
usage() {
    echo "Usage: $0 [-c <experiment_config_file>]"
    echo "  -c <config>     Experiment-specific config file (optional)"
    exit 1
}

# Parse command line arguments
while getopts ":c:" opt; do
    case ${opt} in
        c )
            CONFIG_FILE=$OPTARG
            ;;
        \? )
            usage
            ;;
    esac
done

# Set default config file if not provided
if [ -z "$CONFIG_FILE" ]; then
    CONFIG_FILE="exps/datasetgen/scripts/configs/default_config.sh"
fi

# Check if config file exists
if [ ! -f "$CONFIG_FILE" ]; then
    echo "Error: Experiment config file '$CONFIG_FILE' not found."
    exit 1
fi

# Source the global and experiment-specific configuration files
source "$CONFIG_FILE"
if [ -f "exps/datasetgen/scripts/configs/global_config.sh" ]; then
    source "exps/datasetgen/scripts/configs/global_config.sh"
else
    echo "Error: Global config file 'exps/datasetgen/scripts/configs/global_config.sh' not found."
    exit 1
fi

INPUT=$(find ${CODESCORING_RESPONSES_DIR} -regex ".*/${CODESCORING_PROMPT_NAME}__${CODESCORING_MODEL_NAME_SHORT}__${DATE}_part_[0-9]+\.json" 2>/dev/null)
OUTPUT="exps/datasetgen/results/${DATE}/iter$((ITERATION+1))/seed_dataset_iter$((ITERATION+1)).json"

if [ ! -f "${OUTPUT}" ] && ls ${INPUT} > /dev/null 2>&1; then
    echo -e "\n\n---------CODESCORING: Postprocess Responses---------\n\n"
    echo "Processing responses with the following parameters:"
    echo "Input path: ${INPUT}"
    echo "Output path: ${OUTPUT}"

    if ! ${PYTHON_TURTLE} src/turtlegfx_datagen/codescoring/postprocess.py \
        --input_paths ${INPUT} \
        --output_path ${OUTPUT} \
        --top_percentage ${TOP_PERCENTAGE}
    then
        echo "Error: Python script failed to execute properly."
        exit 1
    fi

else
    echo "Postprocessed file ${OUTPUT} already exists. Skipping postprocessing."
fi

echo "---------Postprocessing Complete---------"
