#!/bin/bash

# Path to the run script (modify if needed)
RUN_SCRIPT="/YOUR_ROOT_PATH/scripts/run_FARE_regularized.sh"

# Check interval in seconds
CHECK_INTERVAL=60

# Generate a timestamp for the log file (the log is created in the current
# working directory, one file per monitor invocation)
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
LOG_FILE="gpu_monitor_${TIMESTAMP}.log"

# Check that the run script is a regular file and is executable.
# -x alone is also true for a searchable directory, hence the extra -f test.
# Diagnostics go to stderr so they are not mixed into captured stdout.
if [[ ! -f "$RUN_SCRIPT" || ! -x "$RUN_SCRIPT" ]]; then
    echo "Error: $RUN_SCRIPT does not exist or is not executable." >&2
    exit 1
fi

# Refuse to start without nvidia-smi: an absent tool produces no output,
# which a process count would read as "no processes" and launch the job at once.
if ! command -v nvidia-smi >/dev/null 2>&1; then
    echo "Error: nvidia-smi not found in PATH; cannot monitor GPUs." >&2
    exit 1
fi

echo "Starting GPU monitor at $(date)" | tee -a "$LOG_FILE"
echo "Will run $RUN_SCRIPT when all GPUs are free" | tee -a "$LOG_FILE"
echo "Logging to $LOG_FILE" | tee -a "$LOG_FILE"

# Poll until no compute process is running on any GPU, then launch
# $RUN_SCRIPT exactly once and leave the loop.
# Reads:  RUN_SCRIPT, CHECK_INTERVAL, LOG_FILE
# Exits:  with the exit status of $RUN_SCRIPT (1 if nvidia-smi is missing).
RUN_STATUS=0
while true; do
    # One CSV row per (GPU, compute process) pair, independent of the
    # process name; this is stable to parse, unlike the human-readable table.
    APP_ROWS=$(nvidia-smi --query-compute-apps=gpu_uuid,pid --format=csv,noheader)
    QUERY_STATUS=$?

    if [ "$QUERY_STATUS" -eq 127 ]; then
        # 127 is the shell's "command not found" status: retrying cannot help.
        echo "$(date): nvidia-smi not found; cannot monitor GPUs" | tee -a "$LOG_FILE" >&2
        exit 1
    elif [ "$QUERY_STATUS" -ne 0 ]; then
        # A failed query yields no rows; never mistake that for idle GPUs.
        echo "$(date): nvidia-smi failed (exit $QUERY_STATUS); GPU state unknown" | tee -a "$LOG_FILE" >&2
        echo "Checking again in $CHECK_INTERVAL seconds..." | tee -a "$LOG_FILE"
        sleep "$CHECK_INTERVAL"
        continue
    fi

    # Count distinct PIDs and distinct GPUs. Rows with fewer than two
    # fields (blank lines, stray messages) are ignored.
    COUNTS=$(printf '%s\n' "$APP_ROWS" | awk -F', *' '
        NF >= 2 {
            if (!($1 in gpus)) { gpus[$1] = 1; gpu_total++ }
            if (!($2 in pids)) { pids[$2] = 1; pid_total++ }
        }
        END { print pid_total + 0, gpu_total + 0 }
    ')
    PROCESS_COUNT=${COUNTS% *}
    GPU_COUNT=${COUNTS#* }

    if [ "$PROCESS_COUNT" -eq 0 ]; then
        echo "$(date): All GPUs are free! Running $RUN_SCRIPT" | tee -a "$LOG_FILE"
        # Quoted so a path containing spaces runs as a single command word.
        "$RUN_SCRIPT"
        RUN_STATUS=$?
        if [ "$RUN_STATUS" -eq 0 ]; then
            echo "$(date): $RUN_SCRIPT execution completed" | tee -a "$LOG_FILE"
        else
            echo "$(date): $RUN_SCRIPT failed with exit status $RUN_STATUS" | tee -a "$LOG_FILE" >&2
        fi
        break
    fi

    echo "$(date): $PROCESS_COUNT processes running on $GPU_COUNT GPUs" | tee -a "$LOG_FILE"
    echo "Checking again in $CHECK_INTERVAL seconds..." | tee -a "$LOG_FILE"
    sleep "$CHECK_INTERVAL"
done

echo "$(date): GPU monitor exiting" | tee -a "$LOG_FILE"
# Propagate the job's result so wrappers can detect a failed run.
exit "$RUN_STATUS"