#!/bin/bash

# Default settings; both can be overridden from the command line.
MAX_PARALLEL_JOBS=4
JOBS_DIR="batch_jobs"

# Parse command line arguments
#
# Globals:   MAX_PARALLEL_JOBS, JOBS_DIR (written)
# Arguments: the script's command line
#   --max_parallel N   maximum number of jobs to run at once (positive integer)
#   --jobs_dir DIR     directory containing the job files
# Exits:     status 1 on an unknown parameter, a missing value, or a
#            --max_parallel value that is not a positive integer
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --max_parallel | --jobs_dir)
                # "shift 2" fails WITHOUT shifting when only the flag is left,
                # which would make this loop spin forever; reject it up front.
                if [[ $# -lt 2 ]]; then
                    echo "Missing value for parameter: $1"
                    exit 1
                fi
                if [[ "$1" == --max_parallel ]]; then
                    MAX_PARALLEL_JOBS="$2"
                else
                    JOBS_DIR="$2"
                fi
                shift 2
                ;;
            *)
                echo "Unknown parameter: $1"
                exit 1
                ;;
        esac
    done

    # A limit of 0 would make the scheduling loop wait forever, and a
    # non-numeric value breaks its integer comparison, so validate here.
    # 10# forces base 10 so values like "08" are not read as octal.
    if ! [[ "$MAX_PARALLEL_JOBS" =~ ^[0-9]+$ ]] || (( 10#$MAX_PARALLEL_JOBS < 1 )); then
        echo "Invalid value for --max_parallel: $MAX_PARALLEL_JOBS"
        exit 1
    fi
    return 0
}

parse_args "$@"

# Abort early when the configured jobs directory does not exist.
[[ -d "$JOBS_DIR" ]] || {
    echo "Error: Jobs directory '$JOBS_DIR' not found"
    exit 1
}

# Job bookkeeping used by the scheduling code below:
#   running_jobs     associative array, PID of a background job -> job name
#   successful_jobs  names of jobs that exited with status 0
#   failed_jobs      names of jobs that exited with a non-zero status
declare -A running_jobs
declare -a successful_jobs=() failed_jobs=()

# Function to run a single job
#
# Executes the contents of a job file as a bash script and reports the outcome.
# Globals:   JOBS_DIR (read) - failed job paths are appended to
#            $JOBS_DIR/failed_jobs.txt
# Arguments: $1 - path to the job file
# Outputs:   progress messages on stdout, plus whatever the job itself prints
# Returns:   0 if the job exited with status 0;
#            1 if the job failed or the job file could not be read
run_job() {
    local job_file=$1
    local job_name job_script
    # Declared separately so the assignment does not hide basename's status.
    job_name=$(basename "$job_file")

    echo "Running job: $job_name"

    # Read the script first and require the read to succeed: if the file is
    # missing or unreadable, "bash -c" would otherwise be handed an empty
    # command and the job would be reported as successful.
    if job_script=$(cat -- "$job_file") && bash -c "$job_script"; then
        echo "Job completed successfully: $job_name"
        return 0
    fi

    echo "Job failed: $job_name"
    # Record failed jobs in a file
    echo "$job_file" >> "$JOBS_DIR/failed_jobs.txt"
    return 1
}

# Get all job files
#
# Builds JOB_FILES, the list of job scripts to run in this invocation:
#   - if $JOBS_DIR/failed_jobs.txt exists, only the jobs listed there are run;
#   - otherwise every *.job file directly inside $JOBS_DIR is run.
if [ -f "$JOBS_DIR/failed_jobs.txt" ]; then
    # Resubmit failed jobs mode
    echo "Found failed jobs file, resubmitting failed jobs..."

    # One path per line. mapfile keeps paths containing spaces or glob
    # characters intact; awk drops blank lines and repeated entries so a job
    # is never launched twice in the same run.
    mapfile -t JOB_FILES < <(awk 'NF && !seen[$0]++' "$JOBS_DIR/failed_jobs.txt")
    if [ ${#JOB_FILES[@]} -eq 0 ]; then
        echo "No failed jobs to retry"
        exit 1
    fi

    # Start this run with a clean failure list. Failures are appended to the
    # file as jobs finish, so leaving the old entries in place would keep
    # jobs that now succeed listed forever and the script could never return
    # to normal mode. Trade-off: if this run is interrupted, retried jobs
    # that had not failed again yet are no longer listed.
    rm -f -- "$JOBS_DIR/failed_jobs.txt"
else
    # Normal execution mode
    JOB_FILES=("$JOBS_DIR"/*.job)
    # Without nullglob an unmatched pattern stays in the array as a literal
    # string; treat that case as "no jobs" so the check below can trigger.
    [[ -e "${JOB_FILES[0]}" ]] || JOB_FILES=()
fi

if [ ${#JOB_FILES[@]} -eq 0 ]; then
    echo "No job files found in $JOBS_DIR"
    exit 1
fi

echo "Found ${#JOB_FILES[@]} jobs to process"

# Process jobs
#
# Launches every entry of JOB_FILES as a background run_job call, keeping at
# most MAX_PARALLEL_JOBS of them running at once. Finished jobs are removed
# from running_jobs and their names appended to successful_jobs/failed_jobs.
for job_file in "${JOB_FILES[@]}"; do
    # Wait if we've reached max parallel jobs
    while [ "${#running_jobs[@]}" -ge "$MAX_PARALLEL_JOBS" ]; do
        for job_pid in "${!running_jobs[@]}"; do
            # kill -0 delivers no signal; it only tests whether the process
            # still exists.
            if ! kill -0 "$job_pid" 2>/dev/null; then
                # The job has already exited, so wait returns at once with
                # its exit status.
                wait "$job_pid"
                exit_status=$?
                job_name=${running_jobs[$job_pid]}
                unset "running_jobs[$job_pid]"

                # Quoted appends keep a job name with spaces or glob
                # characters as a single array element.
                if [ "$exit_status" -eq 0 ]; then
                    successful_jobs+=("$job_name")
                else
                    failed_jobs+=("$job_name")
                fi
            fi
        done

        # Only pause when every slot is still taken; if a slot was just
        # freed the next job can start immediately.
        if [ "${#running_jobs[@]}" -ge "$MAX_PARALLEL_JOBS" ]; then
            sleep 1
        fi
    done

    # Start new job
    job_name=$(basename "$job_file")
    run_job "$job_file" &
    running_jobs[$!]=$job_name
done

# Wait for remaining jobs
#
# Blocks until every job still listed in running_jobs has finished and files
# its name under successful_jobs or failed_jobs according to its exit status.
for job_pid in "${!running_jobs[@]}"; do
    wait "$job_pid"
    exit_status=$?
    job_name=${running_jobs[$job_pid]}

    # Quoted appends keep a job name with spaces or glob characters as a
    # single array element, so the summary counts stay correct.
    if [ "$exit_status" -eq 0 ]; then
        successful_jobs+=("$job_name")
    else
        failed_jobs+=("$job_name")
    fi
done

# Print summary: counts and names of the jobs that succeeded and failed.
printf '%b\n' "\nJob Summary:"
printf '%s\n' "Successful jobs (${#successful_jobs[@]}): ${successful_jobs[*]}"
printf '%s\n' "Failed jobs (${#failed_jobs[@]}): ${failed_jobs[*]}"

# Nothing failed: finish with a success status.
if [ ${#failed_jobs[@]} -eq 0 ]; then
    exit 0
fi

# Guidance for failed jobs, then signal the failure through the exit status.
printf '%b\n' "\nSome jobs failed. To retry failed jobs, run:"
printf '%s\n' "bash ./src/openai/submit_batch_jobs.sh --jobs_dir $JOBS_DIR"
exit 1
