#!/bin/bash

# Run configuration.
# DRY_RUN: keep the caller's value if one is set and non-empty; otherwise
# fall back to "false", i.e. actually launch the jobs.
: "${DRY_RUN:=false}"
# First positional argument; prefixed to every checkpoint file name below.
path="$1"

# Make sure the directory that receives the per-checkpoint outputs exists.
mkdir -p "logs/generate"

# Build the checkpoint file names for steps 100, 200, ..., 1600, with the
# step zero-padded to six digits (state_step000100.pt ... state_step001600.pt).
checkpoints=()
for ((step = 100; step <= 1600; step += 100)); do
    printf -v ckpt_name 'state_step%06d.pt' "$step"
    checkpoints+=("$ckpt_name")
done

# Process checkpoints in batches, one checkpoint per GPU.
#
# Inputs (set earlier in this script / by the caller):
#   path        - directory prefix of the checkpoint files (from $1)
#   checkpoints - array of checkpoint file names
#   DRY_RUN     - exactly "true" prints the commands instead of running them;
#                 any other value performs a real run
#   NUM_GPUS    - optional; number of GPUs (= jobs per batch). Defaults to 8.
#
# Each batch launches up to NUM_GPUS background jobs, pinning job j to GPU j
# via CUDA_VISIBLE_DEVICES, then waits for all of them before starting the
# next batch. Any job that exits non-zero is reported on stderr and makes the
# script exit with status 1 after all batches have been attempted.
num_gpus=${NUM_GPUS:-8}
if ! [[ "$num_gpus" =~ ^[1-9][0-9]*$ ]]; then
    echo "NUM_GPUS must be a positive integer, got '${num_gpus}'" >&2
    exit 1
fi

# An empty path would silently turn into /state_stepNNNNNN.pt.
if [[ -z "$path" ]]; then
    echo "Usage: $0 <checkpoint_dir>" >&2
    exit 1
fi

failed=0
for ((i = 0; i < ${#checkpoints[@]}; i += num_gpus)); do
    # PIDs and checkpoint names of the jobs launched in this batch, kept in
    # parallel so a failure can be attributed to its checkpoint.
    pids=()
    launched=()
    for ((j = 0; j < num_gpus && i + j < ${#checkpoints[@]}; j++)); do
        checkpoint=${checkpoints[i + j]}
        if [[ "$DRY_RUN" == true ]]; then
            echo "Would run: CUDA_VISIBLE_DEVICES=$j python inference_SFT.py \
                    --model_path ${path}/${checkpoint} \
                    --input_path unbalanced_pretrain/sft_power_test.txt \
                    --output_path logs/generate/${checkpoint}.json \
                    --first_n 50000 \
                    --processes_per_gpu 2"
        else
            echo "Running inference for ${checkpoint} on GPU $j"
            CUDA_VISIBLE_DEVICES=$j python inference_SFT.py \
                --model_path "${path}/${checkpoint}" \
                --input_path unbalanced_pretrain/sft_power_test.txt \
                --output_path "logs/generate/${checkpoint}.json" \
                --first_n 50000 \
                --processes_per_gpu 2 &
            pids+=("$!")
            launched+=("$checkpoint")
        fi
    done
    # The barrier uses the same DRY_RUN test as the launch above, so a real
    # run always waits for its batch regardless of how DRY_RUN is spelled.
    if [[ "$DRY_RUN" == true ]]; then
        echo "Would wait"
    else
        # Wait on each PID individually: a bare `wait` discards exit codes.
        for k in "${!pids[@]}"; do
            if ! wait "${pids[k]}"; then
                echo "Inference failed for ${launched[k]}" >&2
                failed=1
            fi
        done
    fi
done

if ((failed)); then
    exit 1
fi