#!/bin/bash

# BUFFER_SIZES=(262144 1048576 4194304 16777216 67108864 268435456 536870912 1073741824) # perlmutter
# padded buffer sizes
# STRAGGLAR=(264192 1050624 4196352 16779264 67110912 268437504 536871936 1073743872)  # perlmutter

# --- runpod configuration ----------------------------------------------------

# Sizes handed to the binary for every algorithm except "stragglar".
BUFFER_SIZES=(
  1048576      # 2^20
  4194304      # 2^22
  16777216     # 2^24
  67108864     # 2^26
  268435456    # 2^28
  1073741824   # 2^30
  2147483648   # 2^31
  3221225472   # 3 * 2^30
  4294967296   # 2^32
)

# Sizes handed to the binary for the "stragglar" algorithm. Entry i pairs with
# BUFFER_SIZES[i] and is slightly larger than it, so both arrays must keep the
# same length and ordering.
STRAGGLAR=(
  1060864
  4214784
  16801792
  67121152
  268455936
  1073766400
  2147504128
  3221241856
  4294979584
)

# Algorithm names passed to the binary, in execution order.
ALGORITHMS=(allpairs ring rhd stragglar direct)

# Executable under test and the CSV file that collects its output rows
# (both relative to the directory the script is launched from).
BINARY="../code/allreduce"
OUTPUT_FILE="../data/8gpu_scaling_with_delay.csv"

# Third and fourth arguments passed to every invocation of the binary.
NUM_ITERS=50
# DELAY: set to either -1 or 9.455, for assumed masked delay or average delay.
DELAY=9.455

# Start a fresh results file containing only the CSV header (any previous
# contents are truncated). Every later step appends to this file, so abort now
# if it cannot be created instead of running all benchmarks and losing the
# results.
if ! mkdir -p -- "$(dirname -- "$OUTPUT_FILE")" ||
   ! printf '%s\n' "algorithm,buffer_size_bytes,iteration,delay,runtime_ms,bw_gbs" > "$OUTPUT_FILE"; then
  echo "error: cannot write CSV header to '$OUTPUT_FILE'" >&2
  exit 1
fi

#######################################
# Run the benchmark binary once and append its CSV rows to the results file.
# Only output lines that look like a data row
# (name,int,int,decimal,decimal,decimal) are kept; everything else the binary
# prints is dropped.
# Globals:   BINARY, NUM_ITERS, DELAY, OUTPUT_FILE (all read)
# Arguments: $1 - algorithm name
#            $2 - size argument passed to the binary
# Outputs:   appends matching rows to OUTPUT_FILE; warnings go to stderr
# Returns:   0 if the binary succeeded and at least one row was recorded,
#            the binary's exit status if it failed, otherwise 1
#######################################
run_case() {
  local alg=$1 size_arg=$2
  # NOTE(review): the delay column must contain a decimal point to match; if
  # the binary prints a bare "-1" for DELAY=-1 those rows would be dropped.
  local row_re='^[a-z]+,[0-9]+,[0-9]+,-?[0-9]+\.[0-9]+,[0-9]+\.[0-9]+,[0-9]+\.[0-9]+$'
  local -a stage_status

  "$BINARY" "$size_arg" "$alg" "$NUM_ITERS" "$DELAY" \
    | grep -E "$row_re" >> "$OUTPUT_FILE"
  # Must be captured immediately: any later command overwrites PIPESTATUS.
  stage_status=("${PIPESTATUS[@]}")

  if (( stage_status[0] != 0 )); then
    echo "warning: '$BINARY' exited with status ${stage_status[0]} for $alg (size argument $size_arg)" >&2
    return "${stage_status[0]}"
  fi
  if (( stage_status[1] != 0 )); then
    # grep exits 1 when nothing matched and >1 on a real error.
    echo "warning: no CSV rows recorded for $alg (size argument $size_arg)" >&2
    return 1
  fi
  return 0
}

# Sweep every algorithm over every buffer size. A failed run is reported on
# stderr and counted, but the sweep keeps going so one bad configuration does
# not discard the remaining measurements.
failures=0
for alg in "${ALGORITHMS[@]}"; do
  for idx in "${!BUFFER_SIZES[@]}"; do
    size=${BUFFER_SIZES[idx]}
    extra=${STRAGGLAR[idx]}                 # safe even for non-stragglar algorithms

    echo "Running $alg on size $size ..."

    if [[ $alg == "stragglar" ]]; then
      # stragglar is given the matching STRAGGLAR entry instead of the plain size
      if [[ -z $extra ]]; then
        echo "warning: STRAGGLAR has no entry for index $idx (size $size); skipping" >&2
        failures=$((failures + 1))
        continue
      fi
      size_arg=$extra
    else
      size_arg=$size
    fi

    run_case "$alg" "$size_arg" || failures=$((failures + 1))

  done
done

if (( failures > 0 )); then
  echo "warning: $failures benchmark run(s) failed or produced no rows" >&2
fi