#!/usr/bin/env bash
# Keep trying to create tpu-node-2 in us-east5-a until capacity opens up.
#
# Repeatedly runs `gcloud alpha compute tpus tpu-vm create` for $NAME in
# $ZONE, sleeping between attempts. Progress lines are printed to stdout and
# appended to $LOG, together with the last five lines of each gcloud output.
#
# Environment:
#   SLEEP_SEC     delay between attempts, handed to sleep(1)   (default: 120)
#   MAX_ATTEMPTS  stop after this many attempts; 0 = no limit  (default: 0)
#
# Exit status:
#   0    gcloud succeeded, or its output says the node already exists
#   1    MAX_ATTEMPTS exhausted, MAX_ATTEMPTS invalid, or sleep rejected SLEEP_SEC
#   127  gcloud is not on PATH
#
# NOTE: `set -e` is deliberately not used: a failing gcloud call is the
# expected case and is handled explicitly via its exit status.
set -u

readonly NAME="tpu-node-2"
readonly ZONE="us-east5-a"
readonly TYPE="v6e-1"
readonly VERSION="v2-alpha-tpuv6e"
# NOTE(review): PROJECT and LOG look like placeholders; set real values
# before running.
readonly PROJECT="YOUR_GCP_PROJECT"
readonly SLEEP_SEC="${SLEEP_SEC:-120}"
readonly MAX_ATTEMPTS="${MAX_ATTEMPTS:-0}"
readonly LOG="/path/to/autocomp/scripts/retry_tpu_create.log"

# Print a timestamped message to stdout and append it to $LOG.
log() {
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" | tee -a "$LOG"
}

# Log an error (to stderr and $LOG) and exit.
#   $1 - exit status, remaining args - message
die() {
    local -r status=$1
    shift
    log "ERROR: $*" >&2
    exit "$status"
}

main() {
    # Logging stays best-effort: warn if the directory cannot be created,
    # but still attempt the TPU creation.
    local log_dir
    log_dir=$(dirname -- "$LOG")
    mkdir -p -- "$log_dir" ||
        printf 'warning: cannot create log directory %s\n' "$log_dir" >&2

    # Without gcloud every attempt would fail with status 127 forever.
    command -v gcloud >/dev/null 2>&1 || die 127 "gcloud not found on PATH"

    [[ "$MAX_ATTEMPTS" =~ ^[0-9]+$ ]] ||
        die 1 "MAX_ATTEMPTS must be a non-negative integer, got '$MAX_ATTEMPTS'"
    # Force base 10 so a value such as "08" is not parsed as invalid octal.
    local -r max_attempts=$((10#$MAX_ATTEMPTS))

    local attempt=0 out rc sleep_rc
    while true; do
        attempt=$((attempt + 1))
        log "attempt $attempt: creating $NAME in $ZONE..."

        rc=0
        out=$(gcloud alpha compute tpus tpu-vm create "$NAME" \
            --zone="$ZONE" \
            --accelerator-type="$TYPE" \
            --version="$VERSION" \
            --project="$PROJECT" 2>&1) || rc=$?
        # Keep only the tail of the (possibly long) gcloud output in the log.
        printf '%s\n' "$out" | tail -n 5 >>"$LOG"

        if ((rc == 0)); then
            log "SUCCESS on attempt $attempt"
            exit 0
        fi

        # A node left over from an earlier run counts as success.
        if [[ "$out" == *ALREADY_EXISTS* || "$out" == *"already exists"* ]]; then
            log "already exists, exiting"
            exit 0
        fi

        if ((max_attempts > 0 && attempt >= max_attempts)); then
            die 1 "giving up after $attempt attempts (last gcloud status $rc)"
        fi

        sleep_rc=0
        sleep "$SLEEP_SEC" || sleep_rc=$?
        # A status below 128 means sleep itself failed (e.g. invalid
        # SLEEP_SEC); retrying without a delay would hammer the API.
        # Statuses >= 128 mean sleep was interrupted by a signal, which is
        # treated as "retry now".
        if ((sleep_rc != 0 && sleep_rc < 128)); then
            die 1 "sleep '$SLEEP_SEC' failed with status $sleep_rc; refusing to retry without a delay"
        fi
    done
}

main "$@"
