#!/bin/bash

# This script runs the PMO training for multiple models with various alpha and beta hyperparameters.

# Strict mode:
#   -e           exit immediately if a command exits with a non-zero status
#   -u           treat expansion of an unset variable as an error, so a typo in
#                a variable name cannot silently become an empty path/argument
#   -o pipefail  a pipeline fails if any of its stages fails
set -euo pipefail

# --- Configuration ---

# Pretrained models to train, one array element per model identifier.
# Each element is later passed unchanged to the training script's --pretrain
# option; append another MODELS+=(...) line to add a model to the sweep.
MODELS=()
MODELS+=("google/gemma-3-1b-pt")
MODELS+=("FuseAI/FuseChat-Llama-3.2-1B-SFT")
MODELS+=("Qwen/Qwen2.5-1.5B-Instruct")

# Hyperparameter combinations to sweep. Each element is a single string of the
# form "alpha beta" (two values separated by one space), which the training
# loop splits into its two fields.
# The values were transcribed from a reference image (not part of this file),
# with duplicate pairs removed.
PARAMS=()
PARAMS+=("0.5 0")
PARAMS+=("1 0")
PARAMS+=("0.05 0.05")
PARAMS+=("0.9 0.1")
PARAMS+=("0.95 0.05")

# --- Training Loop ---

# Runs one training job for every (model, "alpha beta" pair) combination, one
# after another. Reads the MODELS and PARAMS arrays. The script stops at the
# first malformed parameter pair or the first training run that fails, and
# exits with a non-zero status in both cases.
for model in "${MODELS[@]}"; do
    # Create a short, filesystem-friendly name from the model path.
    # For example, "google/gemma-3-1b-pt" becomes "gemma-3-1b-pt".
    # NOTE: only the last path component is kept, so two models that differ
    # only in their prefix would map to the same checkpoint and result paths.
    model_short_name=$(basename "$model")

    for param_pair in "${PARAMS[@]}"; do
        # Split the pair on whitespace. `extra` collects anything after the
        # second field so that entries with too many values can be rejected
        # instead of being folded into beta.
        read -r alpha beta extra <<< "$param_pair"
        if [[ -z "$alpha" || -z "$beta" || -n "$extra" ]]; then
            echo "Error: invalid parameter pair '$param_pair' (expected \"alpha beta\")." >&2
            exit 1
        fi

        echo "======================================================================="
        echo "Starting training for model: $model_short_name"
        echo "With parameters: alpha=$alpha, beta=$beta"
        echo "======================================================================="

        # Per-run output locations; model name and both hyperparameters are
        # part of the name so that runs do not overwrite each other.
        save_path="/data/checkpoint/pmo/${model_short_name}-alpha${alpha}-beta${beta}"
        result_path="./results/pmo/${model_short_name}-alpha${alpha}-beta${beta}.csv"

        # Ensure the parent directories of both output paths exist.
        mkdir -p "$(dirname "$save_path")" "$(dirname "$result_path")"

        # Execute the training command with the current set of parameters.
        # The exit status is captured explicitly so the failing run can be
        # reported and the original status propagated to the caller.
        status=0
        python openrlhf/main/pmo_training_prob.py \
            --pretrain "$model" \
            --save_path "$save_path" \
            --alpha "$alpha" \
            --beta "$beta" \
            --result_path "$result_path" || status=$?
        if (( status != 0 )); then
            echo "Error: training failed (exit status $status) for model: $model_short_name with alpha=$alpha, beta=$beta" >&2
            exit "$status"
        fi

        echo "-----------------------------------------------------------------------"
        echo "Finished training for: $model_short_name with alpha=$alpha, beta=$beta"
        echo "Results saved to: $result_path"
        echo "-----------------------------------------------------------------------"
        echo
    done
done

echo "All training jobs have been completed."
