#!/bin/bash
#############################################################################
# Hypothesis Composition Case Study Runner
# Compares base model vs LoRA model performance on hypothesis generation
#
# This script runs a single case study and is designed for single-GPU usage
# (since it loads two models: base + LoRA for comparison)
#
# Usage:
#   bash run_case_study_hypothesis_composition.sh
#   bash run_case_study_hypothesis_composition.sh --gpu 0
#   bash run_case_study_hypothesis_composition.sh --input custom_input.json
#############################################################################

# Abort as soon as any command exits with a non-zero status.
set -e

# Stop the script when it is interrupted. A handler that only prints a
# message would hand control back to the script, which would then keep
# running the remaining steps; exiting explicitly prevents that.
# Exit codes follow the usual 128 + signal-number convention.
trap 'echo "Script interrupted." >&2; exit 130' INT
trap 'echo "Script interrupted." >&2; exit 143' TERM

# Startup banner with the launch time.
echo "============================================================"
echo "HYPOTHESIS COMPOSITION - CASE STUDY"
echo "============================================================"
echo "Starting at: $(date)"
echo ""

###############################################################################
# Configuration - MODIFY THESE VALUES
###############################################################################

## Base model that every run starts from
BASE_MODEL_PATH='/pfs/training-data/hf/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B'

## LoRA checkpoint compared against the base model - MODIFY THIS PATH
## Built from $folder_checkpoints, which is not assigned in this script and
## is therefore expected to come from the environment.
## Alternative checkpoints (uncomment one to override the default below):
# LORA_PATH="$folder_checkpoints/lora_training_hypothesis_composition_147k_curriculum_learning_42_seed"
# LORA_PATH="$folder_checkpoints/lora_training_hypothesis_composition_196k_42_seed"
LORA_PATH="$folder_checkpoints/lora_training_hypothesis_composition_147k_update_format"

## Input template for the case study (optional)
INPUT_FILE='Evaluation/case_study_input_template_hypothesis_composition.json'

## Directory that receives the case study results - MODIFY THIS PATH
## Built from $folder_evaluation_results, likewise not assigned in this script.
OUTPUT_DIR="$folder_evaluation_results/case_study_results"

## GPU index handed to the run (default: 0)
GPU_ID='0'

## Sampling / generation settings
MAX_NEW_TOKENS='4096'
TEMPERATURE='0.6'
TOP_P='0.9'
REPETITION_PENALTY='1.2'

###############################################################################
# Parse Command Line Arguments
###############################################################################

# Abort with a clear message when a value-taking option is the last word on
# the command line. Without this check `shift 2` fails and, under `set -e`,
# the script would stop without printing anything.
#   $1 - option name as typed by the user
#   $2 - number of words still left on the command line (option included)
require_option_value() {
    if [[ "$2" -lt 2 ]]; then
        echo "Error: option $1 requires a value (see --help)" >&2
        exit 1
    fi
}

# Apply command-line overrides to the configuration variables.
# Arguments: the script's command-line words.
# Globals written: BASE_MODEL_PATH, LORA_PATH, INPUT_FILE, OUTPUT_DIR,
#                  GPU_ID, MAX_NEW_TOKENS, TEMPERATURE.
# Exits 0 after printing help; exits 1 on an unknown option or a missing value.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --base_model_path)
                require_option_value "$1" "$#"
                BASE_MODEL_PATH="$2"
                shift 2
                ;;
            --lora_path)
                require_option_value "$1" "$#"
                LORA_PATH="$2"
                shift 2
                ;;
            --input|--input_file)
                require_option_value "$1" "$#"
                INPUT_FILE="$2"
                shift 2
                ;;
            --output_dir)
                require_option_value "$1" "$#"
                OUTPUT_DIR="$2"
                shift 2
                ;;
            --gpu)
                require_option_value "$1" "$#"
                GPU_ID="$2"
                shift 2
                ;;
            --max_new_tokens)
                require_option_value "$1" "$#"
                MAX_NEW_TOKENS="$2"
                shift 2
                ;;
            --temperature)
                require_option_value "$1" "$#"
                TEMPERATURE="$2"
                shift 2
                ;;
            --no_lora)
                # An empty LORA_PATH is how the rest of the script
                # recognises a base-model-only run.
                LORA_PATH=""
                shift
                ;;
            --help)
                echo "Usage: $0 [options]"
                echo ""
                echo "Options:"
                echo "  --base_model_path PATH   Path to base model"
                echo "  --lora_path PATH         Path to LoRA checkpoint (or --no_lora)"
                echo "  --input FILE             Custom input JSON file"
                echo "  --output_dir DIR         Output directory"
                echo "  --gpu ID                 GPU to use (default: 0)"
                echo "  --max_new_tokens N       Max tokens to generate (default: 4096)"
                echo "  --temperature FLOAT      Generation temperature (default: 0.6)"
                echo "  --no_lora                Run without LoRA (base model only)"
                echo "  --help                   Show this help message"
                exit 0
                ;;
            *)
                echo "Unknown option: $1" >&2
                exit 1
                ;;
        esac
    done
}

parse_args "$@"

###############################################################################
# Display Configuration
###############################################################################

# Print the effective settings so the run is reproducible from the log.
echo "Configuration:"
echo "------------------------------------------------------------"
echo "Base Model:      $BASE_MODEL_PATH"
echo "LoRA Path:       ${LORA_PATH:-"(none - base model only)"}"
echo "Input File:      ${INPUT_FILE:-"(using default example)"}"
echo "Output Dir:      $OUTPUT_DIR"
echo "GPU:             $GPU_ID"
echo "Max New Tokens:  $MAX_NEW_TOKENS"
echo "Temperature:     $TEMPERATURE"
echo ""

# Enter the project root, create the output directory there, and choose the
# result filename.
# Arguments: $1 - directory to change into before anything else happens
# Globals read:    OUTPUT_DIR, LORA_PATH
# Globals written: TIMESTAMP, LORA_NAME (only when LORA_PATH is set), OUTPUT_FILE
# Returns: non-zero when the directory cannot be entered or created.
prepare_output_location() {
    # cd first, mkdir second: a relative OUTPUT_DIR must be created in the
    # directory the result file is later written from. Doing mkdir before cd
    # would create it under the caller's working directory instead.
    cd "$1" || return 1
    mkdir -p "$OUTPUT_DIR" || return 1

    # Timestamp keeps repeated runs from overwriting each other.
    TIMESTAMP=$(date +%Y%m%d_%H%M%S)

    # Name the result after the LoRA checkpoint, or mark it as base-model-only.
    if [[ -n "$LORA_PATH" ]]; then
        LORA_NAME=$(basename "$LORA_PATH")
        OUTPUT_FILE="$OUTPUT_DIR/case_study_${LORA_NAME}_${TIMESTAMP}.json"
    else
        OUTPUT_FILE="$OUTPUT_DIR/case_study_base_model_${TIMESTAMP}.json"
    fi
}

# Parent of this script's directory (the MOOSE-M1 root). A failure here stops
# the script through `set -e`.
prepare_output_location "$(dirname "$0")/.."

###############################################################################
# Show GPU Status
###############################################################################

# Print a one-line summary (index, name, used/total memory) of the selected GPU.
# Globals read: GPU_ID
# Purely informational: a missing nvidia-smi or a failing query only prints a
# note and never fails the script.
show_gpu_status() {
    echo "GPU Status:"
    echo "------------------------------------------------------------"
    if command -v nvidia-smi &> /dev/null; then
        # GPU_ID is quoted so it always reaches nvidia-smi as exactly one
        # argument. stderr is dropped on purpose; the fallback line below
        # reports the failure instead.
        nvidia-smi --query-gpu=index,name,memory.used,memory.total \
            --format=csv,noheader -i "$GPU_ID" 2>/dev/null \
            || echo "  GPU $GPU_ID not available"
    else
        echo "  nvidia-smi not available"
    fi
    echo ""
}

show_gpu_status

###############################################################################
# Run Case Study
###############################################################################

echo "Running case study..."
echo "============================================================"

# Collect the arguments for the case-study Python script in the global array
# CASE_STUDY_ARGS.
# Globals read: BASE_MODEL_PATH, LORA_PATH, INPUT_FILE, OUTPUT_FILE,
#               MAX_NEW_TOKENS, TEMPERATURE, TOP_P, REPETITION_PENALTY
# An array keeps every value as exactly one argument, whatever characters it
# contains. Building one string and running it through `eval` re-parses the
# values as shell code, so quotes, `$`, backticks or spaces in a path would
# break the command or execute unintended code.
build_case_study_args() {
    CASE_STUDY_ARGS=(--base_model_path "$BASE_MODEL_PATH")

    # The LoRA checkpoint is optional; an empty path means base model only.
    if [[ -n "$LORA_PATH" ]]; then
        CASE_STUDY_ARGS+=(--lora_path "$LORA_PATH")
    fi

    # Pass the input file only when it exists; otherwise --input_file is
    # omitted and the note below is printed.
    if [[ -n "$INPUT_FILE" && -f "$INPUT_FILE" ]]; then
        CASE_STUDY_ARGS+=(--input_file "$INPUT_FILE")
    else
        echo "Note: Using default case study example (no input file specified or file not found)"
    fi

    CASE_STUDY_ARGS+=(
        --output_file "$OUTPUT_FILE"
        --max_new_tokens "$MAX_NEW_TOKENS"
        --temperature "$TEMPERATURE"
        --top_p "$TOP_P"
        --repetition_penalty "$REPETITION_PENALTY"
        --load_in_8bit
    )
}

build_case_study_args

# Execute, exposing only the selected GPU to the Python process.
CUDA_VISIBLE_DEVICES="$GPU_ID" python Evaluation/hypothesis_composition_case_study.py "${CASE_STUDY_ARGS[@]}"

# Closing banner: finish time, where the result file was written, and a
# ready-to-copy command for pretty-printing it.
printf '%s\n' \
    "" \
    "============================================================" \
    "CASE STUDY COMPLETE" \
    "============================================================" \
    "Finished at: $(date)" \
    "" \
    "Results saved to:" \
    "  $OUTPUT_FILE" \
    "" \
    "To view results:" \
    "  cat $OUTPUT_FILE | python -m json.tool"
