#!/bin/bash
#
# Launch LLaGA's main.py for a single dataset and tee its output to a log file.
#
# Usage:
#   bash run_llaga_ind.sh <dataset_name> [seed] [source_prompt] [gpu_id] [llm_model]
#
# Positional arguments:
#   $1  dataset name (required; see SUPPORTED_DATASETS)
#   $2  seed                (default: 0)
#   $3  source prompt       (default: None, i.e. no --prompt flag is passed)
#   $4  GPU id              (default: 0)
#   $5  LLM name            (default: Mistral-7B)
#
# Exit status:
#   0 when python3 exits successfully; 1 on usage / setup errors; otherwise
#   the exit status of the python3 process.
#
# The LLaGA directory is resolved relative to the caller's current directory.

readonly SUPPORTED_DATASETS="cora, citeseer, wikics, instagram, pubmed, reddit, photo, computer, history, arxiv"
readonly LLAGA_DIR="../LLMPredictor/LLaGA/"

# Validate arguments before touching the filesystem so that a usage error
# does not leave a stray logs/ directory behind.
if [ $# -eq 0 ]; then
    {
        echo "Usage: bash run_llaga_ind.sh <dataset_name> [seed] [source_prompt] [gpu_id] [llm_model]"
        echo "Supported datasets: $SUPPORTED_DATASETS"
        echo "Source prompt options: None, noise, noisetxt"
        echo "Supported LLMs: Mistral-7B (default), Qwen-3B"
        echo "Example: bash run_llaga_ind.sh cora 0 noise 0 Mistral-7B"
    } >&2
    exit 1
fi

DATASET=$1
SEED=${2:-0}              # Default seed is 0
SOURCE_PROMPT=${3:-None}  # Default source_prompt is None
GPU_ID=${4:-0}            # Default GPU ID is 0
LLM=${5:-Mistral-7B}      # Default LLM is Mistral-7B

# Pick the per-dataset hyper-parameters and log file prefix. Only these
# differ between the dataset groups; everything else is shared below.
case "$DATASET" in
    cora | citeseer)
        # Small-scale datasets
        SCALE="small-scale"
        SCALE_ARGS=(--re_split=2 --num_epochs=10 --patience=4)
        LOG_PREFIX="inductive_${DATASET}"
        ;;
    wikics | instagram | pubmed)
        # Middle-scale datasets (~10,000)
        SCALE="middle-scale"
        SCALE_ARGS=(--re_split=2 --num_epochs=8 --patience=2)
        LOG_PREFIX="inductive_${DATASET}"
        ;;
    reddit | photo | computer | history)
        # Large-scale datasets (~40,000)
        SCALE="large-scale"
        SCALE_ARGS=(--re_split=2 --num_epochs=6 --patience=2)
        LOG_PREFIX="inductive_${DATASET}"
        ;;
    arxiv)
        # ArXiv special configuration. Its log name has no "inductive_"
        # prefix; kept as-is so existing log paths do not change.
        SCALE="arxiv"
        SCALE_ARGS=(--re_split=0 --num_epochs=2 --patience=1 --batch_size=12
            --max_txt_length=460 --max_ans_length=20 --eval_batch_size=32)
        LOG_PREFIX="arxiv"
        ;;
    *)
        {
            echo "Error: Unsupported dataset '$DATASET'"
            echo "Supported datasets: $SUPPORTED_DATASETS"
        } >&2
        exit 1
        ;;
esac

# Arguments are collected in an array so that each value reaches python3 as
# exactly one word, regardless of spaces or glob characters.
TRAIN_ARGS=(
    --neighbor_template=HO
    "--dataset=$DATASET"
    "--seed=$SEED"
    "--gpu_id=$GPU_ID"
    "--llm=$LLM"
    "${SCALE_ARGS[@]}"
)

# The --prompt flag is only passed when a source prompt other than "None"
# was requested; the prompt name is also appended to the log file name.
LOG_SUFFIX=""
if [ "$SOURCE_PROMPT" != "None" ]; then
    TRAIN_ARGS+=(--prompt "$SOURCE_PROMPT")
    LOG_SUFFIX="_${SOURCE_PROMPT}"
fi

# Change to LLaGA directory; abort instead of running in the wrong place.
if ! cd "$LLAGA_DIR"; then
    echo "Error: cannot change to '$LLAGA_DIR' (resolved relative to $(pwd))" >&2
    exit 1
fi

# Create logs directory if it doesn't exist
if ! mkdir -p logs; then
    echo "Error: cannot create logs directory in $(pwd)" >&2
    exit 1
fi

LOG_FILE="./logs/${LOG_PREFIX}_${LLM}_seed${SEED}${LOG_SUFFIX}.log"

echo "Starting LLaGA training at $(date)..."
echo "Dataset: $DATASET, Seed: $SEED, Source Prompt: $SOURCE_PROMPT, GPU: $GPU_ID, LLM: $LLM"
echo "Running $DATASET with $SCALE configuration..."

python3 -u main.py "${TRAIN_ARGS[@]}" 2>&1 | tee "$LOG_FILE"
# The pipeline's own status is tee's; PIPESTATUS[0] is python3's and must be
# read immediately, before any other command overwrites it.
TRAIN_STATUS=${PIPESTATUS[0]}

if [ "$TRAIN_STATUS" -ne 0 ]; then
    echo "Error: LLaGA training failed with exit status $TRAIN_STATUS at $(date). See $LOG_FILE" >&2
    exit "$TRAIN_STATUS"
fi

echo "LLaGA training completed at $(date). Check log files for results."