#!/bin/bash
#
# Launch a GraphGPT run (main.py) for a single dataset and mirror its output
# into a per-run log file.
#
# Usage:
#   bash run_graphgpt_ind.sh <dataset_name> [seed] [prompt] [noise_ratio] [gpu_id]
#
# Arguments:
#   dataset_name  One of the datasets listed in SUPPORTED_DATASETS (required).
#   seed          Passed to main.py as --seed (default: 0).
#   prompt        None/none disables the prompt flags; any other value is passed
#                 as --prompt together with --noise_ratio (default: None).
#   noise_ratio   Passed as --noise_ratio when a prompt is given (default: 0.1).
#   gpu_id        Passed to main.py as --gpu_id (default: 0).
#
# The script must be started from a directory in which ../LLMPredictor/GraphGPT/
# resolves; main.py is run from there and logs are written to ./logs/ inside it.
#
# Exit status:
#   1 on usage error, unsupported dataset, or when the working directory or the
#   log directory cannot be set up; otherwise the exit status of main.py.

SUPPORTED_DATASETS="cora, citeseer, instagram, wikics, pubmed, reddit, photo, history, computer, arxiv"

# Check if dataset name is provided
if [[ $# -eq 0 ]]; then
    {
        echo "Usage: bash run_graphgpt_ind.sh <dataset_name> [seed] [prompt] [noise_ratio] [gpu_id]"
        echo "Supported datasets: $SUPPORTED_DATASETS"
        echo "Prompt options: None, noise, noisetxt"
        echo "Example: bash run_graphgpt_ind.sh cora 0 noise 0.1 0"
    } >&2
    exit 1
fi

DATASET=$1
SEED=${2:-0}          # Default seed is 0
PROMPT=${3:-None}     # Default prompt is None (no noise); an empty value also maps to None
NOISE_RATIO=${4:-0.1} # Default noise ratio is 0.1
GPU_ID=${5:-0}        # Default GPU ID is 0

# Construct prompt arguments. An array keeps each flag a single word without
# relying on word splitting of an unquoted string.
case "$PROMPT" in
    None|none)
        PROMPT_ARGS=()
        NOISE_SUFFIX=""
        ;;
    *)
        PROMPT_ARGS=("--prompt=$PROMPT" "--noise_ratio=$NOISE_RATIO")
        NOISE_SUFFIX="_${PROMPT}"
        ;;
esac

# Select the per-dataset settings. The dataset is validated here, before any
# directory change or training output, so a typo fails fast.
# NOTE(review): earlier comments in this script described re_split=0/1 and
# other s2_epoch values than the ones actually passed; the values below are the
# ones the script has been running with — confirm their meaning against main.py.
case "$DATASET" in
    cora|citeseer|instagram)
        CONFIG_LABEL="standard"
        DO_STAGE1=1
        RE_SPLIT=2
        S2_EPOCH=10
        TAIL_ARGS=("--gpu_id=$GPU_ID" --s2_batch_size=16)
        LOG_FILE="./logs/inductive_${DATASET}_seed${SEED}${NOISE_SUFFIX}.log"
        ;;
    wikics|pubmed)
        CONFIG_LABEL="wikics/pubmed"
        DO_STAGE1=1
        RE_SPLIT=2
        S2_EPOCH=6
        TAIL_ARGS=("--gpu_id=$GPU_ID" --s2_batch_size=16)
        LOG_FILE="./logs/inductive_${DATASET}_seed${SEED}${NOISE_SUFFIX}.log"
        ;;
    reddit|photo|history|computer)
        # Stage 1 is not run for these datasets (do_stage1=0).
        CONFIG_LABEL="large dataset"
        DO_STAGE1=0
        RE_SPLIT=2
        S2_EPOCH=2
        TAIL_ARGS=(--s2_batch_size=16 "--gpu_id=$GPU_ID")
        LOG_FILE="./logs/inductive_${DATASET}_seed${SEED}${NOISE_SUFFIX}.log"
        ;;
    arxiv)
        # arxiv: re_split=0, no explicit --s2_batch_size, and its own log name.
        CONFIG_LABEL="arxiv"
        DO_STAGE1=0
        RE_SPLIT=0
        S2_EPOCH=2
        TAIL_ARGS=("--gpu_id=$GPU_ID")
        LOG_FILE="./logs/arxiv_seed${SEED}${NOISE_SUFFIX}.log"
        ;;
    *)
        echo "Error: Unsupported dataset '$DATASET'" >&2
        echo "Supported datasets: $SUPPORTED_DATASETS" >&2
        exit 1
        ;;
esac

# Change to GraphGPT directory. Abort if it is missing: continuing would run
# (or fail to find) main.py in whatever directory the caller happened to be in.
cd ../LLMPredictor/GraphGPT/ || {
    echo "Error: cannot change to ../LLMPredictor/GraphGPT/ (current directory: $PWD)" >&2
    exit 1
}

# tee cannot create the log file if its directory does not exist.
mkdir -p ./logs || {
    echo "Error: cannot create log directory ./logs" >&2
    exit 1
}

echo "Starting GraphGPT training at $(date)..."
echo "Dataset: $DATASET, Seed: $SEED, Prompt: $PROMPT, Noise Ratio: $NOISE_RATIO, GPU: $GPU_ID"
echo "Running $DATASET with $CONFIG_LABEL configuration..."

TRAIN_CMD=(
    python3 -u main.py
    "--dataset=$DATASET"
    "--do_stage1=$DO_STAGE1"
    "--seed=$SEED"
    --load_ground_embedding=0
    "--re_split=$RE_SPLIT"
    "--s2_epoch=$S2_EPOCH"
    "${TAIL_ARGS[@]}"
    "${PROMPT_ARGS[@]}"
)

"${TRAIN_CMD[@]}" 2>&1 | tee "$LOG_FILE"
# $? would be tee's status; PIPESTATUS[0] is main.py's. It must be read
# immediately, before any other command overwrites it.
TRAIN_STATUS=${PIPESTATUS[0]}

if [[ $TRAIN_STATUS -ne 0 ]]; then
    echo "GraphGPT training FAILED with exit code $TRAIN_STATUS at $(date). Check $LOG_FILE for details." >&2
    exit "$TRAIN_STATUS"
fi

echo "GraphGPT training completed at $(date). Check log files for results."