#!/bin/bash

# Run GraphGPT transductive training (main.py) for a single dataset.
#
# Usage: bash run_graphgpt_trans.sh <dataset_name> [seed] [prompt] [gpu_id]
#   dataset_name  one of the names listed in SUPPORTED_DATASETS below
#   seed          forwarded as --seed   (default: 0)
#   prompt        "None" (default) omits --prompt; any other value is
#                 forwarded as --prompt=<value> and appended to the log name
#   gpu_id        forwarded as --gpu_id (default: 0)
#
# The combined stdout/stderr of main.py is echoed to the terminal and written to
#   ./logs/transductive_<dataset>_seed<seed>[_<prompt>].log
# relative to the GraphGPT directory.
#
# Exit status: 1 on bad usage, unsupported dataset, or setup failure; otherwise
# the status of the training pipeline (non-zero if python3 or tee failed).

# Without pipefail, `python3 ... | tee` reports only tee's status, so a crashed
# training run would look successful.
set -o pipefail

# Single source of truth for the help text and the error message; it lists
# exactly the datasets the case statement below handles.
SUPPORTED_DATASETS="cora, citeseer, instagram, wikics, pubmed, reddit, photo, history, computer"
GRAPHGPT_DIR="../LLMPredictor/GraphGPT/"

# Check if dataset name is provided
if [[ $# -eq 0 ]]; then
    {
        echo "Usage: bash run_graphgpt_trans.sh <dataset_name> [seed] [prompt] [gpu_id]"
        echo "Supported datasets: $SUPPORTED_DATASETS"
        echo "Prompt options: None, noise, noisetxt"
        echo "Example: bash run_graphgpt_trans.sh cora 0 noise 0"
    } >&2
    exit 1
fi

DATASET=$1
SEED=${2:-0}      # Default seed is 0
PROMPT=${3:-None} # Default prompt is None
GPU_ID=${4:-0}    # Default GPU ID is 0

# Pick the per-dataset configuration. Only the stage-1 flags and the number of
# stage-2 epochs differ between groups; everything else is shared below.
# Validation happens before any directory change so bad input fails fast.
case "$DATASET" in
    cora | citeseer | instagram)
        CONFIG_NAME="standard"
        STAGE1_ARGS=(--do_stage1=0)
        S2_EPOCH=20
        ;;
    wikics | pubmed)
        # NOTE(review): the original comment said "s2_epoch=10/6" but the
        # command used 10 for both datasets; 10 is kept here - confirm.
        CONFIG_NAME="wikics/pubmed"
        STAGE1_ARGS=(--do_stage1=1 --s1_epoch=1)
        S2_EPOCH=10
        ;;
    reddit | photo | history | computer)
        # NOTE(review): the original comment said "s2_epoch=4/2" but the
        # command used 4 for all four datasets; 4 is kept here - confirm.
        CONFIG_NAME="large dataset"
        STAGE1_ARGS=(--do_stage1=1 --s1_epoch=1)
        S2_EPOCH=4
        ;;
    *)
        {
            echo "Error: Unsupported dataset '$DATASET'"
            echo "Supported datasets: $SUPPORTED_DATASETS"
        } >&2
        exit 1
        ;;
esac

# Construct prompt argument. An array keeps the "no prompt" case as zero
# arguments without relying on unquoted word-splitting.
PROMPT_ARGS=()
LOG_SUFFIX=""
if [[ "$PROMPT" != "None" ]]; then
    PROMPT_ARGS=(--prompt="$PROMPT")
    LOG_SUFFIX="_${PROMPT}"
fi

# Change to GraphGPT directory. The path is relative to the current working
# directory; stop here if it is missing rather than running in the wrong place.
cd "$GRAPHGPT_DIR" || {
    echo "Error: cannot change directory to '$GRAPHGPT_DIR' (resolved relative to $(pwd))" >&2
    exit 1
}

# tee cannot create the log file if the directory does not exist.
mkdir -p ./logs || {
    echo "Error: cannot create log directory ./logs" >&2
    exit 1
}

LOG_FILE="./logs/transductive_${DATASET}_seed${SEED}${LOG_SUFFIX}.log"

# Flag order is the same for every configuration; this assumes main.py parses
# its --key=value flags independently of their position.
TRAIN_ARGS=(
    --dataset="$DATASET"
    "${STAGE1_ARGS[@]}"
    --seed="$SEED"
    --load_ground_embedding=0
    --re_split=1
    --s2_epoch="$S2_EPOCH"
    --gpu_id="$GPU_ID"
    --s2_batch_size=16
    "${PROMPT_ARGS[@]}"
)

echo "Starting GraphGPT training at $(date)..."
echo "Dataset: $DATASET, Seed: $SEED, Prompt: $PROMPT, GPU: $GPU_ID"
echo "Running $DATASET with $CONFIG_NAME configuration..."

python3 -u main.py "${TRAIN_ARGS[@]}" 2>&1 | tee "$LOG_FILE"
STATUS=$?

if [[ $STATUS -ne 0 ]]; then
    echo "Error: GraphGPT training failed with exit status $STATUS at $(date). See $LOG_FILE" >&2
    exit "$STATUS"
fi

echo "GraphGPT training completed at $(date). Check log files for results."