#!/bin/bash

# Change to the GraphGPT directory. The path is relative to the caller's
# current working directory. Abort if it cannot be entered: continuing would
# create the log directory and look up main_atk.py in the wrong location.
cd ../LLMPredictor/GraphGPT/ || {
    echo "Error: cannot change directory to ../LLMPredictor/GraphGPT/ (relative to $PWD)" >&2
    exit 1
}

# Create the logs directory if it doesn't exist; the run's output is tee'd
# into it later, so failing to create it is fatal.
mkdir -p logs_atk || {
    echo "Error: cannot create log directory $PWD/logs_atk" >&2
    exit 1
}

# The first three positional arguments (dataset, attack, ptb_rate) are
# mandatory. When fewer are given, print the usage text and exit with status 1.
if (( $# < 3 )); then
    printf '%s\n' \
        "Usage: bash run_graphgpt_atk_ind.sh <dataset_name> <attack> <ptb_rate> [seed] [source_prompt] [gpu_id] [atk_emb_type] [atk_seed]" \
        "Supported datasets: cora, citeseer, instagram, wikics, pubmed, reddit, photo, history, computer, arxiv" \
        "Supported attacks: pgd, grbcd, prbcd, textfooler, llm, wtgia" \
        "Source prompt options: None, noise, noisetxt" \
        "Example: bash run_graphgpt_atk_ind.sh cora wtgia 0.2"
    exit 1
fi

# Mandatory positional arguments.
DATASET="${1}"
ATTACK="${2}"
PTB_RATE="${3}"

# Optional positional arguments; each falls back to its default when the
# argument is missing or empty.
SEED="${4:-0}"                # data seed, default 0
SOURCE_PROMPT="${5:-None}"    # source prompt, default None
GPU_ID="${6:-0}"              # GPU id, default 0
ATK_EMB_TYPE="${7:-bow}"      # attack embedding type, default bow
ATK_SEED="${8:-${SEED}}"      # attack seed, default: same as the data seed

# Derive the optional --source_prompt flag and the matching log-name suffix.
# "None", "none" and the empty string all mean that no source prompt is used,
# in which case both values are left empty.
case "$SOURCE_PROMPT" in
    None|none|"")
        SOURCE_PROMPT_ARG=""
        NOISE_SUFFIX=""
        ;;
    *)
        SOURCE_PROMPT_ARG="--source_prompt=${SOURCE_PROMPT}"
        NOISE_SUFFIX="_${SOURCE_PROMPT}"
        ;;
esac

# Map the attack name onto the attack type passed as --atk_type:
#   textfooler / text_fooler / llm / gpt -> text
#   wtgia                                -> hybrid
#   anything else                        -> structure
case "$ATTACK" in
    textfooler|text_fooler|llm|gpt)
        ATK_TYPE="text"
        ;;
    wtgia)
        ATK_TYPE="hybrid"
        ;;
    *)
        ATK_TYPE="structure"
        ;;
esac

# Print the start banner and the effective settings, one line per argument.
printf '%s\n' \
    "Starting GraphGPT attack evaluation at $(date)..." \
    "Dataset: $DATASET, Attack: $ATTACK, PTB Rate: $PTB_RATE, Seed: $SEED, Source Prompt: $SOURCE_PROMPT, GPU: $GPU_ID" \
    "Attack Embedding Type: $ATK_EMB_TYPE, Attack Seed: $ATK_SEED, Attack Type: $ATK_TYPE"

# Run the attack evaluation for the selected dataset.
#
# The dataset groups differ only in --do_stage1 and in the arguments that sit
# between --load_ground_embedding and --attack, so those are chosen here and a
# single python3 invocation is assembled below. For every supported dataset
# the resulting argument vector and log-file path are the same as when each
# group had its own hand-written command line.
case "$DATASET" in
    cora|citeseer|instagram)
        # Inductive setting (re_split=2, s2_epoch=10)
        config_label="standard"
        do_stage1=1
        dataset_args=(--re_split=2 --s2_epoch=10 "--gpu_id=$GPU_ID" --s2_batch_size=16)
        ;;
    wikics|pubmed)
        config_label="wikics/pubmed"
        do_stage1=1
        dataset_args=(--re_split=2 --s2_epoch=6 "--gpu_id=$GPU_ID" --s2_batch_size=16)
        ;;
    reddit|photo|history|computer)
        # Large datasets: do_stage1=0, s2_epoch=2
        config_label="large dataset"
        do_stage1=0
        dataset_args=(--re_split=2 --s2_epoch=2 --s2_batch_size=16 "--gpu_id=$GPU_ID")
        ;;
    arxiv)
        # Special configuration with re_split=0; no --s2_batch_size is passed.
        config_label="arxiv"
        do_stage1=0
        dataset_args=(--re_split=0 --s2_epoch=2 "--gpu_id=$GPU_ID")
        ;;
    *)
        echo "Error: Unsupported dataset '$DATASET'" >&2
        echo "Supported datasets: cora, citeseer, instagram, wikics, pubmed, reddit, photo, history, computer, arxiv" >&2
        exit 1
        ;;
esac

echo "Running $DATASET with ${config_label} configuration..."

# Build the command as an array so every value reaches python3 as exactly one
# argument, regardless of spaces or glob characters in the inputs.
train_cmd=(
    python3 -u main_atk.py
    "--dataset=$DATASET"
    "--do_stage1=$do_stage1"
    "--seed=$SEED"
    --load_ground_embedding=0
    "${dataset_args[@]}"
    "--attack=$ATTACK"
    "--atk_type=$ATK_TYPE"
    "--ptb_rate=$PTB_RATE"
    "--atk_emb_type=$ATK_EMB_TYPE"
    "--atk_seed=$ATK_SEED"
)

# SOURCE_PROMPT_ARG is empty when no source prompt was requested; append it
# only when set so python3 never receives an empty argument.
if [[ -n "${SOURCE_PROMPT_ARG:-}" ]]; then
    train_cmd+=("$SOURCE_PROMPT_ARG")
fi

# ${PTB_RATE//.} strips the dots from the rate (e.g. 0.2 -> 02) for the file name.
log_file="./logs_atk/attack_inductive_${DATASET}_${ATTACK}_${ATK_TYPE}_ptb${PTB_RATE//.}_${ATK_EMB_TYPE}_atkseed${ATK_SEED}_seed${SEED}${NOISE_SUFFIX}.log"

"${train_cmd[@]}" 2>&1 | tee "$log_file"
# A pipeline's exit status is that of its last command (tee), so python3's own
# status has to be read from PIPESTATUS immediately after the pipeline.
pipe_status=("${PIPESTATUS[@]}")

if (( pipe_status[0] != 0 )); then
    echo "Error: main_atk.py exited with status ${pipe_status[0]} at $(date). See $log_file" >&2
    exit "${pipe_status[0]}"
fi

if (( pipe_status[1] != 0 )); then
    echo "Warning: tee exited with status ${pipe_status[1]}; $log_file may be incomplete" >&2
fi

echo "GraphGPT attack evaluation completed at $(date). Check log files for results."