#!/bin/bash

# Check the argument count before touching the filesystem, so that a bare
# invocation only prints the usage text (to stderr) and has no side effects.
if [[ $# -lt 3 ]]; then
    {
        echo "Usage: bash run_llaga_atk_ind.sh <dataset_name> <attack> <ptb_rate> [seed] [source_prompt] [gpu_id] [llm_model] [atk_emb_type] [atk_seed]"
        echo "Supported datasets: cora, citeseer, wikics, instagram, pubmed, reddit, photo, computer, history, arxiv"
        echo "Supported attacks: pgd, pgdguard_0.0, pgdguard_0.3, pgdguard_0.5, pgdguard_0.7, grbcd, prbcd, textfooler, llm, wtgia"
        echo "Source prompt options: None, noise, noisetxt, sim"
        echo "Supported LLMs: Mistral-7B (default)"
        echo "Example: bash run_llaga_atk_ind.sh cora pgd 0.1 0 noise 0 Mistral-7B bow 0"
    } >&2
    exit 1
fi

# Change to LLaGA directory. The path is relative to the current working
# directory; abort if it cannot be entered, otherwise the remaining commands
# would run in the wrong place.
cd ../LLMPredictor/LLaGA/ || {
    echo "Error: cannot change directory to ../LLMPredictor/LLaGA/ (path is relative to the current working directory)" >&2
    exit 1
}

# Create logs directory if it doesn't exist
mkdir -p logs_atk || {
    echo "Error: cannot create log directory 'logs_atk'" >&2
    exit 1
}

# Capture the positional parameters under descriptive names.
# The first three are required; the remaining six are optional.
DATASET="$1"
ATTACK="$2"
PTB_RATE="$3"
SEED="$4"
SOURCE_PROMPT="$5"
GPU_ID="$6"
LLM="$7"
ATK_EMB_TYPE="$8"
ATK_SEED="$9"

# Fill in defaults for optional parameters that were omitted or passed empty.
: "${SEED:=0}"                # data seed
: "${SOURCE_PROMPT:=None}"    # "None" means no source prompt
: "${GPU_ID:=0}"
: "${LLM:=Mistral-7B}"
: "${ATK_EMB_TYPE:=bow}"
: "${ATK_SEED:=$SEED}"        # attack seed follows the (already resolved) data seed

# Derive two values from the source-prompt choice:
#   SOURCE_PROMPT_ARG - extra command-line flag text for main_atk.py
#   LOG_SUFFIX        - text appended to the log file name
# The literal value "None" leaves both empty.
case "$SOURCE_PROMPT" in
    None)
        SOURCE_PROMPT_ARG=""
        LOG_SUFFIX=""
        ;;
    *)
        SOURCE_PROMPT_ARG="--source_prompt $SOURCE_PROMPT"
        LOG_SUFFIX="_${SOURCE_PROMPT}"
        ;;
esac

# Map the attack name onto its attack category. Any name that is not
# listed explicitly is treated as a structure attack.
case "$ATTACK" in
    textfooler | llm | gpt)
        ATK_TYPE="text"
        ;;
    wtgia)
        ATK_TYPE="hybrid"
        ;;
    *)
        ATK_TYPE="structure"
        ;;
esac

# Print a start-up banner: a timestamp followed by the resolved run settings.
printf '%s\n' \
    "Starting LLaGA attack evaluation at $(date)..." \
    "Dataset: $DATASET, Attack: $ATTACK, PTB Rate: $PTB_RATE, Seed: $SEED, Source Prompt: $SOURCE_PROMPT, GPU: $GPU_ID, LLM: $LLM" \
    "Attack Embedding Type: $ATK_EMB_TYPE, Attack Seed: $ATK_SEED, Attack Type: $ATK_TYPE"

# Run main_atk.py with hyper-parameters selected by dataset, copying its
# combined stdout/stderr to a per-run log file under ./logs_atk/.
#
# Reads: DATASET, ATTACK, ATK_TYPE, PTB_RATE, SEED, GPU_ID, LLM,
#        ATK_EMB_TYPE, ATK_SEED, SOURCE_PROMPT_ARG, LOG_SUFFIX
# Exits: 1 for an unsupported dataset or LLM; the exit status of main_atk.py
#        when that command fails.

# Dataset-specific settings. Only the flags that differ between datasets live
# here; the flags shared by every run are spelled out once at the call site.
log_prefix="attack_inductive"
case "$DATASET" in
    cora | citeseer)
        # Small-scale datasets
        scale_label="small-scale"
        dataset_args=(--re_split=2 --num_epochs=10 --patience=4)
        ;;
    wikics | instagram | pubmed)
        # Middle-scale datasets (~10,000)
        scale_label="middle-scale"
        dataset_args=(--re_split=2 --num_epochs=8 --patience=2)
        ;;
    reddit | photo | computer | history)
        # Large-scale datasets (~40,000)
        scale_label="large-scale"
        dataset_args=(--re_split=2 --num_epochs=6 --patience=2)
        ;;
    arxiv)
        # ArXiv special configuration. Its log files use the shorter
        # "attack_" prefix rather than "attack_inductive_".
        scale_label="arxiv"
        dataset_args=(
            --re_split=0 --num_epochs=2 --patience=1
            --batch_size=12 --max_txt_length=460 --max_ans_length=20
            --eval_batch_size=32
        )
        log_prefix="attack"
        ;;
    *)
        echo "Error: Unsupported dataset '$DATASET'" >&2
        echo "Supported datasets: cora, citeseer, wikics, instagram, pubmed, reddit, photo, computer, history, arxiv" >&2
        exit 1
        ;;
esac

# Only Mistral-7B has a configuration. Reject anything else explicitly rather
# than running nothing and then reporting that the evaluation completed.
if [[ "$LLM" != "Mistral-7B" ]]; then
    echo "Error: Unsupported LLM '$LLM'" >&2
    echo "Supported LLMs: Mistral-7B (default)" >&2
    exit 1
fi

echo "Running $DATASET with $scale_label configuration..."

# SOURCE_PROMPT_ARG is one string holding either nothing or "--source_prompt
# <value>". Split it into separate words without applying glob expansion.
read -r -a source_prompt_args <<< "$SOURCE_PROMPT_ARG"

# ${PTB_RATE//.} removes the dots from the rate so the file name has none there.
log_file="./logs_atk/${log_prefix}_${DATASET}_${LLM}_${ATTACK}_${ATK_TYPE}_ptb${PTB_RATE//.}_${ATK_EMB_TYPE}_atkseed${ATK_SEED}_seed${SEED}${LOG_SUFFIX}.log"

python3 -u main_atk.py \
    --neighbor_template=HO \
    --dataset="$DATASET" \
    --seed="$SEED" \
    --gpu_id="$GPU_ID" \
    --llm="$LLM" \
    "${dataset_args[@]}" \
    --attack="$ATTACK" \
    --atk_type="$ATK_TYPE" \
    --ptb_rate="$PTB_RATE" \
    --atk_emb_type="$ATK_EMB_TYPE" \
    --atk_seed="$ATK_SEED" \
    "${source_prompt_args[@]}" \
    2>&1 | tee "$log_file"
# The pipeline's own status is tee's; take python's status from PIPESTATUS
# immediately, before any other command overwrites it.
py_status=${PIPESTATUS[0]}

if ((py_status != 0)); then
    echo "Error: main_atk.py exited with status $py_status. See $log_file" >&2
    exit "$py_status"
fi

echo "LLaGA attack evaluation completed at $(date). Check log files for results."