#!/bin/bash

# Script to evaluate erase-and-check on adversarial prompts
# generated by the AutoDAN method described in the following paper:
# AutoDAN: Generating Stealthy Jailbreak Prompts on Aligned Large Language Models,
# Liu et al., 2023.
# https://arxiv.org/abs/2310.04451

# Greedy EC
# Options for the greedy_ec evaluation against the autodan attack, gathered in
# an array so each flag sits on its own line without backslash continuations.
greedy_ec_flags=(
    --num_prompts 200
    --eval_type greedy_ec
    --use_classifier
    --model_wt_path models/distilbert_suffix.pt
    --attack autodan
    --num_iters 6
    --results_dir 'results/AutoDAN-HGA/GreedyEC'
)

# Quoted [@] expansion passes every element to main.py as a separate argument.
python main.py "${greedy_ec_flags[@]}"

# RandEC (disabled: the command below is commented out; uncomment it to run)
# python main.py \
#     --num_prompts 200 \
#     --eval_type empirical \
#     --mode suffix \
#     --max_erase 30 \
#     --randomize --sampling_ratio 0.3 \
#     --attack autodan \
#     --llm_name 'Llama-2' \
#     --results_dir 'results/AutoDAN-HGA/RandEC'