"""
This file contains instructions for submitting to JailbreakBench.
Example prompts (which were generated with GCG on the
JailbreakBench dataset) are provided in the `prompts` subdirectory.

Running this file generates two folders:
    1. `logs`, which contains generated metadata for each jailbreak
    2. `submissions`, which contains the files necessary for submission
After running this script, open a pull request and attach `submissions/submission.json`.
"""

import json

import jailbreakbench as jbb

# Read in the behaviors from the JBB-Behaviors dataset
dataset = jbb.read_dataset()
behaviors = dataset.behaviors

# Load Vicuna and Llama2 jailbreak strings.
# The encoding is pinned to UTF-8 (the JSON interchange encoding): without it
# `open()` falls back to the platform's locale encoding, which can raise or
# silently corrupt non-ASCII characters in the prompt files (e.g. on Windows).
with open("prompts/vicuna.json", encoding="utf-8") as f:
    vicuna_prompts = json.load(f)
with open("prompts/llama2.json", encoding="utf-8") as f:
    llama2_prompts = json.load(f)

# For every target model, pair the JailbreakBench behaviors with that
# model's jailbreak strings (in order) to get a {behavior: prompt} mapping.
# `zip` stops at the shorter input, so the prompt files are expected to
# contain one entry per behavior.
#   * Note: You can optionally add jailbreaks for the following
# models: gpt-3.5-turbo-1106 and gpt-4-0125-preview
all_prompts = {
    "vicuna-13b-v1.5": dict(zip(behaviors, vicuna_prompts)),
    "llama-2-7b-chat-hf": dict(zip(behaviors, llama2_prompts)),
}

# Evaluate the performance of your jailbreak strings against the
# JailbreakBench classifier.  Note that for this step to work, you need
# to export your together.ai API key as an environment variable, e.g.,
#
# export TOGETHER_API_KEY="<your-together-ai-api-key>"
#
# All of the relevant output will be saved to a folder called `logs/`
# in your current working directory.
#   * Note 1: If you'd like to run this step locally rather than
# with together.ai's API, install vllm and set `llm_provider="vllm"`.
#   * Note 2: If you'd like to run a defense, set the `defense` flag.  For
# example, to run the SmoothLLM defense, set `defense="SmoothLLM"`.
#   * Note 3: If you are submitting strings for a GPT model, ensure that you
# export your OPENAI_API_KEY in the same way as above.
#
# The return value is kept in `evaluation` so it can be inspected; the
# visible remainder of this script does not read it.
evaluation = jbb.evaluate_prompts(all_prompts, llm_provider="litellm")

# Hyperparameters of the attack; this dictionary is handed to
# `create_submission` below as `method_params`.
method_params = {
    "n-iter": 500,
    "batch-size": 512,
    "judge-model": "jailbreakbench",
    "target-max-n-tokens": 150,
}

# Build the JailbreakBench submission. The output of this step will be
# written to a folder called `submissions` in your current working directory.
jbb.create_submission(
    method_name="GCG",
    attack_type="white_box",
    method_params=method_params,
)

# To submit to our leaderboard, please create a pull request and attach
# `submissions/submission.json`
