# Shared defaults. Other attack entries in this file reference these values
# through `${attacks._default.generation_config}`,
# `${attacks._default.generation_config.max_new_tokens}` and
# `${attacks._default.seed}`, so changing a value here changes it for every
# entry that interpolates it.
_default:
  # Generation settings that most attack entries pull in as a whole mapping.
  generation_config:
    temperature: 0.0
    top_p: 1.0
    top_k: 0
    max_new_tokens: 2048
    num_return_sequences: 1
  # Seed that the attack entries pick up via `${attacks._default.seed}`.
  seed: 0

# Actor attack (placement: multi-turn). Configures an attack model and a judge
# model; both are set to run locally (`use_api: false`) by default.
actor:
  name: actor
  type: discrete
  placement: multi-turn
  # Take the seed from the shared defaults, like every other attack entry in
  # this file, so that overriding `attacks._default.seed` also applies here.
  # The resolved value is unchanged (0) with the current defaults.
  seed: ${attacks._default.seed}

  attack_model:
    temperature: 0.0
    use_api: false
    # Example value: gpt-4o. Presumably only read when `use_api` is true;
    # confirm against the attack implementation.
    api_model_name: null
    # The model id and the tokenizer id point at different repositories.
    id: mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated
    tokenizer_id: meta-llama/Meta-Llama-3.1-8B-Instruct
    short_name: Llama
    developer_name: mlabonne
    compile: false
    dtype: bfloat16
    chat_template: llama-3-instruct
    trust_remote_code: true

  judge_model:
    use_api: false
    target_model_holder: Meta
    # Same model/tokenizer pairing as `attack_model` above.
    id: mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated
    tokenizer_id: meta-llama/Meta-Llama-3.1-8B-Instruct
    short_name: Llama
    developer_name: mlabonne
    compile: false
    dtype: bfloat16
    chat_template: llama-3-instruct
    trust_remote_code: true

  actor_num: 3
  early_stop: true
  dynamic_modify: true
# ample_gcg attack: top-level attack settings plus the `prompter_lm` model block.
ample_gcg:
  name: ample_gcg
  type: discrete
  version: "0.0.1"
  # Generation settings and seed are interpolated from the `_default` entry.
  generation_config: ${attacks._default.generation_config}
  seed: ${attacks._default.seed}
  num_steps: 200

  # Prompter model; model id and tokenizer id are the same repository.
  prompter_lm:
    id: osunlp/AmpleGCG-llama2-sourced-llama2-7b-chat
    tokenizer_id: osunlp/AmpleGCG-llama2-sourced-llama2-7b-chat
    chat_template: null
    short_name: ample_gcg
    developer_name: osunlp
    batch_size: 2
    dtype: bfloat16
    attn_implementation: null
    trust_remote_code: true
    compile: true
    # Generation settings for the prompter only; this is a separate mapping
    # from the entry-level `generation_config` above. Sampling is disabled and
    # min/max new tokens are both 20.
    generation_config:
      do_sample: false
      max_new_tokens: 20
      min_new_tokens: 20
      diversity_penalty: 1.0
# autodan attack configuration.
autodan:
  name: autodan
  type: discrete
  version: "0.0.1"
  # Generation settings and seed are interpolated from the `_default` entry.
  generation_config: ${attacks._default.generation_config}
  seed: ${attacks._default.seed}

  num_steps: 100
  batch_size: 128
  eval_steps: 5

  # Search hyperparameters. NOTE(review): `num_elites`, `crossover` and
  # `mutation` look like fractions/probabilities; confirm against the attack code.
  num_elites: 0.05
  crossover: 0.5
  num_points: 5
  mutation: 0.01

  # No mutate model is selected by default: all identifying fields are null.
  mutate_model:
    id: null
    tokenizer_id: null
    short_name: null
    developer_name: null
    chat_template: null
    dtype: bfloat16
    compile: false
    trust_remote_code: true
# beast attack configuration.
beast:
  name: beast
  type: discrete
  version: "0.0.1"
  # Generation settings and seed are interpolated from the `_default` entry.
  generation_config: ${attacks._default.generation_config}
  seed: ${attacks._default.seed}

  num_steps: 40
  # Starts from an empty string, unlike the "x x x ..." initialisation used by
  # several other entries in this file.
  optim_str_init: ""
  k1: 15
  k2: 15
  search_temperature: 1.0

  # Token filtering and caching switches.
  allow_non_ascii: true
  allow_special: false
  use_prefix_cache: true
# direct attack: defines no attack-specific hyperparameters, only the shared
# generation settings and seed.
direct:
  name: direct
  type: discrete
  version: "0.0.1"
  # Both values are interpolated from the `_default` entry.
  generation_config: ${attacks._default.generation_config}
  seed: ${attacks._default.seed}
# gcg attack configuration (placement: suffix).
gcg:
  name: gcg
  type: discrete
  version: "0.0.1"
  placement: suffix
  # Generation settings and seed are interpolated from the `_default` entry.
  generation_config: ${attacks._default.generation_config}
  seed: ${attacks._default.seed}

  num_steps: 250
  batch_size: 512
  # All tested tokenizers convert this to exactly 20 tokens.
  optim_str_init: "x x x x x x x x x x x x x x x x x x x x"

  # Candidate search settings.
  search_width: 512
  topk: 256
  n_replace: 1
  buffer_size: 0

  # Loss settings.
  loss: ce
  use_constrained_gradient: false
  mellowmax_alpha: 1.0

  early_stop: false
  use_prefix_cache: true

  # Token filtering switches.
  allow_non_ascii: false
  allow_special: false
  filter_ids: true

  verbosity: WARNING
  token_selection: default
  grow_target: false
  grad_smoothing: 1
  # 0.5 works quite well.
  grad_momentum: 0.0
# gcg_refusal attack configuration (placement: suffix).
gcg_refusal:
  name: gcg_refusal
  type: discrete
  version: "0.0.1"
  placement: suffix
  # Only `max_new_tokens` is taken from the shared defaults here; this entry
  # does not define a full `generation_config` mapping.
  max_new_tokens: ${attacks._default.generation_config.max_new_tokens}
  seed: ${attacks._default.seed}

  num_steps: 250
  batch_size: 512
  # All tested tokenizers convert this to exactly 20 tokens.
  optim_str_init: "x x x x x x x x x x x x x x x x x x x x"

  # Candidate search settings.
  search_width: 512
  topk: 256
  n_replace: 1
  buffer_size: 0

  # Loss settings.
  use_mellowmax: false
  use_constrained_gradient: false
  mellowmax_alpha: 1.0

  early_stop: false
  use_prefix_cache: true

  # Token filtering switches.
  allow_non_ascii: false
  allow_special: false
  filter_ids: true

  verbosity: WARNING
  token_selection: default
  max_new_target_tokens: 64
  grow_target: false
# gcg_judge attack configuration (placement: suffix).
gcg_judge:
  name: gcg_judge
  type: discrete
  version: "0.0.1"
  placement: suffix
  # Only `max_new_tokens` is taken from the shared defaults here; this entry
  # does not define a full `generation_config` mapping.
  max_new_tokens: ${attacks._default.generation_config.max_new_tokens}
  seed: ${attacks._default.seed}

  num_steps: 250
  batch_size: 512
  # All tested tokenizers convert this to exactly 20 tokens.
  optim_str_init: "x x x x x x x x x x x x x x x x x x x x"

  # Candidate search settings.
  search_width: 512
  topk: 256
  n_replace: 1
  buffer_size: 0

  use_constrained_gradient: false
  early_stop: false
  use_prefix_cache: true

  # Token filtering switches.
  allow_non_ascii: false
  allow_special: false
  filter_ids: true

  verbosity: WARNING
  token_selection: default
# human_jailbreaks attack configuration; configured for a single step.
human_jailbreaks:
  name: human_jailbreaks
  type: discrete
  version: "0.0.1"
  # Generation settings and seed are interpolated from the `_default` entry.
  generation_config: ${attacks._default.generation_config}
  seed: ${attacks._default.seed}
  num_steps: 1
# pair attack configuration: an attack model, per-target generation settings,
# and a judge model.
pair:
  name: pair
  type: discrete
  version: "0.0.1"
  # Seed and generation settings are interpolated from the `_default` entry.
  seed: ${attacks._default.seed}
  generation_config: ${attacks._default.generation_config}

  num_streams: 1
  keep_last_num: 3
  num_steps: 20

  # Attack model: identity/loading fields followed by its sampling settings.
  attack_model:
    id: lmsys/vicuna-13b-v1.5
    tokenizer_id: lmsys/vicuna-13b-v1.5
    dtype: bfloat16
    compile: false
    trust_remote_code: true
    short_name: vicuna
    developer_name: lmsys
    max_attempts: 10
    max_new_tokens: 512
    temperature: 1
    chat_template: vicuna
    top_p: 0.9

  # Only generation settings are listed for the target; no model id is given
  # in this mapping.
  target_model:
    max_new_tokens: 256
    temperature: 0
    top_p: 1

  # Judge model; model id and tokenizer id are the same repository.
  judge_model:
    id: meta-llama/Meta-Llama-3.1-8B-Instruct
    tokenizer_id: meta-llama/Meta-Llama-3.1-8B-Instruct
    short_name: "Llama"
    developer_name: "Meta"
    compile: false
    dtype: bfloat16
    chat_template: llama-3-instruct
    trust_remote_code: true
# pgd attack configuration (type: continuous, placement: suffix).
pgd:
  name: pgd
  type: continuous
  version: "0.0.1"
  placement: suffix
  # Generation settings and seed are interpolated from the `_default` entry.
  generation_config: ${attacks._default.generation_config}
  seed: ${attacks._default.seed}

  num_steps: 100
  # All tested tokenizers convert this to exactly 20 tokens.
  optim_str_init: "x x x x x x x x x x x x x x x x x x x x"

  # Perturbation and step-size settings.
  epsilon: 1.0
  alpha: 0.001
  embedding_scale: null
  normalize_alpha: true
  normalize_gradient: false

  original_model: null
  tie_logits: 0.0
  tie_features: 0.0

  projection: l2
  loss: ce
  attack_space: embedding

  optimizer: "FGSM"
  # NOTE(review): `weight_decay` and `betas` look like Adam-style options;
  # confirm whether they are read when `optimizer` is "FGSM".
  optimizer_config:
    weight_decay: 0.0
    betas: [0.9, 0.999]
# pgd_discrete attack configuration (type: hybrid, placement: suffix).
pgd_discrete:
  name: pgd_discrete
  type: hybrid
  version: "0.0.1"
  placement: suffix
  num_steps: 100
  # Generation settings and seed are interpolated from the `_default` entry.
  generation_config: ${attacks._default.generation_config}
  seed: ${attacks._default.seed}
  # All tested tokenizers convert this to exactly 20 tokens.
  optim_str_init: "x x x x x x x x x x x x x x x x x x x x"

  # Accepted values: "Adam" or "SAM".
  optimizer: "Adam"
  # Accepted values: "simplex", "l2" or "l1".
  projection: "simplex"
  alpha: 0.001
  # Equal to `num_steps` with the values in this entry.
  restart_every: 100

  lr_scheduler:
    # Accepted values: "constant" or "cosine".
    type: "constant"
    # Options for the "constant" scheduler.
    factor: 1.0
    total_iters: 100
    # Options for the "cosine" scheduler.
    eta_min: 0.325
    T_0: 60
# prefilling attack configuration; configured for a single step.
prefilling:
  name: prefilling
  type: discrete
  version: "0.0.1"
  # Seed is interpolated from the `_default` entry.
  seed: ${attacks._default.seed}
  num_steps: 1
  batch_size: 300
  # Generation settings are interpolated from the `_default` entry.
  generation_config: ${attacks._default.generation_config}
# random_search attack configuration (placement: suffix). Note that this entry
# is at version 0.0.2.
random_search:
  name: random_search
  type: discrete
  version: "0.0.2"
  placement: "suffix"
  # Seed is interpolated from the `_default` entry.
  seed: ${attacks._default.seed}

  num_steps: 100
  neighborhood_radius: 1
  candidates_per_generation: 8
  # All tested tokenizers convert this to exactly 20 tokens.
  optim_str_init: "x x x x x x x x x x x x x x x x x x x x"

  # Generation settings are interpolated from the `_default` entry.
  generation_config: ${attacks._default.generation_config}
# reinforce attack configuration.
reinforce:
  name: reinforce
  type: discrete
  version: "0.0.1"
  # Generation settings and seed are interpolated from the `_default` entry.
  generation_config: ${attacks._default.generation_config}
  seed: ${attacks._default.seed}

  num_steps: 100
  batch_size: 512
  # All tested tokenizers convert this to exactly 20 tokens.
  optim_str_init: "x x x x x x x x x x x x x x x x x x x x"

  # Candidate search settings.
  search_width: 512
  topk: 256
  n_replace: 1
  buffer_size: 0

  # The judge is given as a plain name here rather than as a model mapping.
  judge_model: strong_reject

  # Loss settings.
  loss: ce
  use_constrained_gradient: false
  mellowmax_alpha: 1.0

  early_stop: false
  use_prefix_cache: true

  # Token filtering switches.
  allow_non_ascii: false
  allow_special: false
  filter_ids: true

  verbosity: WARNING
  token_selection: default
  grow_target: false
  grad_smoothing: 1
  # 0.5 works quite well.
  grad_momentum: 0.0

