# Antislop-vLLM Configuration

# Note: any value here is overridden when the corresponding command-line parameter is specified.

# URL at which your vLLM OpenAI-compatible API is running:
api_base_url: "http://localhost:8000/v1"
# Placeholder value. NOTE(review): presumably only matters if the server
# enforces authentication — confirm. Do not commit a real key here.
api_key: "xxx"

# Model to generate with.
# NOTE(review): presumably the model name passed to the API — confirm.
model_name: "unsloth/gemma-3-1b-it"

# The model id or path containing the tokenizer used for applying the chat
# template. In this config it is the same id as model_name.
chat_template_model_id: "unsloth/gemma-3-1b-it"

# Slop Configuration

# slop phrases == list of strings to ban
#   - Matching is case insensitive.
#   - To trigger a ban, the sequence must not have a word-like character
#     (i.e. anything that is not punctuation or whitespace) directly on
#     either side. That is to say, banned sequences are not matched when
#     they occur as substrings of longer words. The exception is when the
#     banned string itself is already bookended by a non-word character.
#
#   Examples:
#     banned string "cat"
#       - won't trigger a ban for "cation"
#       - will trigger a ban on "cat[morecat]"
#     banned string "cat["
#       - *will* trigger a ban on "cat[morecat]" even though a word
#         character ("m") directly follows the match, because the banned
#         string ends with a non-word character.
#
# NOTE(review): generation_params.match_requires_word_boundaries presumably
# toggles the word-boundary rule described above — confirm.
slop_phrases_file: "banlists/slop_phrases.json"
top_n_slop_phrases: 200 # Use top N phrases from the file (assumes file is ordered)

# The prompt template wraps each prompt when generating from a dataset;
# "{prompt}" marks where the dataset prompt is inserted.
# To use the original prompt exactly, set the template to "{prompt}".
# NOTE(review): the template below asks for 1000 words, while
# generation_params.max_new_tokens is 600 — confirm the mismatch is intended.
prompt_template: "Writing prompt: {prompt}\n\nWrite 1000 words to this prompt. Your response:\n"
system_prompt: ""        # optional; left empty → no system prompt

# Generation Parameters
generation_params:
  # NOTE(review): presumably the number of concurrent worker threads — confirm.
  threads: 40
  request_mode: chunk      # chunk | stream (stream might be buggy)
  # NOTE(review): unit is presumably tokens per chunk request — confirm.
  chunk_size: 20  # only relevant when request_mode is chunk
  top_logprobs_count: 20
  max_new_tokens: 600
  # Sampling options.
  temperature: 1.0
  top_p: 1.0
  top_k: 50
  min_p: 0.03

  timeout: 120        # seconds
  stop_sequences: []
  # If true, the banlist only takes effect on whole words/phrases that
  # are bounded by non-word characters.
  # Example with banlist ["bill"]:
  #   - true:  a ban triggers on "bill" but not on "billion".
  #   - false: a ban triggers any time "bill" occurs as a substring,
  #            regardless of word boundaries.
  match_requires_word_boundaries: true

# Refusal detection (batch mode only)
# When enabled, refused generations are not added to the generated DPO dataset.
enable_refusal_detection: true

# Backtracking Configuration
backtracking:
  # If set to true:
  #   when resampling after backtracking does not find a valid replacement
  #   token, sampling options are progressively disabled (temperature, then
  #   min_p, then top_p, then top_k) until a non-banned replacement is found
  #   or the candidates run out.
  force_backtrack: true

# N-Gram Validator Configuration (Optional)
ngram_validator:
  # Option 1 (active): Specify a file containing a JSON list of banned n-grams.
  # Each item in the JSON list can be a string (e.g., "this is an n gram")
  # or a list of strings (e.g., ["this", "is", "an", "n", "gram"]).
  banned_file: "banlists/banned_ngrams.json"

  # Option 2: Specify the list directly in the config by uncommenting
  # banned_list below.
  # banned_list takes precedence if banned_file is also specified, and the
  # CLI option --ngram-banned-list overrides banned_list.
  # NOTE(review): the original note also said "CLI overrides file" — confirm
  # the exact precedence between the CLI, banned_list and banned_file.
  # banned_list:
  #   - "as an AI language model" # Will be tokenized
  #   - "I am programmed"
  #   - ["repeated", "phrase", "here"] # Already tokenized
  #   - "feel free to ask"

  remove_stopwords: true # Default: true. Whether to remove stopwords before matching.
  language: "english"    # Default: "english". Language for stopwords.

# Optional: path to a regex blocklist file.
# NOTE(review): presumably a JSON list of regex patterns to ban — confirm.
regex_blocklist_file: "banlists/regex_not_x_but_y.json"

# Optional: output token-wise DPO pairs to this JSONL file (uncomment to enable).
#ftpo_pairs_jsonl: "results/ftpo_pairs_run1.jsonl"

# Logging
# NOTE(review): presumably a standard level name such as DEBUG, INFO,
# WARNING or ERROR — confirm against the loader.
logging_level: INFO