# Embedding model settings.
embedding_model:
  model_name: google/gemma-3-4b-it
  # NOTE(review): 'auto' presumably defers device selection to the loader —
  # confirm against the code that reads this key.
  device: auto
  # Instruction text wrapped around each input. It names 13 unsafe labels
  # (s1..s13) and 7 safe labels; `{prompt_text}` is its only placeholder.
  # Written as a folded scalar (`>-`): the line breaks below become single
  # spaces and the final newline is stripped, so the parsed value is one line.
  # Do not add blank or extra-indented lines inside it — they would be kept.
  input_prompt_template: >-
    Extract a semantic representation for the following text, considering its
    potential classification within these categories: Unsafe Categories:
    s1_violent_crimes, s2_non_violent_crimes, s3_sex_related_crimes,
    s4_child_sexual_exploitation, s5_defamation, s6_specialized_advice,
    s7_privacy, s8_intellectual_property, s9_indiscriminate_weapons, s10_hate,
    s11_suicide_self_harm, s12_sexual_content, s13_elections. Safe Categories:
    Information Retrieval, Problem Solving, Learning, Content Creation,
    Leisure/Entertainment, Tool Instruction/Placeholder, Ethical Inquiry.
    Text: {prompt_text}
  # NOTE(review): presumably selects which hidden states feed the embedding
  # (here: the last layer, one layer) — confirm the accepted strategy names.
  hidden_state_extraction:
    strategy: last_layer
    num_layers: 1
  # NOTE(review): presumably the reduction applied over token vectors — confirm.
  pooling_strategy: mean
# NLP settings.
nlp:
  # NOTE(review): the value reads as "log a warning and return None" for
  # unsupported languages — confirm the set of accepted values in the consumer.
  unsupported_language_fallback: log_warning_return_none
# Vector database location and distance metric.
vector_database:
  # `<BASE_PROJECT_DIR>` is a literal placeholder in this file.
  # NOTE(review): presumably substituted by the config loader — confirm.
  path: <BASE_PROJECT_DIR>/data/07_vector_db/gemma_3_4b_it_wild_jailbreak
  distance_function: cosine
# Detection pipeline settings: search size, ensemble strategy, thresholds and
# vote weights.
detection_pipeline:
  top_k_semantic_search: 7
  ensemble_strategy: weighted_majority_vote
  # NOTE(review): the threshold semantics below (units, comparison direction)
  # are not visible from this file — confirm against the code that reads them.
  perplexity_dominant_unsafe_threshold: 0.9
  vector_dominant_safe_distance_threshold: 0.1
  vector_dominant_safe_perplexity_threshold: 0.5
  # Same value (0.8) as
  # perplexity_analyzer.default_engine_settings.sentence_adversarial_probability_threshold.
  # NOTE(review): confirm whether the two are meant to stay in sync.
  sentence_perplexity_unsafe_threshold: 0.8
  weighted_majority_mixed_label_ratio_threshold: 0.3
  # Two weight pairs (default_* and mixed_*); each primary/perplexity pair
  # sums to 1.0.
  # NOTE(review): presumably the mixed_* pair applies when the mixed-label
  # ratio threshold above is exceeded — confirm.
  weighted_majority_vote_weights:
    default_primary: 0.8
    default_perplexity: 0.2
    mixed_primary: 0.7
    mixed_perplexity: 0.3
# Perplexity analyzer settings.
perplexity_analyzer:
  # `<BASE_PROJECT_DIR>` is a literal placeholder in this file.
  # NOTE(review): presumably values from this JSON file take precedence over
  # default_engine_settings when present — confirm.
  optimized_params_file: <BASE_PROJECT_DIR>/config/optimized_perplexity_params.json
  default_engine_settings:
    # Same model id as embedding_model.model_name.
    model_for_log_probs: google/gemma-3-4b-it
    # NOTE(review): the numeric parameters below are presumably log-domain
    # values and penalty coefficients — confirm meaning and valid ranges.
    adversarial_token_uniform_log_prob: -5.0
    lambda_smoothness_penalty: 2.5
    mu_adversarial_token_prior: -2.0
    apply_first_token_neutral_bias: false
    sentence_adversarial_probability_threshold: 0.8
  # Explicit null: no per-category settings are defined in this file.
  category_specific_settings: null
  dynamic_perplexity_enabled: true
  # Explicit null. NOTE(review): the fallback used when unset is not visible
  # here — confirm.
  dynamic_perplexity_top_k: null
# Cluster assignment settings.
clustering:
  # Explicit null. NOTE(review): the fallback used when unset is not visible
  # here — confirm.
  cluster_assignment_top_k: null
  # NOTE(review): presumably the name of the record field whose value is used
  # as the cluster label — confirm.
  cluster_field_name_for_assignment: prompt_category

# Long-input handling: maximum sequence length and chunking parameters.
# NOTE(review): this file also defines a top-level `long_context_handling`
# section with a different chunk_size (512 vs 384 here) and an `overlap` key
# (64, same value as chunk_overlap here). Confirm which section the code reads
# and whether the other is stale.
long_context:
  max_sequence_length_for_embedding: 1024
  strategy: chunking
  # NOTE(review): units (tokens vs characters) are not visible here — confirm.
  chunk_size: 384
  chunk_overlap: 64
# Chunking parameters.
# NOTE(review): this overlaps with the top-level `long_context` section, which
# sets chunk_size: 384 and chunk_overlap: 64. The chunk sizes disagree
# (512 here) and the overlap key is named differently. Confirm which section
# the code reads and whether the other is stale.
long_context_handling:
  chunk_size: 512
  overlap: 64
# Logging settings.
logging:
  level: INFO
  # Relative path, unlike the `<BASE_PROJECT_DIR>`-prefixed paths elsewhere in
  # this file. NOTE(review): confirm which directory it is resolved against.
  log_file: logs/fortress_app.log
  # %-style placeholders: asctime, name, levelname, module, lineno, message.
  # Quoted because a plain scalar cannot start with `%`.
  # NOTE(review): presumably passed to Python's logging formatter — confirm.
  format: '%(asctime)s - %(name)s - %(levelname)s - %(module)s:%(lineno)d - %(message)s'
# CSV split settings: the split column name and the values written to it.
csv_splitting:
  # NOTE(review): which side of the split receives the 0.8 share is not
  # visible from this file — confirm.
  default_split_ratio: 0.8
  split_column_name: split
  database_split_value: database
  benchmark_split_value: benchmark
  # Explicit empty string (not null).
  cleared_split_value: ''
