# Configuration for the `extract_pretrain_data` mode.
#
# NOTE(review): the code that consumes this file is not visible here, so the
# exact meaning of each key must be confirmed against it. Comments marked
# "presumably" are assumptions, not verified facts.

# Looks like a Hydra-style defaults list containing only this file itself
# (`_self_`) - TODO confirm the config loader in use.
defaults:
  - _self_

mode: extract_pretrain_data

# NOTE(review): presumably selects which judgement source(s) are applied;
# the set of valid values other than 'both' is not visible here - confirm.
judgement: 'both'

# NOTE(review): presumably a cutoff applied when filtering is enabled;
# its scale and comparison direction are not visible here - confirm.
threshold: 5
# NOTE(review): presumably toggles filtering of the extracted data - confirm.
is_filter: true
# NOTE(review): presumably switches between the single-file keys
# (`data.path` / `data.save_path`) and the multi-file key (`data.paths`)
# - confirm against the consumer.
is_multi_paths: false

data:
  path: '...'  # CSV file obtained after guarding the raw data with llama-guard-2
  save_path: '...'  # used in the single-file case

  # used in the multi-file case
  paths:
    - '...'
