# Hydra defaults list. `_self_` is the only entry, so no other config
# groups are pulled in from here.
defaults:
  - _self_

# Hydra runtime settings: the run directory (hydra.run.dir) is the
# relative path `runs`.
hydra:
  run:
    dir: 'runs'

# Experiment-wide settings; the per-setting entries live under `configs`.
experiment:
  # Presumably the number of cross-validation splits and the number of
  # parallel jobs - confirm against the consuming code.
  n_splits: 5
  n_jobs: 8

  # Classifier names to run (a single entry here).
  classifiers:
    - catboost

  # Directory names; presumably input (`file_dir`) and output (`out_dir`)
  # locations - confirm against the consuming code.
  file_dir: 'outputs_final'
  out_dir: 'logs'

  # Selector lists; the accepted values are defined by the consumer.
  secret_types:
    - all
  outliers:
    - clip

  # One entry per evaluation setting. Every entry has the same shape:
  #   key                    label for the setting (dataset/subset + model)
  #   number_of_members      100 in every entry
  #   number_of_nonmembers   127 in every entry
  #   file_filter_condition  Python lambda source taking a file path
  #   filter_features        Python lambda source taking a feature name
  #   model_list             model identifier(s) for the setting
  #
  # NOTE(review): the lambda strings are presumably eval()'d by the
  # consumer; if so, this file must be treated as trusted code - confirm.
  #
  # The feature filter is identical for every entry, so it is written once
  # on the first entry (anchor `feature_filter`) and aliased afterwards. It
  # accepts a name when the part after its first '_' equals one of the three
  # listed strings.
  configs:

    # ---- allenai/OLMo-7B-0424-hf ----------------------------------------

    - key: 'dolma dolma_arxiv OLMo-7B-0424-hf'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'dolma_arxiv' in file_path"
      filter_features: &feature_filter "lambda x: any('_'.join(x.split('_')[1:]) == a for a in ['recall_maxk_0.05', 'recall_maxk_0.1', 'real_recall_maxk++_0.85'])"
      model_list:
        - allenai/OLMo-7B-0424-hf

    - key: 'dolma dolma_c4 OLMo-7B-0424-hf'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'dolma_c4' in file_path"
      filter_features: *feature_filter
      model_list:
        - allenai/OLMo-7B-0424-hf

    - key: 'dolma dolma_falcon OLMo-7B-0424-hf'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'dolma_falcon' in file_path"
      filter_features: *feature_filter
      model_list:
        - allenai/OLMo-7B-0424-hf

    - key: 'dolma dolma_megawika OLMo-7B-0424-hf'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'dolma_megawika' in file_path"
      filter_features: *feature_filter
      model_list:
        - allenai/OLMo-7B-0424-hf

    - key: 'dolma dolma_pes2o OLMo-7B-0424-hf'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'dolma_pes2o' in file_path"
      filter_features: *feature_filter
      model_list:
        - allenai/OLMo-7B-0424-hf

    - key: 'dolma dolma_algebraic-stack-train OLMo-7B-0424-hf'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'dolma_algebraic-stack-train' in file_path"
      filter_features: *feature_filter
      model_list:
        - allenai/OLMo-7B-0424-hf

    - key: 'dolma dolma_open-web-math-train OLMo-7B-0424-hf'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'dolma_open-web-math-train' in file_path"
      filter_features: *feature_filter
      model_list:
        - allenai/OLMo-7B-0424-hf

    - key: 'proof-val OLMo-7B-0424-hf'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: any(f in file_path for f in ['proof-pile-2_validation', 'proof-pile-2_test'])"
      filter_features: *feature_filter
      model_list:
        - allenai/OLMo-7B-0424-hf

    # ---- EleutherAI/pythia-12b-deduped ----------------------------------
    # NOTE(review): in the pile-val filters, 'pile' is a plain substring
    # test, so they would also accept paths containing 'proof-pile-2' -
    # verify this cannot collide with the proof-pile files.

    - key: 'pile-val pythia-12b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile' in file_path and 'pile-train' not in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-12b-deduped

    - key: 'pile-train pythia-12b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-12b-deduped

    - key: 'pile-val Github pythia-12b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile' in file_path and 'pile-train' not in file_path and 'Github' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-12b-deduped

    - key: 'pile-val StackExchange pythia-12b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile' in file_path and 'pile-train' not in file_path and 'StackExchange' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-12b-deduped

    - key: 'pile-train Github pythia-12b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'Github' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-12b-deduped

    - key: 'pile-train StackExchange pythia-12b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'StackExchange' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-12b-deduped

    - key: 'pile-train Pile-CC pythia-12b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'Pile-CC' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-12b-deduped

    - key: 'pile-train UbuntuIRC pythia-12b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'UbuntuIRC' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-12b-deduped

    - key: 'pile-train ArXiv pythia-12b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'ArXiv' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-12b-deduped

    - key: 'pile-train PubMedCentral pythia-12b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'PubMedCentral' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-12b-deduped

    - key: 'pile-train HackerNews pythia-12b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'HackerNews' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-12b-deduped

    # ---- EleutherAI/pythia-6.9b-deduped ---------------------------------

    - key: 'pile-val pythia-6.9b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile' in file_path and 'pile-train' not in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-6.9b-deduped

    - key: 'pile-train pythia-6.9b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-6.9b-deduped

    - key: 'pile-val Github pythia-6.9b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile' in file_path and 'pile-train' not in file_path and 'Github' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-6.9b-deduped

    - key: 'pile-val StackExchange pythia-6.9b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile' in file_path and 'pile-train' not in file_path and 'StackExchange' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-6.9b-deduped

    - key: 'pile-train Github pythia-6.9b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'Github' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-6.9b-deduped

    - key: 'pile-train StackExchange pythia-6.9b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'StackExchange' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-6.9b-deduped

    - key: 'pile-train Pile-CC pythia-6.9b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'Pile-CC' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-6.9b-deduped

    - key: 'pile-train UbuntuIRC pythia-6.9b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'UbuntuIRC' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-6.9b-deduped

    - key: 'pile-train ArXiv pythia-6.9b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'ArXiv' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-6.9b-deduped

    - key: 'pile-train PubMedCentral pythia-6.9b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'PubMedCentral' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-6.9b-deduped

    - key: 'pile-train HackerNews pythia-6.9b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'HackerNews' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-6.9b-deduped

    # ---- EleutherAI/pythia-2.8b-deduped ---------------------------------

    - key: 'pile-val pythia-2.8b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile' in file_path and 'pile-train' not in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-2.8b-deduped

    - key: 'pile-train pythia-2.8b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-2.8b-deduped

    - key: 'pile-val Github pythia-2.8b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile' in file_path and 'pile-train' not in file_path and 'Github' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-2.8b-deduped

    - key: 'pile-val StackExchange pythia-2.8b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile' in file_path and 'pile-train' not in file_path and 'StackExchange' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-2.8b-deduped

    - key: 'pile-train Github pythia-2.8b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'Github' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-2.8b-deduped

    - key: 'pile-train StackExchange pythia-2.8b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'StackExchange' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-2.8b-deduped

    - key: 'pile-train Pile-CC pythia-2.8b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'Pile-CC' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-2.8b-deduped

    - key: 'pile-train UbuntuIRC pythia-2.8b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'UbuntuIRC' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-2.8b-deduped

    - key: 'pile-train ArXiv pythia-2.8b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'ArXiv' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-2.8b-deduped

    - key: 'pile-train PubMedCentral pythia-2.8b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'PubMedCentral' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-2.8b-deduped

    - key: 'pile-train HackerNews pythia-2.8b-deduped'
      number_of_members: 100
      number_of_nonmembers: 127
      file_filter_condition: "lambda file_path: 'pile-train' in file_path and 'HackerNews' in file_path"
      filter_features: *feature_filter
      model_list:
        - EleutherAI/pythia-2.8b-deduped

