# File-level metadata describing this SMARTS composition.
meta:
  name: MolSAE-SMARTS
  # Quoted on purpose: an unquoted 1.1 is parsed as a float, so a later
  # "1.10" would silently collapse to 1.1. Keep the version a string.
  version: "1.1"
  # Name of the entry under `profiles` that is used when none is requested.
  profile_default: leadlike
  description: >
    Composition of SMARTS for functional groups, substituents, and ring classes,
    tuned for lead-like ZINC20 (RO5, PAINS-removed, QED>0.4). Includes profiles
    and precedence to ensure reviewer-proof behavior with RDKit sources.

# Merge order of the pattern sources. When two sources tie, the entry from the
# source listed first is kept; sources listed later only contribute patterns
# that are not already covered.
precedence:
  - curated     # the `curated` section of this file
  - rdkit_fgh   # declared under `sources.rdkit_fgh`
  - ring_seeds  # the `ring_seeds` section of this file

# External pattern sources referenced by name elsewhere in this file.
sources:
  # Loaded from RDConfig.RDDataDir/FunctionalGroups.txt.
  rdkit_fgh:
    type: rdkit_functionalgroups_txt
  # Provided as flags only, not merged by default.
  rdkit_alerts:
    type: rdkit_filtercatalog
    sets:
      - PAINS
      - BRENK
      - NIH

# Named selections of pattern ids: `include_prefixes` lists the id prefixes
# that are active, `drop` lists individual ids removed from the selection.
profiles:
  leadlike:
    # Classes we want active.
    include_prefixes:
      - 'fg:'
      - 'ring:'
      - 'sat_ring:'
      - 'subst:'
    # NOTE(review): 'charge:' is not among include_prefixes for this profile,
    # so the first drop entry may be redundant -- confirm against the loader.
    drop:
      - 'charge:quaternary_ammonium'
      - 'fg:anhydride'
      - 'fg:sulfonyl_chloride'
      - 'fg:azide'
      - 'fg:oxime'
      - 'fg:phosphonate'
      - 'fg:phosphinate'
  all:
    # Same prefixes as `leadlike` plus the charge classes; nothing dropped.
    include_prefixes:
      - 'fg:'
      - 'ring:'
      - 'sat_ring:'
      - 'subst:'
      - 'charge:'
    drop: []

# Hand-written SMARTS; each entry is an `id` (class prefix + name) and the
# `smarts` pattern that defines it.
curated:
  # ---------- Amines / cations (exclude amide N) ----------
  # All three amine classes require a neutral (+0), three-connected aliphatic
  # N that is not bonded to a carbonyl carbon. Without the +0 constraint the
  # tertiary pattern also matched the nitro nitrogen ([N+](=O)[O-] is X3/H0),
  # and the primary/secondary patterns matched iminium-type =[NH2+] / =[NH+]-.
  - id: fg:primary_amine
    smarts: "[NX3;H2;+0;!$([NX3]-[CX3](=O))]"
  - id: fg:secondary_amine
    smarts: "[NX3;H1;+0;!$([NX3]-[CX3](=O))]"
  - id: fg:tertiary_amine
    smarts: "[NX3;H0;+0;!$([NX3]-[CX3](=O))]"
  # Positively charged N carrying four aliphatic carbon substituents.
  - id: charge:quaternary_ammonium
    smarts: "[N+](C)(C)(C)C"

  # ---------- Amides & neighbors (split) ----------
  # Any C(=O)-N unit; deliberately broad (also hits ureas and carbamates).
  - id: fg:amide_any
    smarts: "[CX3](=O)[NX3]"
  # Primary amide: N carries exactly two H. The former "H2,H1" alternative
  # also matched every secondary amide.
  - id: fg:amide_primary
    smarts: "[CX3](=O)[NX3;H2]"
  # Secondary amide: one H and one carbon substituent on N.
  - id: fg:amide_secondary
    smarts: "[CX3](=O)[NX3;H1]([#6])"
  # Tertiary amide: two carbon substituents on N (so no H remains).
  - id: fg:amide_tertiary
    smarts: "[CX3](=O)[NX3]([#6])[#6]"
  # Anilide = N-aryl amide: the aromatic carbon sits on the nitrogen. The
  # previous pattern placed it on the carbonyl carbon, i.e. a benzamide.
  - id: fg:anilide
    smarts: "[CX3](=O)[NX3]c"
  # Urea: carbonyl flanked by two nitrogens.
  - id: fg:urea
    smarts: "[NX3][CX3](=O)[NX3]"
  # Carbamate: carbonyl flanked by one oxygen and one nitrogen.
  - id: fg:carbamate
    smarts: "[OX2][CX3](=O)[NX3]"
  # Imide: one nitrogen flanked by two carbonyls, C(=O)-N-C(=O). The previous
  # pattern required an extra leading N and therefore described an acylurea.
  - id: fg:imide
    smarts: "[CX3](=O)[NX3][CX3](=O)"

  # ---------- Carboxyl / esters ----------
  # Carbonyl carbon bearing a hydroxyl oxygen (two connections, one H).
  - id: fg:carboxylic_acid
    smarts: '[CX3](=O)[OX2H1]'
  # Deprotonated form: carbonyl carbon bearing a negatively charged oxygen.
  - id: fg:carboxylate
    smarts: '[CX3](=O)[O-]'
  # Carbonyl carbon bonded to an oxygen that in turn carries a carbon.
  - id: fg:ester
    smarts: '[CX3](=O)[OX2][#6]'
  # Two carbonyl carbons bridged by a single oxygen.
  - id: fg:anhydride
    smarts: '[CX3](=O)O[CX3](=O)'

  # ---------- Carbonyl classes ----------
  # Aldehyde: carbonyl carbon with exactly one H and one carbon neighbour.
  - id: fg:aldehyde
    smarts: "[CX3H1](=O)[#6]"
  # Ketone: carbonyl carbon with carbon on BOTH sides. With only one carbon
  # required, the pattern also matched aldehydes, amides, esters and acids.
  - id: fg:ketone
    smarts: "[#6][CX3](=O)[#6]"

  # ---------- Oxygen functional groups ----------
  # Ether: two-connected oxygen between two carbons.
  - id: fg:ether
    smarts: "[OX2]([#6])[#6]"
  # Phenol: hydroxyl on an aromatic carbon.
  - id: fg:phenol
    smarts: "c[OX2H]"
  # Alcohol: hydroxyl on an sp3 carbon. Using [#6] here also matched phenols
  # (already covered by fg:phenol) and the OH of carboxylic acids.
  - id: fg:alcohol
    smarts: "[OX2H][CX4]"

  # ---------- Sulfur ----------
  # Sulfone: four-connected S with two S=O and carbon on both remaining
  # positions. Requiring only one carbon also matched sulfonamides,
  # sulfonate esters and sulfonyl chlorides.
  - id: fg:sulfone
    smarts: "[#6][SX4](=O)(=O)[#6]"
  # Sulfoxide: three-connected S with one S=O and two carbons. Without the
  # X3 constraint every sulfone/sulfonamide/sulfonate matched as well.
  - id: fg:sulfoxide
    smarts: "[#6][SX3](=O)[#6]"
  # Any sulfonyl group attached to a three-connected nitrogen.
  - id: fg:sulfonamide_any
    smarts: "S(=O)(=O)[NX3]"
  # Split by the number of H on the sulfonamide nitrogen.
  - id: fg:sulfonamide_primary
    smarts: "S(=O)(=O)[NX3;H2]"
  - id: fg:sulfonamide_secondary
    smarts: "S(=O)(=O)[NX3;H1]([#6])"
  - id: fg:sulfonamide_tertiary
    smarts: "S(=O)(=O)[NX3]([#6])[#6]"
  # Sulfonyl group bonded to an oxygen that carries a carbon.
  - id: fg:sulfonate_ester
    smarts: "S(=O)(=O)O[#6]"
  # Sulfonyl group bonded to chlorine.
  - id: fg:sulfonyl_chloride
    smarts: "S(=O)(=O)Cl"

  # ---------- N/O/X small polar / electrophiles ----------
  # Nitrile: two-connected carbon triple-bonded to nitrogen.
  - id: fg:nitrile
    smarts: "[CX2]#N"
  # Nitro in its charge-separated form.
  - id: fg:nitro
    smarts: "[NX3+](=O)[O-]"
  # Azo: N=N with a carbon on each nitrogen. A bare "N=N" also fired on
  # azides (N=[N+]=[N-]), which have their own class below.
  - id: fg:azo
    smarts: "[#6][NX2]=[NX2][#6]"
  # Azide written as the cumulated N=[N+]=N form.
  - id: fg:azide
    smarts: "N=[N+]=N"
  # Oxime: C=N-OH. The previous pattern required a hydroxyl on the carbon
  # and an O-substituted (H0) oxygen on N, which is not an oxime.
  - id: fg:oxime
    smarts: "[CX3]=[NX2][OX2H]"

  # ---------- Phosphorus ----------
  # Phosphonate: P=O with two single-bonded oxygens AND one P-C bond. Without
  # the carbon the pattern was indistinguishable from a phosphate.
  - id: fg:phosphonate
    smarts: "P(=O)(O)(O)[#6]"
  # Phosphinate: P=O with one single-bonded oxygen and two P-C bonds. With
  # only one carbon required, every phosphonate matched this class too.
  # NOTE(review): H-phosphinates (one P-C, one P-H) are not covered -- confirm
  # whether they should be.
  - id: fg:phosphinate
    smarts: "P(=O)(O)([#6])[#6]"

  # ---------- Substituents / halogens / fluorinated ----------
  # Halogen on any aromatic atom.
  - id: subst:aryl_halide
    smarts: "a-[F,Cl,Br,I]"
  # Halogen on an sp3 carbon. [#6] matched aromatic and vinylic carbons as
  # well, duplicating subst:aryl_halide and subst:vinyl_halide.
  - id: subst:alkyl_halide
    smarts: "[CX4]-[F,Cl,Br,I]"
  # Halogen on a carbon that is part of a C=C double bond.
  - id: subst:vinyl_halide
    smarts: "C=C-[F,Cl,Br,I]"
  # sp3 carbon carrying three fluorines.
  - id: subst:trifluoromethyl
    smarts: "[CX4](F)(F)F"
  # sp3 carbon carrying one H and two fluorines.
  - id: subst:difluoromethyl
    smarts: "[CX4H](F)F"

  # ---------- Guanidino / amidino ----------
  # Amidine: N-C(=N) where the central carbon's remaining substituent is a
  # carbon or a hydrogen. The previous pattern demanded a second amino N on
  # that carbon, which made it identical in scope to fg:guanidine.
  - id: fg:amidine
    smarts: "[NX3][CX3;$(C[#6]),$([CH1])]=[NX2]"
  # Guanidine: carbon bonded to three nitrogens, one of them via a double bond.
  - id: fg:guanidine
    smarts: "NC(=N)N"

# Ring-class seeds given as SMILES. Each entry is a mapping with an `id` and a
# `smiles` key. (The former one-line "id: ... ; smiles: ..." form is not valid
# YAML: the second ": " inside a plain scalar is a parse error.)
ring_seeds:
  # Aromatic 5/6 rings and benzo-fused classes.
  # Five-membered aromatic rings without O/S need an explicit [nH] so the
  # SMILES can be kekulized.
  - id: ring:benzene
    smiles: "c1ccccc1"
  - id: ring:pyridine
    smiles: "n1ccccc1"
  - id: ring:pyrazine
    smiles: "n1ccncc1"
  - id: ring:pyrimidine
    smiles: "n1cnccc1"
  - id: ring:triazine_1_3_5
    smiles: "n1cncnc1"
  # 1,3-diazole; was "c1n[cH]nc1" (no [nH], not kekulizable).
  - id: ring:imidazole
    smiles: "c1c[nH]cn1"
  # 1,2-diazole; was "c1nnc[cH]1" (no [nH], not kekulizable).
  - id: ring:pyrazole
    smiles: "c1cn[nH]c1"
  # O and N separated by one carbon; was "c1noc[cH]1", which is isoxazole.
  - id: ring:oxazole
    smiles: "c1cocn1"
  - id: ring:isoxazole
    smiles: "c1nocc1"
  # S and N separated by one carbon; was "c1nsc[cH]1", which is isothiazole.
  - id: ring:thiazole
    smiles: "c1cscn1"
  - id: ring:isothiazole
    smiles: "c1nscc1"
  # O flanked by two carbons, N-N opposite; was "c1nno[cH]1" (1,2,3-isomer).
  - id: ring:oxadiazole_1_3_4
    smiles: "c1nnco1"
  # O1, N2, C3, N4, C5; was "n1nocc1" (1,2,3-isomer).
  - id: ring:oxadiazole_1_2_4
    smiles: "c1ncon1"
  # N1, N2, C3, N4, C5; was "n1nncc1" (1,2,3-skeleton without [nH]).
  - id: ring:triazole_1_2_4
    smiles: "c1nc[nH]n1"
  - id: ring:triazole_1_2_3
    smiles: "n1n[nH]cc1"
  # Was "n1nnnc1" (no [nH], not kekulizable).
  - id: ring:tetrazole
    smiles: "c1nnn[nH]1"
  - id: ring:indole
    smiles: "c1ccc2[nH]ccc2c1"
  # Nitrogens separated by one carbon; was "c1ccc2[nH]ncc2c1" (indazole).
  - id: ring:benzimidazole
    smiles: "c1ccc2[nH]cnc2c1"
  # Five-membered S/N ring fused to benzene; was "c1ccc2nsccc2c1", which
  # described a six-membered hetero ring.
  - id: ring:benzothiazole
    smiles: "c1ccc2scnc2c1"
  # Five-membered O/N ring fused to benzene; was "c1ccc2noccc2c1", which
  # described a six-membered hetero ring.
  - id: ring:benzoxazole
    smiles: "c1ccc2ocnc2c1"
  - id: ring:quinoline
    smiles: "c1ccc2ncccc2c1"
  - id: ring:isoquinoline
    smiles: "c1ccc2ccncc2c1"
  - id: ring:quinazoline
    smiles: "c1ccc2ncncc2c1"
  - id: ring:quinoxaline
    smiles: "c1ccc2nccnc2c1"

  # Saturated / partially saturated heterocycles
  - id: sat_ring:piperidine
    smiles: "N1CCCCC1"
  - id: sat_ring:piperazine
    smiles: "N1CCNCC1"
  - id: sat_ring:morpholine
    smiles: "O1CCNCC1"
  - id: sat_ring:pyrrolidine
    smiles: "N1CCCC1"
  - id: sat_ring:azetidine
    smiles: "N1CCC1"
  - id: sat_ring:azepane
    smiles: "N1CCCCCC1"
  - id: sat_ring:oxetane
    smiles: "O1CCC1"
  - id: sat_ring:thietane
    smiles: "S1CCC1"
  - id: sat_ring:1_4_dioxane
    smiles: "O1CCOCC1"
