# Configuration for Intelligent Document Processing System

# Decision Thresholds
# Top-level values under `thresholds`; `program_rules` repeats gpa_threshold
# and min_credits per program (see "Per-program overrides").
thresholds:
  # NOTE(review): presumably compared on the same 4.0 scale as `grade_points` - confirm.
  gpa_threshold: 3.0
  min_credits: 90
  # NOTE(review): both values below look like 0-1 fractions/scores; how they are
  # compared is not visible in this file - confirm against the consumer.
  abstain_threshold: 0.7
  completeness_threshold: 0.8
  
  # Per-program overrides
  # Each program sets its own gpa_threshold and min_credits and adds
  # min_math_credits, which has no top-level counterpart in this file.
  # NOTE(review): fallback behavior for programs not listed here is not
  # defined in this file - confirm against the loader.
  program_rules:
    computer_science:
      gpa_threshold: 3.2
      # Same value as the top-level min_credits (90).
      min_credits: 90
      min_math_credits: 12
    engineering:
      gpa_threshold: 3.1
      min_credits: 95
      min_math_credits: 15
    business:
      gpa_threshold: 2.8
      min_credits: 85
      min_math_credits: 6

# OCR Configuration
ocr:
  # Accepted values: auto|pdfminer|simulated|pytesseract
  # NOTE(review): how "auto" picks a backend is not defined in this file - confirm.
  backend: "auto"  # auto|pdfminer|simulated|pytesseract
  # "simulated" is one of the backend values listed above.
  # NOTE(review): presumably used when the primary backend is unavailable or fails - confirm.
  fallback_backend: "simulated"
  # Expressed in seconds per the key name.
  # NOTE(review): whether this applies per document or per page is not shown here.
  timeout_seconds: 30
  
# Processing Paths
# All entries are relative directory paths written with a trailing slash.
# NOTE(review): the base directory they are resolved against (working directory
# vs. location of this file) is not defined here - confirm against the loader.
paths:
  incoming: "incoming/"
  processed: "processed/"
  rejected: "rejected/"
  archive: "archive/"
  logs: "logs/"
  results: "results/"
  
# Service Configuration
service:
  # Expressed in seconds per the key name.
  # NOTE(review): presumably the polling interval for paths.incoming - confirm.
  watch_interval_seconds: 5
  # NOTE(review): what is retried (OCR, a whole document, I/O) is not shown here.
  max_retries: 3
  # NOTE(review): presumably the number of documents handled per cycle - confirm.
  batch_size: 10
  
# Grade Point Mapping
# Maps letter grades (quoted string keys) to numeric grade points from
# 4.0 down to 0.0. "A+" and "A" both map to 4.0, so the scale tops out at 4.0.
# NOTE(review): handling of grades not listed here (e.g. "P", "W", "I") is not
# defined in this file - confirm against the consumer.
grade_points:
  "A+": 4.0
  "A": 4.0
  "A-": 3.7
  "B+": 3.3
  "B": 3.0
  "B-": 2.7
  "C+": 2.3
  "C": 2.0
  "C-": 1.7
  "D+": 1.3
  "D": 1.0
  "D-": 0.7
  "F": 0.0
  
# Feature Extraction
# The three weights below sum to 1.0 (0.6 + 0.25 + 0.15).
# NOTE(review): whether the consumer requires them to sum to 1.0, or
# renormalizes, is not visible here - keep the sum at 1.0 when editing
# unless confirmed otherwise.
features:
  academic_weight: 0.6
  experience_weight: 0.25
  narrative_weight: 0.15
  
# Model Configuration
# NOTE(review): the key names resemble scikit-learn LogisticRegression
# parameters (random_state, max_iter) - confirm which library consumes them.
model:
  type: "logistic_regression"
  # Same value as synthetic.random_seed (42).
  random_state: 42
  max_iter: 1000
  
# Calibration
calibration:
  # NOTE(review): other accepted method names are not listed in this file.
  method: "temperature_scaling"
  # NOTE(review): presumably the fraction of data held out for calibration
  # (0.15 = 15%) - confirm against the consumer.
  validation_split: 0.15
  
# Logging
logging:
  level: "INFO"
  # Percent-style placeholders: asctime, name, levelname, message.
  # NOTE(review): these match Python `logging` LogRecord attribute names -
  # confirm the value is passed to logging.Formatter before changing the style.
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  
# Synthetic Data Generation
synthetic:
  # Number of synthetic documents to generate, per document type.
  num_transcripts: 1000
  num_resumes: 500
  num_statements: 300
  # NOTE(review): presumably parameters of a normal distribution for generated
  # GPAs; whether samples are clipped to the 0.0-4.0 range is not shown here.
  gpa_mean: 3.2
  gpa_std: 0.6
  # Same value as model.random_state (42).
  random_seed: 42