# Settings for the LLM extraction step.
# NOTE(review): key meanings are inferred from their names; confirm them
# against the consuming tool's configuration reference.
llm_extraction:
  # Model selection and sampling.
  provider: "openai"
  name: "gpt-5"
  temperature: 1.0
  # Throughput.
  # NOTE(review): max_workers (25) is larger than batch_size (10); confirm
  # whether the additional workers can ever be kept busy.
  batch_size: 10
  max_workers: 25
  # Retry policy.
  max_retries: 3
  base_delay: 1.0  # presumably seconds - TODO confirm
  # Spend tracking.
  track_cost: true
  max_budget: 50.0  # currency unit is not stated here - TODO confirm
  # Presumably the dotenv file that supplies API credentials - TODO confirm.
  dotenv_path: ".env"

# Input preparation applied before extraction.
# NOTE(review): key meanings are inferred from their names; confirm them
# against the consuming tool's configuration reference.
data_preprocessing:
  target_column: "text"
  drop_target_column: false
  splitting:
    type: "ParagraphSplit"
  scoring:
    type: "KeywordScorer"
    # One keyword per line so additions and removals diff cleanly.
    keywords:
      - "price"
      - "cost"
      - "market"
      - "commodity"
      - "oil"
      - "gas"
      - "steel"
      - "copper"
      - "aluminum"
      - "gold"
  # Quoted because the value is an expression string, not a YAML comparison.
  # Presumably evaluated against a "delm_score" column produced by the scorer
  # above - TODO confirm.
  pandas_score_filter: "delm_score > 0"

# Extraction schema reference and the prompt text used with it.
schema:
  # NOTE(review): relative path; confirm which directory the consumer
  # resolves it against.
  spec_path: "commodity_schema.yaml"
  container_name: "commodity_prices"
  # Literal block scalar ("|"): line breaks are kept and the value ends with a
  # single newline. Lines starting with "#" inside it are prompt text, not
  # YAML comments.
  # The block's indentation is taken from its first content line
  # ("# Instructions"), so the lines indented two spaces deeper keep two
  # leading spaces in the parsed string.
  # NOTE(review): that extra indentation looks unintentional; it is left as-is
  # because changing it changes the prompt text.
  # {variables} and {text} are presumably placeholders filled in by the
  # consumer - TODO confirm.
  prompt_template: |
    # Instructions
      Given an excerpt from an investor call transcript, identify and record all instances where a firm representative mentions a definite numeric price for a good. A good is something you can reasonably assume is traded in a market. Ignore instances without a numeric price.

      ## Guidelines

      ### Speaker Verification
      - Ensure the statement comes from a firm representative (e.g., CEO, CFO), not from a third party like an external analyst or an unidentified speaker. The speaker's name and affiliation are often mentioned at the start.
      - Exclude any prices mentioned by external analysts or third parties; only include prices mentioned by firm representatives.

      ### Capture Multiple Instances
      - If a statement contains multiple prices or goods, record each instance separately.
    {variables}

    {text}

# Cache settings.
# NOTE(review): key meanings are inferred from their names; confirm them
# against the consuming tool's configuration reference.
semantic_cache:
  # Storage engine and its durability setting.
  backend: "sqlite"
  synchronous: "normal"  # presumably SQLite's synchronous level - TODO confirm
  # Location and size limit.
  path: "./cache"  # relative path; confirm the directory it resolves against
  max_size_mb: 100
