# drop_cot_pass5.yaml
# Task identifier for this configuration.
task: drop_cot
# Dataset to load (presumably a Hugging Face Hub path — confirm).
dataset_path: ucinlp/drop
# Generation-style task; see `generation_kwargs` for the stop strings and
# sampling settings that apply to it.
output_type: generate_until
# The dataset split named `validation` is used as the test split.
test_split: validation

# Prompt template (Jinja). Renders the passage — falling back to `context`
# when the document has no `passage` field — then the question, then an
# instruction to reason step by step and end with a single
# "The answer is: <answer>" line.
doc_to_text: |
  Passage:
  {{passage if passage is defined else context}}

  Question: {{question}}

  Think step by step, then finish with one line exactly in the form:
  The answer is: <answer>

# Emit ONE canonical gold target per document, by priority:
#   number > date > first non-empty span.
#
# Implementation notes:
# - The result is accumulated on a Jinja `namespace` object. A plain
#   `{% set %}` inside a `{% for %}` body is scoped to the loop and discarded
#   when the loop ends, so a plain variable would leave the target empty for
#   every span-only document.
# - Each stage runs only while no answer has been found yet, so an empty
#   `number` or an all-empty `date` mapping falls through to the next stage
#   instead of ending the search.
# - Date parts are joined after dropping the empty ones, so a missing day or
#   month does not leave a doubled space.
# - `answers_spans` may be a mapping with a `spans` list, a bare string, or a
#   plain list; a bare string is treated as one span rather than iterated
#   character by character.
# - `|-` drops the block scalar's final newline and `trim` removes surrounding
#   whitespace, so the rendered target has no stray leading/trailing
#   whitespace.
doc_to_target: |-
  {%- set ns = namespace(gold="") -%}
  {%- if number is defined and number is not none -%}
    {%- set ns.gold = number | string | trim -%}
  {%- endif -%}
  {%- if not ns.gold and date is defined and date is mapping -%}
    {%- set parts = [date.get('month', ''), date.get('day', ''), date.get('year', '')] -%}
    {%- set ns.gold = parts | select | join(' ') -%}
  {%- endif -%}
  {%- if not ns.gold and answers_spans is defined -%}
    {%- if answers_spans is mapping -%}
      {%- set spans = answers_spans.get('spans') or [] -%}
    {%- elif answers_spans is string -%}
      {%- set spans = [answers_spans] -%}
    {%- elif answers_spans is iterable -%}
      {%- set spans = answers_spans -%}
    {%- else -%}
      {%- set spans = [] -%}
    {%- endif -%}
    {%- for s in spans -%}
      {%- if not ns.gold and s -%}{%- set ns.gold = s -%}{%- endif -%}
    {%- endfor -%}
  {%- endif -%}
  {{ ns.gold | string | trim }}

# Scoring: one custom metric, loaded through the `!function` tag from
# `utils.fs4_per_gen_list`. Described by the original author as a 4-stage
# semantic checker across the k generations (any-of-5); the implementation is
# not part of this file — TODO confirm against utils.py.
metric_list:
  - metric: !function utils.fs4_per_gen_list
    # Per-document scores are combined with a mean; larger values are better.
    aggregation: mean
    higher_is_better: true

generation_kwargs:
  # Stop strings: end generation when the model emits an end-of-sequence /
  # end-of-turn marker or starts writing a new example.
  #
  # "\nThe answer is:" is deliberately NOT a stop string. The prompt tells the
  # model to finish with "The answer is: <answer>"; stopping on that prefix
  # would end generation before the answer itself is produced, leaving nothing
  # to score.
  # NOTE(review): no explicit token budget is set here; if the harness default
  # is short, long reasoning chains may be cut off before the answer line —
  # confirm the default is adequate.
  until:
    - "\n\nQ:"
    - "</s>"
    - "<|im_end|>"
    - "Question:"
    - "Passage:"
  # Sampling is enabled (rather than greedy decoding) so that repeated
  # generations for the same document can differ.
  do_sample: true
  temperature: 0.5
  top_p: 0.95

# Pass@5 wiring: five repeats per document, with zero few-shot examples.
# NOTE(review): presumably each repeat is an independent sampled generation
# for the same prompt — confirm against the harness documentation.
repeats: 5
num_fewshot: 0

# Filter pipeline with a single step: `take_first_k` with k = 5, matching the
# `repeats: 5` setting. Presumably this keeps the first five responses per
# document so all repeats reach the metric — confirm against the harness's
# filter documentation.
filter_list:
  - name: take_first_five
    filter:
      - function: take_first_k
        k: 5
