import re
from typing import Iterable, Optional, Set, Dict

# Per-task configuration for BBH: answer type and, for multiple-choice tasks,
# the number of options. Keys are lower-case task names.
#   - "type": "mcq" (lettered options) or "freeform" (open answer)
#   - "num_choices": number of lettered options (MCQ tasks only)
#   - "special": dedicated extractor for a freeform task ("number", "dyck", "word_sorting")
#   - "bool_style": (word group, casing) used to render boolean-like answers
BBH_TASK_CONFIG: Dict[str, Dict] = {
    # Tasks with 2 options
    "snarks": {"num_choices": 2, "type": "mcq"},
    "hyperbaton": {"num_choices": 2, "type": "mcq"},
    
    # Tasks with 3 options
    "tracking_shuffled_objects_three_objects": {"num_choices": 3, "type": "mcq"},
    "disambiguation_qa": {"num_choices": 3, "type": "mcq"},
    "logical_deduction_three_objects": {"num_choices": 3, "type": "mcq"},
    
    # Tasks with 4 options
    "temporal_sequences": {"num_choices": 4, "type": "mcq"},
    
    # Tasks with 5 options
    "logical_deduction_five_objects": {"num_choices": 5, "type": "mcq"},
    "tracking_shuffled_objects_five_objects": {"num_choices": 5, "type": "mcq"},
    "penguins_in_a_table": {"num_choices": 5, "type": "mcq"},
    
    # Tasks with 6 options
    "date_understanding": {"num_choices": 6, "type": "mcq"},
    "movie_recommendation": {"num_choices": 6, "type": "mcq"},
    "salient_translation_error_detection": {"num_choices": 6, "type": "mcq"},
    "ruin_names": {"num_choices": 6, "type": "mcq"},
    
    # Tasks with 7 options
    "tracking_shuffled_objects_seven_objects": {"num_choices": 7, "type": "mcq"},
    "logical_deduction_seven_objects": {"num_choices": 7, "type": "mcq"},
    
    # Tasks with 11 options
    "geometric_shapes": {"num_choices": 11, "type": "mcq"},
    
    # Tasks with 18 options
    "reasoning_about_colored_objects": {"num_choices": 18, "type": "mcq"},
    
    # Freeform tasks (no fixed option list)
    "multistep_arithmetic_two": {"type": "freeform", "special": "number"},
    "navigate": {"type": "freeform", "bool_style": ("yesno", "lower")},
    "dyck_languages": {"type": "freeform", "special": "dyck"},
    "word_sorting": {"type": "freeform", "special": "word_sorting"},
    "sports_understanding": {"type": "freeform", "bool_style": ("yesno", "lower")},
    "boolean_expressions": {"type": "freeform", "bool_style": ("truefalse", "title")},
    "object_counting": {"type": "freeform", "special": "number"},  # answers range over 0-18
    "formal_fallacies": {"type": "freeform", "bool_style": ("validinvalid", "title")},
    "causal_judgement": {"type": "freeform", "bool_style": ("yesno", "lower")},
    "web_of_lies": {"type": "freeform", "bool_style": ("yesno", "lower")},
}

# ---------------------- 工具函数 ------------------------
# 标准化文本中的空白字符和去除代码块
def _normalize_spaces(s: str) -> str:
    s = s.replace("\r", "\n")
    s = re.sub(r"```.*?```", "", s, flags=re.S)   # 去代码块
    s = re.sub(r"~~~.*?~~~", "", s, flags=re.S)
    s = re.sub(r"[ \t\f\v]+", " ", s)             # 压空白
    s = re.sub(r"\n+", "\n", s)
    return s.strip()

# 全角/中文括号 -> ASCII
def _normalize_brackets(s: str) -> str:
    return (s.replace("（", "(").replace("）", ")")
             .replace("〈", "<").replace("〉", ">")
             .replace("《", "<").replace("》", ">")
             .replace("【", "[").replace("】", "]")
             .replace("〔", "[").replace("〕", "]"))

# 动态调整MCQ任务的选项范围
def _letters_allowed(choices: Optional[Iterable[str]], task: Optional[str] = None) -> Set[str]:
    """
    根据任务类型和选项数量确定允许的字母范围。
    优先级：传入的choices > 任务配置 > 默认范围
    注意：只有MCQ类型的任务才需要字母范围限制
    """
    if choices:
        return {str(c).strip().upper() for c in choices}
    
    # 根据任务配置确定字母范围（仅MCQ任务）
    if task and task.lower() in BBH_TASK_CONFIG:
        config = BBH_TASK_CONFIG[task.lower()]
        if config.get("type") == "mcq" and "num_choices" in config:
            num = config["num_choices"]
            return set("ABCDEFGHIJKLMNOPQRS"[:num])
    
    # 默认范围
    return set("ABCDEFGHIJKLMNOPQR")

# 寻找锚定短语后的首行片段
def _last_after_phrases(text: str, phrases) -> Optional[str]:
    """取所有短语在文本中**最后一次**出现后的“首行”片段。"""
    # last_pos = -1
    # for p in phrases:
    #     ms = list(re.finditer(p, text, flags=re.I))
    #     if ms:
    #         last_pos = max(last_pos, ms[-1].end())

    # if last_pos >= 0:
    #     seg = text[last_pos:]
    #     seg = re.sub(r'^[\s:：,\u3001;；·•"“”\'`*]+', '', seg) # 去空白，最终返回第一行非空行
    #     for ln in seg.splitlines():
    #         if ln.strip():
    #             return ln.strip()
    # return None
    if not phrases:
        return None

    def _first_nonempty_after(pos: int) -> Optional[str]:
        seg = text[pos:]
        # 去掉开头空白和常见分隔符
        seg = re.sub(r'^[\s:：,\u3001;；·•"“”\'`*\-–—>]+', '', seg)
        for ln in seg.splitlines():
            if ln.strip():
                return ln.strip()
        return None
    
    # 1) 强锚点：phrases[0]
    strong = phrases[0]
    strong_matches = list(re.finditer(strong, text, flags=re.I))
    if strong_matches:
        return _first_nonempty_after(strong_matches[-1].end())

    # 2) 其他短语：取它们的“最后一次”出现的最大 end()
    last_pos = -1
    for p in phrases[1:]:
        ms = list(re.finditer(p, text, flags=re.I))
        if ms:
            last_pos = max(last_pos, ms[-1].end())

    if last_pos >= 0:
        return _first_nonempty_after(last_pos)

    return None

# 当答案在上下文中被否定，则判为 False
def _negated_near(text: str, start_idx: int, letter: Optional[str] = None) -> bool:
    """仅当出现类似 'not A' / '不是A' 且与当前字母一致时才判为否定。"""
    window = text[max(0, start_idx - 24):start_idx].lower()
    if not any(tok in window for tok in ["not", "不是", "不选", "非"]):
        return False
    if letter:
        if re.search(rf"not\s*\(?\s*{re.escape(letter.lower())}\s*\)?", window):
            return True
        if any(x in window for x in [f"不是{letter.lower()}", f"不是{letter.upper()}",
                                     f"不选{letter.lower()}", f"不选{letter.upper()}",
                                     f"非{letter.lower()}",   f"非{letter.upper()}"]):
            return True
    return False

# 处理公式符/括号/提取数值
def _remove_latex_boxes_and_dollars(s: str) -> str:
    """展开 $$…$$ / $…$，并**正确处理嵌套**的 \boxed{…} / \fbox{…}。"""
    """展开 $$…$$ / $…$ / \(…\) / \[…\]，并处理嵌套的 \boxed{…}/\fbox{…}。
    还会去除 \left/\right 与常见的空白命令，规范 Unicode 负号。
    """
    t = s

    # 0) 统一 Unicode 负号（−）到 ASCII '-'
    t = t.replace("\u2212", "-")

    # 1) unwrap inline/block math: $$...$$, $...$, \( ... \), \[ ... \]
    t = re.sub(r"\$\$(.+?)\$\$", r"\1", t, flags=re.S)
    t = re.sub(r"\$(.+?)\$", r"\1", t, flags=re.S)
    t = re.sub(r"\\\((.+?)\\\)", r"\1", t, flags=re.S)  # \( ... \)
    t = re.sub(r"\\\[(.+?)\\\]", r"\1", t, flags=re.S)  # \[ ... \]

    # 2) 去掉 \left / \right（保留括号本体）
    t = re.sub(r"\\left\s*", "", t)
    t = re.sub(r"\\right\s*", "", t)

    # 3) unwrap \boxed{...} / \fbox{...}（支持嵌套大括号）
    for cmd in ["boxed", "fbox"]:
        pat = rf"\\+{cmd}\{{"
        while True:
            m = re.search(pat, t)
            if not m:
                break
            i = m.end()
            depth = 1
            while i < len(t) and depth > 0:
                if t[i] == "{":
                    depth += 1
                elif t[i] == "}":
                    depth -= 1
                i += 1
            if depth == 0:
                inner = t[m.end():i-1]
                t = t[:m.start()] + inner + t[i:]
            else:
                break  # 不平衡就停

    # 4) 去掉常见空白命令：\, \!, \;, \:
    t = re.sub(r"\\[ ,!;:]", "", t)

    return t

##################### MCQ extraction #####################
# Answer-anchor phrases for multiple-choice answers (regex, matched
# case-insensitively by the caller). The first entry is treated as the
# "strong" anchor by _last_after_phrases.
_PHRASES_MCQ = [
    r"\bso\s+the\s+answer\s+is\b",
    r"\bthe\s+answer\s+is\b",
    r"\banswer\s+is\b",
    r"\bfinal\s+answer\s+is\b",
    r"\bfinal\s+answer\b\s*[:：]?",
    r"\banswer\b\s*[:：]",
    r"(?:因此|所以)?\s*答案\s*(?:为|是)\b",
    r"(?:故|综上)[，,:：]?\s*答案\s*(?:为|是)\b",
    r"(?:故选|选择|选项)\s*[A-S]",
]
_PUNCTS = r"[.\u3002,，、:：;；]"  # common Chinese/English punctuation
_PARENS_INSIDE = rf"\(\s*([A-S])\s*(?:{_PUNCTS})?\s*\)"     # "(A.)" — punctuation inside the parentheses; letters A–S (up to 19 options)
_PARENS_OUTSIDE = rf"\(\s*([A-S])\s*\)\s*(?:{_PUNCTS})?"   # "(A)." — punctuation after the parentheses; letters A–S (up to 19 options)

def bbh_mcq_postprocess(text: str, choices: Optional[Iterable[str]] = None, task: Optional[str] = None) -> str:
    """
    Extract a multiple-choice answer and return it in the canonical form '(A)'.

    Search order: a parenthesised letter in the fragment after the last
    answer-anchor phrase > a parenthesised letter anywhere in the text >
    the last standalone letter (anchor fragment first, then the last three
    lines of the full text). Letters outside the allowed set are ignored.
    In the full-text scopes every hit is checked with _negated_near; inside
    the anchor fragment the check is applied to a lone parenthesised hit and
    to standalone letters.

    Args:
        text: Raw model output.
        choices: Optional explicit option labels.
        task: Task name, used to determine the allowed letter range.

    Returns:
        '(X)' for the selected letter, or '' when nothing is found or text is empty.
    """
    if not text: return ""
    t = _normalize_brackets(_normalize_spaces(text))
    allowed = _letters_allowed(choices, task)

    seg = _last_after_phrases(t, _PHRASES_MCQ)
    # Scopes are tried in order: anchor fragment (if any), then the full text.
    scopes = ([("seg", seg)] if seg else []) + [("full", t)]

    # 1) Letter inside parentheses
    for scope_name, scope in scopes:
        hits = []
        for pat in (_PARENS_INSIDE, _PARENS_OUTSIDE):
            for m in re.finditer(pat, scope, flags=re.I):
                L = (m.group(1) or "").upper()
                if not L or L not in allowed:
                    continue
                if scope_name != "seg":
                    base = (t.find(scope) if scope is not t else 0)
                    if _negated_near(t, base + m.start(1), letter=L):
                        continue
                hits.append((m.start(), L))
            if hits:
                # Inside the fragment: a single hit that is negated is discarded
                if scope_name == "seg" and len(hits) == 1:
                    i, L = hits[-1]
                    if _negated_near(scope, i, letter=L):
                        hits = []
                if hits:
                    return f"({hits[-1][1]})"

    # 2) Fallback: standalone letter (full text is limited to its last three lines)
    standalone_tail_lines = 3
    N = max(1, int(standalone_tail_lines or 1))
    def _tail_lines(s: str, n: int) -> str:
        # Last n lines of s (all of s when it has fewer than n lines).
        lines = s.split("\n")
        return "\n".join(lines[max(-len(lines),-n):]) if lines else s

    # NOTE(review): the pattern is applied with re.I below, so lower-case
    # standalone letters (e.g. the article "a") also match — confirm intended.
    PAT_STANDALONE = r"(?<![A-Za-z])([A-S])(?![A-Za-z])"  # up to 19 options (A–S)
    for scope_name, scope in scopes:
        # seg: the one-line fragment is scanned as is; full: only the last N lines
        scan_text = scope if scope_name == "seg" else _tail_lines(scope, N)

        hits = []
        for m in re.finditer(PAT_STANDALONE, scan_text, flags=re.I):
            L = m.group(1).upper()
            if L not in allowed:
                continue
            if scope_name != "seg":
                # scan_text is a suffix of t, so it starts at len(t) - len(scan_text) in t
                base = len(t) - len(scan_text)
                if _negated_near(t, base + m.start(1), letter=L):
                    continue
            else:
                # Inside the fragment: check for an adjacent negation
                if _negated_near(scan_text, m.start(1), letter=L):
                    continue
            hits.append((m.start(1), L))

        if hits:
            return f"({hits[-1][1]})"

    return ""

##################### Freeform answer extraction #####################
# Canonical boolean / validity keys mapped to the words and phrases that
# are counted as that answer when scanning free text.
_BOOL_WORDS = {
    "yes":   ["yes","yeah","yep","plausible","likely"],
    "no":    ["no","nope","not plausible","unlikely","implausible","not likely",
              "unknown","cannot determine","cannot be determined","insufficient information"],
    "true":  ["true","correct","plausible","likely"],
    "false": ["false","incorrect","not plausible","unlikely","implausible","not likely",
              "unknown","cannot determine","cannot be determined","insufficient information"],
    "valid": ["valid"],
    "invalid":["invalid"],
}

def _normalize_int_like(s: str) -> Optional[str]:
    if not s:
        return None
    s = s.replace(",", "")  # 去千分位
    # 纯整数 or 尾随 .0... 视作整数
    if re.fullmatch(r"-?\d+(?:\.0+)?", s):
        return str(int(float(s)))
    return None

# The per-task boolean style (word group and casing) lives in BBH_TASK_CONFIG;
# this accessor is kept for backward compatibility.
def _get_task_bool_style(task: Optional[str]):
    """Return the ``(group, casing)`` boolean style configured for *task*.

    The task name is matched case-insensitively and ignoring surrounding
    whitespace. ``None``, unknown tasks and tasks without a ``bool_style``
    entry all fall back to ``("yesno", "lower")``.
    """
    default_style = ("yesno", "lower")
    config = BBH_TASK_CONFIG.get((task or "").strip().lower())
    if config is None:
        return default_style
    return config.get("bool_style", default_style)

# Answer-anchor phrases for freeform answers (regex, matched
# case-insensitively by the callers).
_PHRASES_FF = [
    r"\bso\s+the\s+answer\s+is\b",
    r"\bthe\s+answer\s+is\b",
    r"\banswer\s+is\b",
    r"\bfinal\s+answer\s+is\b",
    r"\bfinal\s+answer\b\s*[:：]?",
    r"\banswer\b\s*[:：]",
    r"(?:因此|所以)?\s*答案\s*(?:为|是)\b",
]

# ---- Base step: take the fragment after the answer phrase and clean it ----
def _ff__get_seg_and_clean(text: str) -> str:
    """Return the cleaned answer fragment of a freeform response.

    The fragment is the first line after the last answer-anchor phrase, or
    the last line of the normalized text when no anchor is found. LaTeX
    wrappers, surrounding quotes, a leftover "the answer is" / "final
    answer" / Chinese answer prefix and leading separators are removed.
    """
    normalized = _normalize_brackets(_normalize_spaces(text))
    candidate = _last_after_phrases(normalized, _PHRASES_FF) or normalized.split("\n")[-1]
    cleaned = _remove_latex_boxes_and_dollars(candidate).strip().strip('"\'')

    # Answer-phrase prefixes (English, then Chinese) removed in this order.
    leading_prefixes = (
        r"(?i)^(?:so\s+)?the\s+answer\s+is\s*[:：\-]?\s*",
        r"(?i)^final\s+answer(?:\s+is)?\s*[:：\-]?\s*",
        r"^(?:因此|所以)?\s*答案\s*(?:为|是)\s*[:：\-]?\s*",
        r"^(?:最终|最后)\s*答案\s*(?:为|是)?\s*[:：\-]?\s*",
    )
    for prefix in leading_prefixes:
        cleaned = re.sub(prefix, "", cleaned).strip()

    # Finally drop any leading separators that are left.
    return re.sub(r"^[\s:：,，、;；·•]+", "", cleaned)

# ---- 数值/分数/百分号/科学计数/LaTeX 抽取（一个 token）----
def _parse_english_number_words(s: str) -> str:
    """
    改造版：不再仅做“前缀解析”，而是在整段文本中扫描英文数字词串，
    解析每一处可识别的“连续数字词片段”，返回**最后一次出现**的数值字符串。
    支持：
      - minus/negative 前缀
      - zero..nineteen, twenty..ninety
      - hundred, thousand, million, billion
      - 可选 'and'
      - 小数：point / dot 后跟若干数字词
      - 可选的百分号词：percent / per cent（返回时带上 %）
    """
    if not s:
        return ""

    text = s.lower()
    # 把连字符当空格处理（twenty-one → twenty one）
    text = re.sub(r"[-\u2010-\u2014]", " ", text)  # hyphen, non-breaking, en–, em—

    UNITS = {
        "zero":0, "one":1, "two":2, "three":3, "four":4, "five":5, "six":6, "seven":7,
        "eight":8, "nine":9, "ten":10, "eleven":11, "twelve":12, "thirteen":13,
        "fourteen":14, "fifteen":15, "sixteen":16, "seventeen":17, "eighteen":18, "nineteen":19
    }
    TENS = {
        "twenty":20, "thirty":30, "forty":40, "fifty":50, "sixty":60, "seventy":70,
        "eighty":80, "ninety":90
    }
    SCALES = {"hundred":100, "thousand":1000, "million":10**6, "billion":10**9}
    DECIMAL = {"point","dot"}
    NEG = {"minus","negative"}
    PERCENT = {"percent", "per", "cent"}  # “per cent” 两词写法

    # 切成词（只保留字母）
    words = re.findall(r"[a-z]+", text)

    def parse_from(idx: int):
        """从 words[idx:] 起，尽可能解析一个数字词串，返回 (值字符串, 消耗词数)。失败则返回 (None, 0)。"""
        i = idx
        neg = False
        percent_flag = False
        total = 0
        current = 0
        consumed = 0
        hit_any = False

        # 负号
        neg_count = 0
        while i < len(words) and words[i] in NEG:
            neg_count += 1
            i += 1; consumed += 1
        neg = (neg_count % 2 == 1)

        # 主体（units/tens/scales/and）
        while i < len(words):
            w = words[i]
            if w == "and":
                i += 1; consumed += 1; continue
            if w in UNITS:
                current += UNITS[w]; i += 1; consumed += 1; hit_any = True; continue
            if w in TENS:
                current += TENS[w]; i += 1; consumed += 1
                # 允许紧随一个 unit（如 twenty one）
                if i < len(words) and words[i] in UNITS:
                    current += UNITS[words[i]]; i += 1; consumed += 1
                hit_any = True
                continue
            if w in SCALES:
                # hundred: 乘以 100；thousand/million/billion：先乘再累加到 total
                scale = SCALES[w]
                if current == 0:
                    # 如 "hundred" 前面没有 unit/tens，视作 1 hundred
                    current = 1
                current *= scale
                i += 1; consumed += 1; hit_any = True
                if scale >= 1000:
                    total += current; current = 0
                continue
            break

        # 小数部分：point / dot
        frac = 0.0
        if i < len(words) and words[i] in DECIMAL:
            i += 1; consumed += 1
            base = 0.1
            while i < len(words) and words[i] in UNITS:
                frac += UNITS[words[i]] * base
                base *= 0.1
                i += 1; consumed += 1
                hit_any = True

        # 百分号词（允许紧跟或以 “per cent” 两词形式出现）
        if i < len(words) and words[i] == "percent":
            percent_flag = True
            i += 1
            consumed += 1
        elif i + 1 < len(words) and words[i] == "per" and words[i+1] == "cent":
            percent_flag = True
            i += 2
            consumed += 2

        if not hit_any:
            return None, 0

        value = total + current + frac
        if neg:
            value = -value

        # 输出格式：百分数则附加 %
        out = f"{value}".rstrip("0").rstrip(".") if isinstance(value, float) else str(value)
        if percent_flag:
            # 若是整数/小数都直接加 %
            out = f"{out}%"
        return out, consumed

    # 遍历所有起点，记录所有成功解析的 (结束位置, 值)
    found = []
    i = 0
    while i < len(words):
        val, n = parse_from(i)
        if val is not None and n > 0:
            end_pos = i + n
            found.append((end_pos, val))
            i = i + 1  # 允许重叠起点，向后滑动一步继续找
        else:
            i += 1

    if not found:
        return ""
    # 取“最后一次出现”的解析结果
    return found[-1][1]

def _normalize_frac_sign(expr: str) -> str:
    r"""
    统一 \frac 的负号位置：
      \frac{-a}{b}   -> -\frac{a}{b}
      \frac{a}{-b}   -> -\frac{a}{b}
      \frac{-a}{-b}  ->  \frac{a}{b}
    仅处理“简单” \frac{...}{...}（不包含嵌套大括号）。更复杂情况保持原样。
    """
    pattern = re.compile(r'\\frac\{\s*([^{}]+?)\s*\}\{\s*([^{}]+?)\s*\}')

    def repl(m: re.Match) -> str:
        num, den = m.group(1).strip(), m.group(2).strip()
        sign = 1
        # 统计分子/分母前置负号
        if num.startswith('-'):
            sign *= -1
            num = num[1:].strip()
        if den.startswith('-'):
            sign *= -1
            den = den[1:].strip()
        core = f"\\frac{{{num}}}{{{den}}}"
        return f"-{core}" if sign < 0 else core

    return pattern.sub(repl, expr)

def _ff__extract_number_like(s: str) -> str:
    """
    在整段文本中查找“数值样子”的 token，返回最后一个命中的片段。
    - 过滤时间样式（如 9:30）
    - 兼容千分位、小数、分数、科学计数法、百分号、LaTeX frac/sqrt
    - 若仍未命中，再用英文数字词解析（同样取最后一次出现）
    """
    if not s:
        return ""
    # 统一 Unicode 负号
    s = s.replace("\u2212", "-")

    # 如果整体像是 "( ... )" 的外壳，先去壳（仅一次）
    if re.match(r"^\s*\(.*\)\s*$", s):
        s = re.sub(r"^\s*\((.*)\)\s*$", r"\1", s)

    # 全局模式（不再使用 re.match）
    PAT_NUM = re.compile(
        r"""
        (?P<num>
            [+-]?[0-9]{1,3}(?:,[0-9]{3})*(?:\.[0-9]+)?(?:/\d+(?:\.\d+)?)?(?:%|)   # 千分位/小数/分数/百分号
          | [+-]?[0-9]+(?:\.[0-9]+)?[eE][+-]?[0-9]+                               # 科学计数法
          | [+-]?\s*\\d?frac\{[^}]+\}\{[^}]+\}                                    # LaTeX (d)frac
          | [+-]?\s*\\sqrt\{[^}]+\}                                               # LaTeX sqrt
          | [+-]?\d+(?:\.\d+)?(?:/\d+(?:\.\d+)?)?(?:%|)                           # 普通数字/分数/百分号
        )
        """,
        re.X,
    )

    cands = []
    for m in PAT_NUM.finditer(s):
        a, b = m.span("num")
        window = s[max(0, a - 3): min(len(s), b + 3)]
        # 时间格式过滤（如 9:30 / 12:05）
        if re.search(r"\d\s*:\s*\d", window):
            continue
        val = _normalize_frac_sign(m.group("num").strip())
        val = val.replace(",", "") # 去千分位逗号
        # 去空格 & 去句末句号（中英文）
        val = val.replace(" ", "").rstrip(".。")
        cands.append((b, val))

    if cands:
        return cands[-1][1]  # 取“最后一次出现”的数值片段

    # 数字字符没命中 → 英文数字词解析（同样取最后一次出现）
    word_val = _parse_english_number_words(s)
    return word_val or ""


# ---- Boolean / validity extraction, rendered in the task's own style ----
def _ff__extract_bool_like(s: str, task: str) -> str:
    """Extract a yes/no, true/false or valid/invalid answer from *s*.

    Steps, in order:
      1. Task-specific phrase rules for ``web_of_lies`` ("tells the truth"
         / "is lying" ...) and ``navigate`` ("returns to the starting
         point" ...); negative phrasings are checked before positive ones.
      2. A boolean word at the very start of the fragment.
      3. Otherwise the last boolean word or synonym from ``_BOOL_WORDS``
         anywhere in the fragment. When two hits end at the same place the
         longer one wins, so "not plausible" is not overridden by the
         "plausible" inside it.

    The winning word is mapped into the word group configured for the task
    (yes/no, true/false, valid/invalid) and rendered in the task's casing.

    Args:
        s: Cleaned answer fragment.
        task: Task name; matched case-insensitively, ``None`` is tolerated.

    Returns:
        The rendered answer word, or "" when nothing boolean-like is found.
    """
    low = s.lower().strip()
    # Drop punctuation at either end.
    low = low.strip(".,!?;")
    task_name = (task or "").strip().lower()

    def _render(key: str) -> str:
        """Map *key* into the task's word group and apply the task's casing."""
        group, casing = _get_task_bool_style(task_name)
        if group == "yesno" and key in ("true", "false"):
            key = "yes" if key == "true" else "no"
        elif group == "truefalse" and key in ("yes", "no"):
            key = "true" if key == "yes" else "false"
        elif group == "validinvalid" and key in ("yes", "no", "true", "false"):
            key = "valid" if key in ("yes", "true") else "invalid"
        tok = {
            "yes": "Yes", "no": "No",
            "true": "True", "false": "False",
            "valid": "Valid", "invalid": "Invalid",
        }[key]
        if casing == "lower":
            return tok.lower()
        if casing == "upper":
            return tok.upper()
        return tok

    neg_pat = pos_pat = None

    if task_name == "web_of_lies":
        # Negative: does not tell the truth / is lying -> No.
        # NOTE: no empty alternative may be left in this group; an empty
        # branch would make the pattern match every non-empty fragment.
        neg_pat = re.compile(
            r"\b(?:"
            r"does\s+not\s+tell(?:s)?\s+the\s+truth|"     # does not tell(s) the truth
            r"doesn[’']t\s+tell(?:s)?\s+the\s+truth|"     # doesn't tell(s) the truth
            r"is\s+not\s+tell(?:ing)?\s+the\s+truth|"     # is not tell(ing) the truth
            r"isn[’']t\s+telling\s+the\s+truth|"          # isn't telling the truth
            r"not\s+tell(?:ing)?\s+the\s+truth|"          # not tell(ing) the truth
            r"tell(?:s)?\s+a\s+lie|"                      # tells a lie / tell a lie
            r"tell(?:s)?\s+lies|"                         # tells lies / tell lies
            r"is\s+telling\s+a\s+lie|"                    # is telling a lie
            r"is\s+lying|"                                # is lying
            r"lies"                                       # X lies
            r")\b"
        )
        # Positive: tells the truth -> Yes.
        pos_pat = re.compile(
            r"\b(?:tell(?:s)?\s+the\s+truth|is\s+telling\s+the\s+truth)\b"
        )
    elif task_name == "navigate":
        # Synonyms for "the starting point" (kept conservative to limit false matches).
        loc = (
            r"(?:the\s+(?:starting|start(?:ing)?|original|initial)\s+(?:point|position|place|spot)"
            r"|the\s+start(?!\s+of)\b|the\s+beginning(?!\s+of)\b|the\s+origin)"
        )
        # Verbs meaning "return / go back".
        verb_pos = r"(?:return(?:s|ed|ing)?|go(?:es)?\s+back|come(?:s)?\s+back|end(?:s|ed|ing)?\s+up\s+back)"

        # Negative: did not get back to the starting point -> No.
        neg_pat = re.compile(
            rf"\b(?:"
            rf"(?:does\s+not|doesn[’']t)\s+(?:return|go\s+back|come\s+back)\s+to\s+{loc}|"
            rf"(?:does\s+not|doesn[’']t)\s+end(?:s|ed|ing)?\s+up\s+back\s+(?:to|at)\s+{loc}|"
            rf"fail(?:s|ed|ing)?\s+to\s+return\s+to\s+{loc}|"
            rf"never\s+(?:return(?:s|ed|ing)?\s+to|go(?:es)?\s+back\s+to|come(?:s)?\s+back\s+to)\s+{loc}|"
            rf"(?:is\s+not|isn[’']t)\s+back\s+(?:at|to)\s+{loc}"
            rf")\b"
        )

        # Positive: got back to the starting point -> Yes.
        pos_pat = re.compile(
            rf"\b(?:{verb_pos})\s+(?:to|at)\s+{loc}\b"
            rf"|\bback\s+(?:to|at)\s+{loc}\b"
            rf"|\bback\s+(?:where|to\s+where)\s+\w+\s+started\b"
            rf"|\bend(?:s|ed|ing)?\s+back\s+(?:to|at)\s+{loc}\b"
        )

    if neg_pat is not None:
        # Negative first: positive phrases are substrings of negative ones.
        if neg_pat.search(low):
            return _render("no")
        if pos_pat.search(low):
            return _render("yes")

    # 0) A boolean word at the very start is the most reliable signal.
    m0 = re.match(r'^\s*(yes|no|true|false|valid|invalid)\b[.!?，。]?\s*', low)
    if m0:
        return _render(m0.group(1))

    # 1) Otherwise collect every boolean word / synonym in the fragment.
    hits = []
    for key, words in _BOOL_WORDS.items():
        for w in words:
            for m in re.finditer(rf"\b{re.escape(w)}\b", low):
                hits.append((m.end(), -m.start(), key))
    if not hits:
        return ""

    # Right-most end wins; on a tie the longer span (earlier start) wins.
    best_key = max(hits, key=lambda h: (h[0], h[1]))[2]
    return _render(best_key)
    
# ---- Special task: Dyck bracket sequences ----
def bbh_freeform_postprocess_dyck(text: str) -> str:
    """
    Extract the bracket sequence answer for the Dyck task.

    Only the characters ()[]{}<> are kept and they are joined with single
    spaces (e.g. "] ]", "} ) >"). The right-most bracket block that sits on
    clean boundaries is preferred; if there is none, the right-most bracket
    block of any kind is used. Returns "" when no bracket is present.
    """
    segment = _ff__get_seg_and_clean(text)

    strict = re.compile(
        r"(?:^|(?<=[\s:：]))"                    # left edge: start of string, whitespace or colon
        r"([()\[\]{}<>]+(?:\s+[()\[\]{}<>]+)*)"  # the bracket block itself
        r"(?=(?:\s|$|[.,;:!?，。！？]))"          # right edge: whitespace, end or sentence punctuation
    )

    min_tokens = 1  # raise to 2 to ignore isolated single brackets

    # 1) Right-most block on clean boundaries.
    for block in reversed(strict.findall(segment)):
        tokens = re.findall(r"[()\[\]{}<>]", block)
        if len(tokens) >= min_tokens:
            return " ".join(tokens)

    # 2) Fallback: right-most block without any boundary requirement.
    loose_blocks = re.findall(r"[()\[\]{}<>]+(?:\s+[()\[\]{}<>]+)*", segment)
    if loose_blocks:
        return " ".join(re.findall(r"[()\[\]{}<>]", loose_blocks[-1]))

    return ""

# ---- Special task: word_sorting ----
# A word: starts and ends with a letter, may contain single hyphens,
# apostrophes or ampersands between letter runs.
_WORD_RE = re.compile(r"[A-Za-z]+(?:[-'&][A-Za-z]+)*")
# A list item: a marker (number, roman numeral or bullet) followed by the
# item text, which is captured in group 1.
_LIST_PREFIX_RE = re.compile(
    r'(?:'
    r'\(?\d{1,3}\)?[.)、．:]\s*|'              # 1. / 1) / (1) / 1、 / 1． / 1:
    r'\(?[ivxlcdmIVXLCDM]{1,6}\)?[.)]\s*|'    # i. / ii. / (iii)
    r'[•\-\*\u2022\u2023\u2219\u25E6\u25AA\u25CF\u25CB\u30FB]\s+'  # common bullet characters
    r')'
    r'([A-Za-z][A-Za-z\s\-\'&]*)'             # item text (must start with a letter)
)
# Text allowed between two consecutive list items: separators only.
_DELIMS_ONLY_RE = re.compile(
    r'^[\s,.;:!?，。！？：；、()\[\]{}（）【】"“”\'`＊\*\-–—·•~〜～/\\\|｜=·]*$'
)

def _extract_from_list_tail(tail: str) -> list[str]:
    """Return the words of the last contiguous list block in *tail*.

    List items are found with ``_LIST_PREFIX_RE``. Starting from the last
    item, earlier items are added for as long as the text between
    neighbouring items consists of separators only. The words of the
    selected items are returned in order, lower-cased. Returns an empty
    list when *tail* contains no list item.
    """
    items = list(_LIST_PREFIX_RE.finditer(tail))
    if not items:
        return []

    # Walk backwards from the last item while the gap to the previous item
    # holds nothing but separators.
    first = len(items) - 1
    while first > 0:
        gap = tail[items[first - 1].end():items[first].start()]
        if not _DELIMS_ONLY_RE.fullmatch(gap or ""):
            break
        first -= 1

    return [
        word.lower()
        for item in items[first:]
        for word in _WORD_RE.findall(item.group(1))
    ]

# Multi-line counterpart of the one-line fragment: everything after the last
# answer-anchor phrase, cleaned the same way.
def _ff__tail_after_phrase(text: str) -> str:
    """Return all text after the right-most answer-anchor phrase.

    The text is normalized first. If no phrase from ``_PHRASES_FF`` matches,
    the whole normalized text is used. LaTeX wrappers are removed and the
    result is stripped.
    """
    normalized = _normalize_brackets(_normalize_spaces(text))
    ends = [
        match.end()
        for phrase in _PHRASES_FF
        for match in re.finditer(phrase, normalized, flags=re.I)
    ]
    tail = normalized[max(ends):] if ends else normalized
    return _remove_latex_boxes_and_dollars(tail).strip()

def bbh_freeform_postprocess_word_sorting(text: str) -> str:
    """Extract the sorted-word answer as lower-case words joined by spaces.

    Strategy:
      1. Take the words on the one-line answer fragment.
      2. If that yields exactly one word while the tail after the answer
         phrase contains a list (numbered / bulleted) with more words, use
         the list instead.
      3. If no word was found at all, fall back to the last run of lines in
         the tail that each hold a single word.

    Returns "" when nothing can be extracted.
    """
    # 1) Words on the answer line; commas and semicolons act as separators.
    segment = re.sub(r"[,\u3001;；、]+", " ", _ff__get_seg_and_clean(text))
    inline_words = re.findall(r"[A-Za-z]+(?:[-'&][A-Za-z]+)*", segment)

    # 2) Prefer a list in the tail only when the line gave one single word.
    tail = _ff__tail_after_phrase(text)
    listed_words = _extract_from_list_tail(tail)
    use_list = len(inline_words) == 1 and len(listed_words) > 1
    chosen = listed_words if use_list else inline_words
    if chosen:
        return " ".join(word.lower() for word in chosen)

    # 3) Light fallback: trailing block of one-word lines. Lines before the
    #    block starts are skipped; the first non-word line after it ends it.
    single_word_line = re.compile(r"^[A-Za-z]+(?:[-'&][A-Za-z]+)*$")
    collected = []
    for raw_line in reversed(tail.splitlines()):
        line = raw_line.strip()
        if line and single_word_line.fullmatch(line):
            collected.append(line)
        elif collected:
            break
    if collected:
        return " ".join(word.lower() for word in reversed(collected))

    # Nothing usable found.
    return ""


# ---- Task-aware entry point for freeform answers ----
def bbh_freeform_postprocess_task(text: str, task: Optional[str] = None) -> str:
    """
    Task-aware post-processing for the ten freeform BBH tasks:
      - dyck_languages        -> space-separated bracket sequence
      - word_sorting          -> space-separated lower-case words
      - multistep_arithmetic_two, object_counting -> numeric answer
      - boolean_expressions   -> True/False (title case)
      - formal_fallacies      -> Valid/Invalid (title case)
      - causal_judgement, web_of_lies, sports_understanding, navigate -> yes/no (lower case)

    For a missing or unknown task a numeric answer is attempted as a
    fallback. If no extractor produces a result, the cleaned fragment after
    the answer phrase is returned unchanged.

    Args:
        text: Raw model output.
        task: Task name (case-insensitive, surrounding whitespace ignored).

    Returns:
        The extracted answer string.
    """
    task = (task or "").strip().lower()

    # 1) The two multi-token tasks have dedicated extractors.
    if task == "dyck_languages":
        return bbh_freeform_postprocess_dyck(text)
    if task == "word_sorting":
        return bbh_freeform_postprocess_word_sorting(text)

    # 2) Everything else works on the cleaned answer fragment.
    s = _ff__get_seg_and_clean(text)
    config = BBH_TASK_CONFIG.get(task)

    # 2.1 Numeric answer: tasks configured as "number", plus unknown or
    #     missing tasks as a best-effort fallback.
    if config is None or config.get("special") == "number":
        num_like = _ff__extract_number_like(s)
        if num_like:
            if task == "object_counting":
                # Counts are integers; clamp them to the task's range [0, 18].
                as_int = _normalize_int_like(num_like)
                if as_int:
                    return str(max(0, min(18, int(as_int))))
                # Not an integer: return it as is and let scoring reject it.
            return num_like

    # 2.2 Boolean / validity answer, rendered in the task's style.
    if config is not None and "bool_style" in config:
        bl = _ff__extract_bool_like(s, task)
        if bl:
            return bl

    return s

# --------- Unified entry point ---------
def bbh_postprocess(text: str, task: Optional[str] = None, choices: Optional[Iterable[str]] = None) -> str:
    """
    Unified post-processing entry point for BBH tasks.

    Dispatches to the multiple-choice or the freeform extractor according
    to the task's entry in ``BBH_TASK_CONFIG``. A missing or unknown task is
    handled as multiple choice.

    Args:
        text: Raw model output.
        task: Task name (case-insensitive), for example:
              - MCQ tasks: geometric_shapes, date_understanding, snarks, ...
              - Freeform tasks: boolean_expressions, formal_fallacies,
                multistep_arithmetic_two, navigate, sports_understanding, ...
              - Special formats: dyck_languages, word_sorting
        choices: Optional explicit option labels for multiple-choice
            extraction. When given they take precedence over the task's
            configured letter range.

    Returns:
        The extracted answer string:
        - MCQ tasks: "(A)" form
        - Boolean tasks: the task's own casing (True/False, yes/no, Valid/Invalid)
        - Numeric tasks: the cleaned number
        - Special tasks: space-separated brackets / words
        An empty string is returned for empty input.

    Examples:
        >>> bbh_postprocess("So the answer is (C).", "geometric_shapes")
        '(C)'
        >>> bbh_postprocess("The answer is True.", "boolean_expressions")
        'True'
        >>> bbh_postprocess("Answer: 42.5", "multistep_arithmetic_two")
        '42.5'
    """
    if not text:
        return ""

    task_lower = (task or "").strip().lower()

    # No task name: default to multiple-choice extraction.
    if not task_lower:
        return bbh_mcq_postprocess(text, choices)

    config = BBH_TASK_CONFIG.get(task_lower)
    if config is not None:
        task_type = config.get("type", "mcq")
        if task_type == "mcq":
            return bbh_mcq_postprocess(text, choices, task=task_lower)
        if task_type == "freeform":
            return bbh_freeform_postprocess_task(text, task)

    # Unknown task (or unknown type): fall back to multiple-choice extraction.
    return bbh_mcq_postprocess(text, choices, task=task_lower)

# Try this simple extractor first; if it cannot handle the output, fall back
# to bbh_freeform_postprocess_task.

# Maps typographic variants to their ASCII counterparts (curly apostrophes,
# full-width ampersand, the various hyphen/dash/minus code points).
_FF_SIMPLE_ASCII_TABLE = str.maketrans({
    "\u2019": "'",  # right single quotation mark
    "\u2018": "'",  # left single quotation mark
    "\uFF06": "&",  # full-width ampersand
    "\u2010": "-",  # hyphen
    "\u2011": "-",  # non-breaking hyphen
    "\u2012": "-",  # figure dash
    "\u2013": "-",  # en dash
    "\u2014": "-",  # em dash
    "\u2212": "-",  # minus sign
})

# Answer cues, compiled once. The English pattern also covers
# "the answer ..." and "final answer ..." because it only anchors on the word
# "answer"; `is\b` prevents matching the start of a longer word ("isn't").
_FF_SIMPLE_CUE_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"(?i)\banswer\s*(?:is\b|[=:：])",
        r"答案\s*(?:为|是|[:：])",
        r"(?:故选|选择|选项)\s*",  # e.g. 故选C / 选择B / 选项D
    )
)


def bbh_freeform_postprocess_simple(text: str) -> str:
    """
    Single-function free-form answer extraction.

    Steps:
    - normalise typographic quotes/dashes/ampersand to ASCII;
    - keep only what follows the LAST answer cue in the text
      (English "answer is / = / :", Chinese "答案为/是/：", "故选/选择/选项");
      a colon left directly after the cue ("the answer is: 42") is dropped;
    - remove one trailing sentence period (ASCII "." or Chinese "。").

    When no cue is present the whole (normalised) text is returned, minus the
    trailing period.

    Args:
        text: Raw model output; ``None`` or blank input yields ``""``.

    Returns:
        The extracted answer fragment, stripped of surrounding whitespace.
    """
    t = (text or "").strip()
    if not t:
        return ""

    t = t.translate(_FF_SIMPLE_ASCII_TABLE)

    # End offset of the right-most cue across all patterns (0 = no cue found).
    cut = 0
    for cue in _FF_SIMPLE_CUE_PATTERNS:
        for match in cue.finditer(t):
            cut = max(cut, match.end())

    if cut:
        # Drop a separator colon that follows the cue, e.g. "answer is: 42".
        seg = t[cut:].strip().lstrip(":：").strip()
    else:
        seg = t

    if seg.endswith((".", "。")):
        seg = seg[:-1]
    return seg.strip()


if __name__ == "__main__":
    # Manual smoke run: prints expected vs. actual output for a handful of
    # hand-written cases and flags every mismatch. Nothing is asserted.

    # MCQ cases. `num_choices` is only the size of the A.. letter range passed
    # to the handler here; it is not necessarily the task's configured count
    # in BBH_TASK_CONFIG (e.g. geometric_shapes is configured with 11).
    mcq_cases = [
        {"task": "geometric_shapes", "text": "So the answer is (C).", "num_choices": 5, "target": "(C)"},
        {"task": "date_understanding", "text": "最终推理… 因此答案为（D）。", "num_choices": 5, "target": "(D)"},
        {"task": "any", "text": "故选C，原因如下：", "num_choices": 5, "target": "(C)"},
        {"task": "any", "text": "It is not (A) but (B). So the answer is (B).", "num_choices": 5, "target": "(B)"},
        {"task": "reasoning_about_colored_objects", "text": "The answer is (R).", "num_choices": 18, "target": "(R)"},
    ]

    # Free-form cases (no choice range involved).
    ff_cases = [
        {"task": "multistep_arithmetic_two", "text": "The answer is (A.)", "target": "A"},
        {"task": "multistep_arithmetic_two", "text": "The answer is you have a total of 6 vegetables", "target": "6"},
        {"task": "multistep_arithmetic_two", "text": "Reasoning… So the answer is you get -42.15.", "target": "-42.15"},
        {"task": "multistep_arithmetic_two", "text": "The answer is: you have \\boxed{\\frac{7}{9}} objects.", "target": "\\frac{7}{9}"},
        {"task": "boolean_expressions", "text": "Thus the answer is TRUE.", "target": "True"},
        {"task": "formal_fallacies", "text": "The argument is Invalid.", "target": "Invalid"},
        # "Implausible" answers the plausibility question negatively, so the
        # yes/no target is "no".
        {"task": "sports_understanding", "text": "The answer is Implausible.", "target": "no"},
        {"task": "dyck_languages", "text": "So the brackets are: ( [ ) ]", "target": "( [ ) ]"},
        {"task": "word_sorting", "text": "So the answer is:\n1. New-York's\n2. Words\n3. john's-daughter", "target": "new-york's words john's-daughter"},
    ]

    def _report(case, got):
        """Print one expected/actual line with an OK/MISMATCH marker."""
        verdict = "OK" if got == case["target"] else "MISMATCH"
        print(f"task={case['task']:35s} | expected={case['target']!r} | got={got!r} | {verdict}")

    print("\n=== MCQ cases ===")
    for case in mcq_cases:
        letters = [chr(ord("A") + i) for i in range(case["num_choices"])]
        _report(case, bbh_mcq_postprocess(case["text"], choices=letters))

    print("\n=== Free-form cases ===")
    for case in ff_cases:
        _report(case, bbh_freeform_postprocess_task(case["text"], task=case["task"]))