import re

from swebench.harness.constants import TestStatus
from swebench.harness.utils import ansi_escape


def parse_log_calypso(log: str) -> dict[str, str]:
    """
    Parser for test logs generated by Calypso test suite

    The log is cut at every " ./node_modules/.bin/jest " invocation and the text
    before the first invocation is ignored. Inside a run, indented lines that
    are not test results are tracked as nested suite headers, and each "✓" /
    "✕" line is stored under the " - "-joined names of the enclosing suites
    followed by the test title. Scanning of a run stops at the first line that
    starts with "Test Suites" or "  ● ".

    Result lines whose title cannot be extracted (for example a bare marker
    with nothing after it) are skipped instead of raising.

    Args:
        log (str): log content
    Returns:
        dict: test case to test status mapping
    """
    test_status_map = {}
    # (suite name, indent) pairs, outermost first. The stack is carried over
    # from one jest invocation to the next.
    suite = []
    duration_re = re.compile(r"\(\d+ms\)")

    def full_test_name(marker, line):
        """Return the suite-qualified name of a result line, or None."""
        patterns = []
        if duration_re.search(line):
            # Drop the "(<n>ms)" duration when the line ends with one.
            patterns.append(rf"^\s*{marker}\s(.*)\(\d+ms\)\s*$")
        # Fallback keeps the whole remainder as the title; it also covers a
        # duration-like token that is not at the end of the line.
        patterns.append(rf"^\s*{marker}\s(.*)")
        for pattern in patterns:
            match = re.match(pattern, line)
            if match is not None:
                suite_path = " - ".join(name for name, _ in suite)
                return " - ".join([suite_path, match.group(1)]).strip()
        return None

    for run_output in log.split(" ./node_modules/.bin/jest ")[1:]:
        for line in run_output.split("\n"):
            if line.startswith(("Test Suites", "  ● ")):
                # Summary / failure details reached: nothing more to parse here
                break

            stripped = line.strip()
            if stripped.startswith("✓"):
                # Test passed
                test_name = full_test_name("✓", line)
                if test_name is not None:
                    test_status_map[test_name] = TestStatus.PASSED.value
            elif stripped.startswith("✕"):
                # Test failed
                test_name = full_test_name("✕", line)
                if test_name is not None:
                    test_status_map[test_name] = TestStatus.FAILED.value
            elif len(line) > len(line.lstrip()):
                # Indented non-result line: adjust suite name
                indent = len(line) - len(line.lstrip())
                while suite and suite[-1][1] >= indent:
                    # Pop until the last element has a smaller indent
                    suite.pop()
                suite.append((stripped, indent))

    return test_status_map

def parse_log_mocha(log: str) -> dict[str, str]:
    """
    Parse Mocha CLI output and extract test statuses with full suite paths.

    Result lines are recognised by their first non-blank character: "✓" / "✔"
    (passed), "✕" / "x" (failed) and "-" (skipped). Every other line that
    starts with a space is taken as a nested suite header (whitespace-only
    lines included), positioned by its indentation; any other non-blank line
    replaces the whole suite stack. Keys of the result are the enclosing suite
    names and the test title joined with ".".

    Args:
        log (str): log content
    Returns:
        dict: test case to test status mapping (values are TestStatus member
            names)
    """
    test_status_map = {}
    suite_stack = []  # (suite name, indent) pairs, outermost first

    duration_tail_re = re.compile(r"\(\d+ms\)$")
    timed_pass_re = re.compile(r"^\s*(?:✓|✔)\s+(.*)\s+\(\d+ms\)$")
    plain_pass_re = re.compile(r"^\s*(?:✓|✔)\s+(.*)")
    fail_re = re.compile(r"^\s*(?:✕|x)\s+(.*)")
    skip_re = re.compile(r"^\s*-\s+(.*)")
    # "x" / "-" immediately followed by a word character is ordinary text
    # (e.g. the suite "xhr adapter"), not a result marker.
    ascii_word_re = re.compile(r"[x-]\w")

    def qualified(match):
        """Join the current suite names and the matched title with '.'."""
        return ".".join(
            [name for name, _ in suite_stack] + [match.group(1).strip()]
        )

    for line in log.splitlines():
        stripped = line.strip()

        # --- PASSED tests ---
        if stripped.startswith(("✓", "✔")):
            # Only strip the duration when the line really ends with one
            if duration_tail_re.search(stripped):
                match = timed_pass_re.match(stripped)
            else:
                match = plain_pass_re.match(stripped)
            if match:
                test_status_map[qualified(match)] = TestStatus.PASSED.name
            continue  # unparsable result lines are dropped, never suites

        looks_like_word = ascii_word_re.match(stripped) is not None

        # --- FAILED tests ---
        if stripped.startswith(("✕", "x")) and not looks_like_word:
            match = fail_re.match(stripped)
            if match:
                test_status_map[qualified(match)] = TestStatus.FAILED.name
            continue

        # --- SKIPPED tests ---
        if stripped.startswith("-") and not looks_like_word:
            match = skip_re.match(stripped)
            if match:
                test_status_map[qualified(match)] = TestStatus.SKIPPED.name
            continue

        # --- Suite headers ---
        if line.startswith(" "):
            # Nested suite: drop every suite at the same or a deeper indent
            indent = len(line) - len(line.lstrip(" "))
            while suite_stack and suite_stack[-1][1] >= indent:
                suite_stack.pop()
            suite_stack.append((stripped, indent))
        elif stripped:
            # Top-level suite
            suite_stack = [(stripped, 0)]

    return test_status_map

def parse_log_mocha_v2(log: str) -> dict[str, str]:
    """
    Parse Mocha 'spec' output, returning
        { "Suite - SubSuite - Test": "PASSED"/"FAILED"/"PENDING" }

    ANSI colour codes are removed before a line is inspected. Lines that are
    not recognised as a summary or a test result and are indented by at least
    two columns are treated as suite headers, nested two columns per level.
    The suite stack is cleared on a summary line ("N passing" / "N failing" /
    "N pending") and on every second blank line seen (the blank-line counter
    is not reset by non-blank lines).

    Args:
        log (str): log content
    Returns:
        dict: test case to test status mapping
    """
    ansi_re = re.compile(r"\x1b\[[0-9;]*m")  # strip colours
    summary_re = re.compile(r"^\s*\d+\s+(passing|failing|pending)")
    dur_tail_re = re.compile(r"\s+\([\d\.]+ ?[a-zA-Z]+\)$")
    # Checked in this order; the first pattern that matches decides the status
    result_rules = (
        # "✓ test name", optional "(12ms)" tail
        (re.compile(r"^\s*[✓√✔]\s+(.*?)(?:\s+\(\d+ms\))?\s*$"), "PASSED"),
        # "    1) failing test" (numbered, at least four columns deep)
        (re.compile(r"^\s{4,}\d+\)\s+(.*)"), "FAILED"),
        # "× failing test"
        (re.compile(r"^\s*[×✕]\s+(.*)"), "FAILED"),
        # "- pending test"
        (re.compile(r"^\s*[-•]\s+(.*)"), "PENDING"),
    )

    test_status_map = {}
    suite_stack = []
    count_empty_lines = 0

    for raw in log.splitlines():
        # Remove colours first and trim afterwards, so that a line holding
        # only whitespace and colour codes counts as blank instead of being
        # mistaken for an (empty) suite header.
        line = ansi_re.sub("", raw).rstrip()

        if not line:
            count_empty_lines += 1
            if count_empty_lines >= 2:
                count_empty_lines = 0
                suite_stack = []
            continue

        # ---- Summary detected -------------------------------------------
        if summary_re.match(line):
            suite_stack = []
            continue

        # ---- Passing / failing / pending test ---------------------------
        for pattern, status in result_rules:
            m = pattern.match(line)
            if m:
                # Drop a trailing duration such as "(1.5s)" or "(2 min)"
                test_name = dur_tail_re.sub("", m.group(1).strip())
                test_status_map[" - ".join(suite_stack + [test_name])] = status
                break
        else:
            # ---- Otherwise treat as suite header ------------------------
            indent = len(line) - len(line.lstrip())
            if indent >= 2:
                level = (indent // 2) - 1  # two spaces per level
                # Replace this level and everything nested below it
                suite_stack[level:] = [line.strip()]

    return test_status_map

def parse_log_chart_js(log: str) -> dict[str, str]:
    """
    Parser for test logs generated by ChartJS test suite

    Only failures are reported: every line of the form
    "Chrome <version> (<platform>) <test name>FAILED" contributes one entry.

    Args:
        log (str): log content
    Returns:
        dict: test case to test status mapping
    """
    failed_line = re.compile(
        r"Chrome\s[\d\.]+\s\(.*?\)\s(.*)FAILED$", re.MULTILINE
    )
    test_status_map = {}
    # findall yields the captured test name of each failing line
    for test_name in failed_line.findall(log):
        test_status_map[test_name] = TestStatus.FAILED.value
    return test_status_map


def parse_log_marked(log: str) -> dict[str, str]:
    """
    Parser for test logs generated by Marked test suite

    Only failures are reported: each line that begins with "<number>) " adds
    the rest of the line (stripped) as a failed test.

    Args:
        log (str): log content
    Returns:
        dict: test case to test status mapping
    """
    numbered_failure = re.compile(r"^\d+\)\s(.*)")
    test_status_map = {}
    for line in log.split("\n"):
        found = numbered_failure.search(line)
        if found is None:
            continue
        test_status_map[found.group(1).strip()] = TestStatus.FAILED.value
    return test_status_map


def parse_log_p5js(log_content: str) -> dict[str, str]:
    """
    Parser for p5.js test logs; only failing tests are reported.

    The log is first cleaned: ANSI escapes are removed via ``ansi_escape``,
    then JSON-looking and XML-looking blocks are dropped. Failures are then
    read from numbered entries ("<n>) ... :") whose lines are indented
    strictly deeper one after another; the stripped lines are joined with ":"
    to form the test name.

    Args:
        log_content (str): log content
    Returns:
        dict: test case to test status mapping
    """

    def remove_json_blocks(log):
        # Drops multi-line {...} / [...] dumps as well as one-line ones.
        kept = []
        inside_object = False
        inside_list = False
        for line in log.split("\n"):
            text = line.rstrip()  # ignore trailing whitespace
            if text.endswith("{"):
                inside_object = True
            elif text.endswith("["):
                inside_list = True
            elif text == "}" and inside_object:
                inside_object = False
            elif text == "]" and inside_list:
                inside_list = False
            elif inside_object or inside_list:
                pass  # body of a dump
            elif text.startswith("{") and text.endswith("}"):
                pass  # one-line object
            elif text.startswith("[") and text.endswith("]"):
                pass  # one-line list
            else:
                kept.append(line)
        return "\n".join(kept)

    def remove_xml_blocks(log):
        # Repeatedly cut the first "<tag>...</tag>" span; opening tags that
        # were swallowed by the span (all but one) are put back.
        xml_pat = re.compile(r"<(\w+)>[\s\S]*?<\/\1>", re.MULTILINE)
        while True:
            found = xml_pat.search(log)
            if found is None:
                return log
            open_tag = f"<{found.group(1)}>"
            surplus = max(found.group().count(open_tag) - 1, 0)
            log = log[: found.start()] + open_tag * surplus + log[found.end():]

    def is_valid_fail(match):
        # Every line must be indented strictly deeper than the previous one
        # (the first one deeper than column 0).
        previous_indent = 0
        for line in match.group(2).split("\n"):
            indent = len(line) - len(line.lstrip())
            if indent <= previous_indent:
                return False
            previous_indent = indent
        return True

    cleaned = ansi_escape(log_content)
    cleaned = remove_json_blocks(cleaned)
    cleaned = remove_xml_blocks(cleaned)

    # Parse failing tests
    fail_pattern = re.compile(r"^\s*(\d+)\)(.{0,1000}?):", re.MULTILINE | re.DOTALL)
    test_results = {}
    for match in fail_pattern.finditer(cleaned):
        if not is_valid_fail(match):
            continue
        name_parts = [part.strip() for part in match.group(2).split("\n")]
        test_results[":".join(name_parts)] = TestStatus.FAILED.value

    return test_results


def parse_log_react_pdf(log: str) -> dict[str, str]:
    """
    Parser for test logs generated by react-pdf test suite

    Reads lines that start with "PASS " or "FAIL "; a "(<n>ms)", "(<n> s)" or
    "(<n>s)" duration after the name is left out of the test name.

    Args:
        log (str): log content
    Returns:
        dict: test case to test status mapping
    """
    # (pattern, passed?) pairs, tried in order: duration-aware forms first,
    # the catch-all form last.
    rules = [
        (re.compile(r"^PASS\s(.*)\s\([\d\.]+ms\)"), True),
        (re.compile(r"^PASS\s(.*)\s\([\d\.]+\ss\)"), True),
        (re.compile(r"^PASS\s(.*)\s\([\d\.]+s\)"), True),
        (re.compile(r"^PASS\s(.*)"), True),
        (re.compile(r"^FAIL\s(.*)\s\([\d\.]+ms\)"), False),
        (re.compile(r"^FAIL\s(.*)\s\([\d\.]+\ss\)"), False),
        (re.compile(r"^FAIL\s(.*)\s\([\d\.]+s\)"), False),
        (re.compile(r"^FAIL\s(.*)"), False),
    ]
    test_status_map = {}
    for line in log.split("\n"):
        for regex, passed in rules:
            found = regex.match(line)
            if found is None:
                continue
            status = TestStatus.PASSED if passed else TestStatus.FAILED
            test_status_map[found.group(1)] = status.value
            break  # first matching rule wins
    return test_status_map

def parse_log_jest(log: str) -> dict[str, str]:
    """
    Parser for test logs generated with Jest. Assumes --verbose flag.

    A result line is "✓" (passed), "✕" (failed) or "○" (skipped) followed by
    the test name and an optional "(<n> ms)" / "(<n> s)" duration.

    Args:
        log (str): log content
    Returns:
        dict: test case to test status mapping
    """
    result_line = re.compile(r"^\s*(✓|✕|○)\s(.+?)(?:\s\((\d+\s*m?s)\))?$")
    test_status_map = {}

    for raw_line in log.split("\n"):
        found = result_line.match(raw_line.strip())
        if found is None:
            continue
        symbol, test_name = found.group(1), found.group(2)
        if symbol == "✓":
            status = TestStatus.PASSED
        elif symbol == "✕":
            status = TestStatus.FAILED
        else:
            # the pattern only admits "○" here
            status = TestStatus.SKIPPED
        test_status_map[test_name] = status.value
    return test_status_map


def parse_log_jest_json(log: str) -> dict[str, str]:
    """
    Parser for test logs generated with Jest. Assumes the --json flag has been
    piped into JEST_JSON_JQ_TRANSFORM. Unlike --verbose, tests with the same name
    in different describe blocks print with different names.

    Each relevant line looks like "[PASSED] <test name>" or
    "[FAILED] <test name>".

    Args:
        log (str): log content
    Returns:
        dict: test case to test status mapping
    """
    tagged_line = re.compile(r"^\[(PASSED|FAILED)\]\s(.+)$")
    test_status_map = {}

    for raw_line in log.split("\n"):
        found = tagged_line.match(raw_line.strip())
        if found is None:
            continue
        tag, test_name = found.groups()
        # the pattern only admits the two tags below
        status = TestStatus.PASSED if tag == "PASSED" else TestStatus.FAILED
        test_status_map[test_name] = status.value
    return test_status_map


def parse_log_vitest(log: str) -> dict[str, str]:
    """
    Parser for test logs generated with vitest. Assumes --reporter=verbose flag.

    A result line is "✓" (passed), "×" (failed) or "↓" (skipped) followed by
    the test name and an optional duration or "[skipped]" tail.

    Args:
        log (str): log content
    Returns:
        dict: test case to test status mapping
    """
    result_line = re.compile(
        r"^\s*(✓|×|↓)\s(.+?)(?:\s(\d+\s*m?s?|\[skipped\]))?$"
    )
    test_status_map = {}

    for raw_line in log.split("\n"):
        found = result_line.match(raw_line.strip())
        if found is None:
            continue
        symbol, test_name = found.group(1), found.group(2)
        if symbol == "✓":
            status = TestStatus.PASSED
        elif symbol == "×":
            status = TestStatus.FAILED
        else:
            # the pattern only admits "↓" here
            status = TestStatus.SKIPPED
        test_status_map[test_name] = status.value
    return test_status_map


def parse_log_karma(log: str) -> dict[str, str]:
    """
    Parser for test logs generated with Karma. Handles duplicate test names in
    different describe blocks. Logic is brittle.

    Lines before the first one containing "Starting browser" are ignored and
    parsing ends at the first line starting with "SUMMARY:". Indented lines
    without a status mark are suite headers; "✔" / "✖" lines are recorded as
    passed / failed under the current suite names joined with " > ".

    Args:
        log (str): log content
    Returns:
        dict: test case to test status mapping
    """
    test_status_map = {}
    current_indent = -1
    current_suite = []
    started = False

    # The mandatory "\s" before the name consumes one whitespace character,
    # so for a header line "indent" is one character shorter than the line's
    # real indentation.
    pattern = re.compile(r"^(\s*)?([✔✖])?\s(.*)$")

    for line in log.split("\n"):
        if line.startswith("SUMMARY:"):
            # Individual test logs end here
            return test_status_map

        if "Starting browser" in line:
            started = True
            continue

        if not started:
            continue

        match = pattern.match(line)
        if not match:
            continue
        indent, status, name = match.groups()

        if indent and not status:
            new_indent = len(indent)
            if new_indent > current_indent:
                # Deeper header: enter a nested suite
                current_indent = new_indent
                current_suite.append(name)
            elif new_indent < current_indent:
                # Shallower header: leave one suite level. The stack may
                # already be empty when the log dedents more levels than
                # were entered, so guard the pop.
                current_indent = new_indent
                if current_suite:
                    current_suite.pop()
                continue

        if status in ("✔", "✖"):
            full_test_name = " > ".join(current_suite + [name])
            test_status_map[full_test_name] = (
                TestStatus.PASSED.value
                if status == "✔"
                else TestStatus.FAILED.value
            )

    return test_status_map


def parse_log_tap(log: str) -> dict[str, str]:
    """
    Parser for test logs generated with TAP

    A result line is "ok <number> <name>" (passed) or
    "not ok <number> <name>" (failed).

    Args:
        log (str): log content
    Returns:
        dict: test case to test status mapping
    """
    # Pattern to match TAP result lines
    result_line = re.compile(r"^(ok|not ok) (\d+) (.+)$")
    test_status_map = {}

    for raw_line in log.split("\n"):
        found = result_line.match(raw_line.strip())
        if found is None:
            continue
        verdict, test_name = found.group(1), found.group(3)
        # the pattern only admits "ok" and "not ok"
        status = TestStatus.PASSED if verdict == "ok" else TestStatus.FAILED
        test_status_map[test_name] = status.value

    return test_status_map


def get_js_parser_by_name(name: str):
    """
    Return the log parser registered under ``name``.

    "mocha" resolves to ``parse_log_mocha_v2``; any name that is not listed
    falls back to ``parse_log_vitest``.
    """
    parsers_by_name = {
        "calypso": parse_log_calypso,
        "chartjs": parse_log_chart_js,
        "marked": parse_log_marked,
        "p5js": parse_log_p5js,
        "reactpdf": parse_log_react_pdf,
        "vitest": parse_log_vitest,
        "jest": parse_log_jest,
        "mocha": parse_log_mocha_v2,
        "karma": parse_log_karma,
        "tap": parse_log_tap,
    }
    return parsers_by_name.get(name, parse_log_vitest)

# Maps a repository identifier ("owner/name") to the parser function used for
# that repository's test logs. Note that "axios/axios" shares parse_log_marked
# with "markedjs/marked".
MAP_REPO_TO_PARSER_JS = {
    "Automattic/wp-calypso": parse_log_calypso,
    "chartjs/Chart.js": parse_log_chart_js,
    "markedjs/marked": parse_log_marked,
    "processing/p5.js": parse_log_p5js,
    "diegomura/react-pdf": parse_log_react_pdf,
    "axios/axios": parse_log_marked,
    "tjw-lint/vue3-snapshot-serializer": parse_log_vitest
}
