def is_step_marker(name):
    """Return True if *name* begins with the ``step_execute_model[`` prefix.

    A falsy *name* (``None`` or an empty string) yields ``False`` instead of
    raising, which matches how the other name predicates in this file treat
    missing names.
    """
    return bool(name) and name.startswith("step_execute_model[")


def is_nccl_op(name):
    """Return True if *name* starts with ``nccl`` (case-sensitive).

    Always returns a real ``bool``: a falsy *name* (``None`` or ``""``)
    gives ``False`` rather than leaking the falsy input back to the caller.
    """
    return bool(name) and name.startswith("nccl")


def is_torch_frontend_op(name):
    """Return True if *name* starts with the ``torch.`` prefix.

    Always returns a real ``bool``: a falsy *name* (``None`` or ``""``)
    gives ``False`` rather than leaking the falsy input back to the caller.
    """
    return bool(name) and name.startswith("torch.")


def is_comm_op(name):
    """Return True if *name* starts with the ``communication_op::`` prefix.

    Always returns a real ``bool``: a falsy *name* (``None`` or ``""``)
    gives ``False`` rather than leaking the falsy input back to the caller.
    """
    return bool(name) and name.startswith("communication_op::")

def is_kernel_launch_api(name: str) -> bool:
    """Return True if *name* starts with one of the known launch-call prefixes.

    The recognised prefixes are ``cudaLaunchKernel``, ``cudaLaunchKernelExC``,
    ``cudaGraphLaunch`` and ``cuLaunchKernel``. A falsy *name* (``None`` or
    ``""``) yields ``False`` instead of raising, so callers may pass the
    result of a dict lookup whose value is missing.
    """
    # "cudaLaunchKernelExC" is already matched by the shorter
    # "cudaLaunchKernel" prefix; it is kept to make the intent explicit.
    prefixes = (
        "cudaLaunchKernel",
        "cudaLaunchKernelExC",
        "cudaGraphLaunch",
        "cuLaunchKernel",
    )
    return bool(name) and name.startswith(prefixes)


def is_nccl_kernel(name: str) -> bool:
    """Return True if *name* contains ``nccl`` anywhere, ignoring case.

    Always returns a real ``bool`` as the annotation promises: a falsy
    *name* (``None`` or ``""``) gives ``False`` rather than the falsy input.
    """
    return bool(name) and "nccl" in name.lower()

def parse_op_id(name):
    """Parse the integer from an ``op_id = 123`` field in *name*.

    *name* is split on commas; the first part that (after stripping
    whitespace) starts with ``op_id =`` is converted to ``int``.

    Returns:
        The parsed integer, or ``None`` when *name* has no such part, the
        value is not a valid integer, or *name* is not string-like.
    """
    prefix = "op_id ="
    try:
        for part in name.split(","):
            part = part.strip()
            if part.startswith(prefix):
                return int(part[len(prefix):].strip())
    except (AttributeError, TypeError, ValueError):
        # Best-effort parsing: a non-string name or a malformed number means
        # "no op id". Unlike a bare ``except``, this does not swallow
        # KeyboardInterrupt / SystemExit.
        pass
    return None


def get_event_time(ev):
    """Return the time value of event *ev*, selected by its ``"type"`` entry.

    ``NVTX_MARKER`` events use ``"timestamp"``, ``RUNTIME`` and ``DRIVER``
    events use ``"start"``, and ``KERNEL`` events use ``"gpu_start"``.
    Any other type yields ``float("inf")``.
    """
    # Each row pairs a group of event types with the key holding their time.
    time_key_by_types = (
        (("NVTX_MARKER",), "timestamp"),
        (("RUNTIME", "DRIVER"), "start"),
        (("KERNEL",), "gpu_start"),
    )
    kind = ev["type"]
    for kinds, time_key in time_key_by_types:
        if kind in kinds:
            return ev[time_key]
    return float("inf")

def find_nearest_launch_api(kernel, candidates):
    """Return the launch-API entry whose ``"start"`` is closest to the kernel.

    Only entries of *candidates* whose ``"name"`` passes
    ``is_kernel_launch_api`` are considered. Distance is the absolute
    difference between the entry's ``"start"`` and the kernel's
    ``"gpu_start"`` (both default to 0 when absent). On ties the earliest
    entry in *candidates* wins. Returns ``None`` when nothing qualifies.
    """
    kernel_start = kernel.get("gpu_start", 0)
    nearest = None
    nearest_gap = float("inf")

    # Lazy filter: each candidate is tested right before it is measured.
    launches = (
        entry for entry in candidates
        if is_kernel_launch_api(entry.get("name", ""))
    )
    for launch in launches:
        gap = abs(launch.get("start", 0) - kernel_start)
        if gap < nearest_gap:
            nearest, nearest_gap = launch, gap
    return nearest

def assign_events_to_ops_best(ev_list, ops, field, allow_multi=False):
    """Attach each event to the op(s) whose time range fully contains it.

    An event's range begins at its ``"start"`` (falling back to
    ``"timestamp"``, then 0) and finishes at its ``"end"``, clamped so it is
    never before the beginning. An op contains the event when
    ``op["start"] <= begin`` and ``op["end"] >= finish`` (missing keys count
    as 0). Matching events are appended to the list stored under
    ``op[field]``, which is created on demand.

    Args:
        ev_list: Iterable of event dicts.
        ops: Indexable sequence of op dicts; mutated in place.
        field: Key under which matched events are collected on each op.
        allow_multi: When false, only the containing op with the shortest
            duration receives the event (the first such op on ties); when
            true, every containing op receives it.

    NOTE(review): the cursor below only moves forward, so this presumably
    expects ``ev_list`` and ``ops`` to be ordered by time — confirm with
    the callers.
    """
    def duration(op):
        return op.get("end", 0) - op.get("start", 0)

    op_count = len(ops)
    cursor = 0
    for event in ev_list:
        begin = event.get("start", event.get("timestamp", 0))
        finish = event.get("end", begin)
        if begin > finish:
            finish = begin

        # Permanently skip ops that ended before this event began.
        while cursor < op_count and ops[cursor].get("end", 0) < begin:
            cursor += 1

        # Collect containing ops, stopping at the first op that starts
        # after the event has finished.
        enclosing = []
        for idx in range(cursor, op_count):
            op = ops[idx]
            op_start = op.get("start", 0)
            if op_start > finish:
                break
            if op_start <= begin and op.get("end", 0) >= finish:
                enclosing.append(op)

        if allow_multi:
            targets = enclosing
        elif enclosing:
            targets = [min(enclosing, key=duration)]
        else:
            targets = []

        for target in targets:
            target.setdefault(field, []).append(event)