"""String manipulation utilities."""
from typing import Dict, Any
from examples.dataset import dataset


def underscore_to_pascalcase(underscore_str: str) -> str:
    """Convert underscore-separated string to PascalCase.

    Resolution order:
      1. A falsy input (e.g. the empty string) yields "".
      2. A name present in ``dataset`` returns that entry's "pascal_name".
      3. A legacy "<base>_custom" name whose base is present in ``dataset``
         returns the base entry's "pascal_name" with "Custom" appended.
      4. Any other name is converted generically: it is split on
         underscores, empty segments are dropped, and the first character
         of each remaining segment is upper-cased (the rest is untouched,
         so an already-PascalCase name is returned unchanged).

    Args:
        underscore_str: Input string with underscores (e.g., "vector_add")

    Returns:
        PascalCase version (e.g., "VectorAdd")
    """
    if not underscore_str:  # Handle empty string
        return ""

    # Adapter: prefer the name recorded in the dataset.
    if underscore_str in dataset:
        return dataset[underscore_str]["pascal_name"]

    # Adapter: handle the legacy "_custom" suffix by resolving the base name.
    legacy_suffix = "_custom"
    if underscore_str.endswith(legacy_suffix):
        base_name = underscore_str[:-len(legacy_suffix)]
        if base_name in dataset:
            return dataset[base_name]["pascal_name"] + "Custom"

    # Generic fallback so names unknown to the dataset are still converted
    # instead of being returned verbatim. Only the first character of each
    # segment is changed, which keeps interior capitals intact.
    return "".join(
        segment[:1].upper() + segment[1:]
        for segment in underscore_str.split("_")
        if segment
    )


def camel_to_snake(name: str) -> str:
    """Convert CamelCase string to snake_case.

    Every upper-case character is lower-cased and, unless it is the very
    first character of the input, preceded by an underscore. All other
    characters are copied through unchanged. Runs of capitals are therefore
    split letter by letter (e.g., "HTTPServer" -> "h_t_t_p_server").

    Args:
        name: Input string in CamelCase (e.g., "VectorAdd")

    Returns:
        snake_case version (e.g., "vector_add")
    """
    def _convert(position: int, char: str) -> str:
        # Non-capitals pass straight through.
        if not char.isupper():
            return char
        lowered = char.lower()
        # No separator in front of a leading capital.
        return lowered if position == 0 else "_" + lowered

    return "".join(_convert(idx, char) for idx, char in enumerate(name))


import re

def normalize_op_name(op_name: str) -> str:
    """Normalize operator name for C++ kernel code generation.

    A name present in ``dataset`` returns that entry's "normalized_name".
    Any other name is normalized with the following steps:

    1. Strip leading numeric prefix (e.g., "1_Square_..." -> "Square_...")
    2. Clean up trailing underscores
    3. Collapse multiple consecutive underscores
    4. Convert to lowercase for snake_case file naming

    Examples (for names not overridden by the dataset):
        "1_Square_matrix_multiplication_" -> "square_matrix_multiplication"
        "100_HingeLoss" -> "hingeloss"
        "39_L2Norm_" -> "l2norm"

    A name that is already normalized is returned unchanged by these steps.
    A name consisting only of digits has no "<digits>_" prefix and is
    returned as-is (lower-cased).

    Args:
        op_name: Original operator name (may start with digits)

    Returns:
        Normalized snake_case operator name
    """
    # Adapter: prefer the name recorded in the dataset.
    if op_name in dataset:
        return dataset[op_name]["normalized_name"]

    # Fallback: apply the documented normalization so names unknown to the
    # dataset are not returned raw (e.g. with a leading "1_").
    stripped = re.sub(r"^\d+_+", "", op_name)   # 1. drop "<digits>_" prefix
    stripped = stripped.rstrip("_")             # 2. drop trailing underscores
    collapsed = re.sub(r"_{2,}", "_", stripped)  # 3. collapse "__" runs
    return collapsed.lower()                    # 4. lowercase


def trim_long_string(string: str, threshold: int = 5100, k: int = 2500) -> str:
    """Trim long string to first k and last k characters if exceeds threshold.

    The removed middle section is replaced by a marker line stating how many
    characters were dropped.

    Edge cases:
      * A negative ``k`` is treated as 0.
      * ``k == 0`` keeps no content, only the marker (a plain ``string[-0:]``
        slice would keep the whole string instead).
      * If the head and tail together would already cover the whole string
        (``2 * k >= len(string)``) there is nothing to cut, so the string is
        returned unchanged rather than duplicating the overlapping part.

    Args:
        string: Input string
        threshold: Length threshold to trigger trimming
        k: Number of characters to keep from start and end

    Returns:
        Trimmed string or original string if below threshold
    """
    total_len = len(string)
    if total_len <= threshold:
        return string

    keep = max(k, 0)
    truncated_len = total_len - 2 * keep
    if truncated_len <= 0:
        # Head and tail would meet or overlap: nothing is actually removed.
        return string

    first_k_chars = string[:keep]
    # Index from the front so that keep == 0 yields "" rather than the
    # entire string.
    last_k_chars = string[total_len - keep:]

    return f"{first_k_chars}\n ... [{truncated_len} characters truncated] ... \n{last_k_chars}"


def describe_api(api: Dict[str, Any]) -> str:
    """Render one API entry as a Markdown snippet, tolerating missing keys.

    The output always starts with a heading and a description line. A
    "Parameters" list, an "Outputs" list and a fenced ``cpp`` example are
    appended only when the corresponding entry ("inputs", "outputs",
    "example") is present and non-empty. Missing fields fall back to
    placeholder text ("UNKNOWN", "param", "out", "any", ...).

    Args:
        api: API dictionary with name, description, inputs, outputs, example

    Returns:
        Markdown formatted API description
    """
    # Collect fragments and join once at the end.
    fragments = [
        f"### API: {api.get('name', 'UNKNOWN')}\n",
        f"**Description:** {api.get('description', 'No description provided.')}\n\n",
    ]

    parameters = api.get("inputs") or []
    if parameters:
        fragments.append("#### Parameters:\n")
        for param in parameters:
            param_name = param.get("name", "param")
            param_type = param.get("type", "any")
            requirement = "Optional"
            if param.get("required"):
                requirement = "Required"
            param_desc = param.get("description", "")
            fragments.append(
                f"- `{param_name}` ({requirement}) — Type: `{param_type}` — {param_desc}\n"
            )
        fragments.append("\n")

    results = api.get("outputs") or []
    if results:
        fragments.append("#### Outputs:\n")
        fragments.extend(
            f"- `{out.get('name', 'out')}` — Type: `{out.get('type', 'any')}` — {out.get('description', '')}\n"
            for out in results
        )
        fragments.append("\n")

    snippet = api.get("example") or ""
    if snippet:
        fragments.append(f"#### Example:\n```cpp\n{snippet}\n```\n\n")

    return "".join(fragments)

