import os
import re

from op_eval import config
from op_eval.dataset import dataset

# Reverse index built once at import time: normalized operator name ->
# PascalCase name. Dataset entries lacking either field (or holding an empty
# value for it) are left out.
_normalized_to_pascal = {
    normalized: pascal
    for normalized, pascal in (
        (entry.get("normalized_name"), entry.get("pascal_name"))
        for entry in dataset.values()
    )
    if normalized and pascal
}


def get_ref_src_path(op: str) -> str:
    """Build the path of the reference implementation file for ``op``.

    The path has the form
    ``<config.ref_impl_base_path>/KernelBench/<level>/<op>.py`` where
    ``<level>`` is the ``"level"`` field of the operator's dataset entry
    (e.g. ``"level1"``).

    Args:
        op: Operator name, used both as the dataset key and as the file stem.

    Returns:
        The joined path. The file is not checked for existence.

    Note:
        The dataset lookup is not guarded; an unknown operator or an entry
        without a ``"level"`` field propagates the lookup error.
    """
    level_dir = dataset[op]["level"]
    path_parts = (config.ref_impl_base_path, "KernelBench", level_dir, f"{op}.py")
    return os.path.join(*path_parts)


def read_file(file_path) -> str:
    """Return the text content of ``file_path``, or ``""`` if it cannot be read.

    This is a best-effort reader: failures are reported on stdout and turned
    into an empty string instead of being raised.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file content decoded as UTF-8, or ``""`` when the file does not
        exist or reading/decoding fails.
    """
    try:
        # Open directly instead of checking os.path.exists() first: the
        # pre-check is racy (the file can vanish in between) and redundant.
        # The encoding is explicit so the result does not depend on the
        # platform's locale.
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read()
    except FileNotFoundError:
        print(f"File {file_path} does not exist")
        return ""
    except (OSError, ValueError) as e:
        # OSError: permission problems, directories, I/O failures.
        # ValueError: undecodable bytes (UnicodeDecodeError) or invalid paths.
        print(f"Error reading file {file_path}: {e}")
        return ""

def extract_first_code(
    output_string: str, code_language_types: list[str]
) -> "tuple[str, str] | None":
    """Extract the first triple-backtick code block from model output.

    Args:
        output_string: Raw text that may contain a fenced code block.
        code_language_types: Language tags (e.g. ``["python", "cpp"]``) that
            may directly follow the opening fence and must be removed from
            the extracted code.

    Returns:
        ``(code, fenced_block)`` for the first fenced block found, where
        ``code`` is the block content without a leading language tag and
        ``fenced_block`` is the whitespace-stripped content (tag included)
        wrapped in triple backticks again. ``None`` if there is no fenced
        block.
    """
    trimmed = output_string.strip()

    # Non-greedy so only the first fence pair is matched; DOTALL lets the
    # block span multiple lines.
    code_match = re.search(r"```(.*?)```", trimmed, re.DOTALL)
    if code_match is None:
        return None

    code_block = code_match.group(1).strip()

    # Default to the whole block so an untagged fence (``` ... ```) yields its
    # content instead of leaving ``code`` unassigned.
    code = code_block
    for code_type in code_language_types:
        if not code_type or not code_block.startswith(code_type):
            continue
        remainder = code_block[len(code_type):]
        # Treat the prefix as a language tag only when it is a whole token;
        # otherwise tag "c" would eat the first letter of "cpp" or "class".
        if not remainder or remainder[0].isspace():
            code = remainder.strip()
            break

    return code, f"```{code_block}```"

def underscore_to_pascalcase(underscore_str):
    """
    Map an underscore-style operator name to its PascalCase name via the dataset.

    Lookups are tried in this order; the first hit wins:
      1. ``underscore_str`` as a dataset key,
      2. ``underscore_str`` as a normalized name,
      3. for names ending in ``_custom``: the name without that suffix, looked
         up as in steps 1 and 2, with ``"Custom"`` appended to the result.

    Args:
        underscore_str (str): Input name (e.g., "vector_add")

    Returns:
        str: The PascalCase name from the dataset (e.g., "VectorAdd"), "" for
        empty input, or the input unchanged when no lookup succeeds (no
        manual case conversion is attempted).
    """
    if not underscore_str:
        return ""

    # Direct hit on the dataset key.
    if underscore_str in dataset:
        return dataset[underscore_str]["pascal_name"]

    # Hit via the normalized-name index.
    if pascal := _normalized_to_pascal.get(underscore_str):
        return pascal

    # Legacy names carry a "_custom" suffix; resolve the stem instead.
    legacy_suffix = "_custom"
    if underscore_str.endswith(legacy_suffix):
        stem = underscore_str[: -len(legacy_suffix)]
        if stem in dataset:
            return dataset[stem]["pascal_name"] + "Custom"
        if stem_pascal := _normalized_to_pascal.get(stem):
            return stem_pascal + "Custom"

    # Nothing matched: hand the input back untouched.
    return underscore_str


def normalize_op_name(op_name):
    """
    Return the dataset's normalized name for an operator (used for C++ kernel
    code generation).

    Operators that are not dataset keys are returned unchanged; no pattern-based
    guessing is attempted.
    """
    if op_name not in dataset:
        return op_name
    return dataset[op_name]["normalized_name"]


