import pathlib
from enum import Enum
from typing import Set, Dict, Any, Tuple
import re

class Language(Enum):
    """Programming languages this module knows how to handle.

    Every member's value is a lowercase identifier string; ``str(member)``
    yields that identifier instead of the default ``Language.NAME`` form.
    """

    PYTHON = "python"
    JAVASCRIPT = "javascript"
    TYPESCRIPT = "typescript"
    GO = "go"
    RUST = "rust"
    C = "c"
    CPP = "cpp"
    JAVA = "java"
    RUBY = "ruby"
    C_SHARP = "csharp"
    PHP = "php"

    def __str__(self) -> str:
        """Return the member's raw string value."""
        identifier = self._value_
        return identifier

# File suffixes associated with each supported language (one tuple per language).
# NOTE: '.h' is listed under both C and C++, so a suffix alone does not always
# identify a single language.
LANGUAGE_TO_EXTENSIONS = {
    Language.PYTHON: ('.py', '.pyw'),
    Language.JAVASCRIPT: ('.js', '.jsx'),
    Language.TYPESCRIPT: ('.ts', '.tsx'),
    Language.GO: ('.go',),
    Language.RUST: ('.rs',),
    Language.C: ('.c', '.h'),
    Language.CPP: ('.cpp', '.hpp', '.cc', '.cxx', '.h'),
    Language.JAVA: ('.java',),
    Language.RUBY: ('.rb',),
    Language.C_SHARP: ('.cs',),
    Language.PHP: ('.php',) # PHP support added
}

# ... (Language enum and LANGUAGE_TO_EXTENSIONS are unchanged) ...

# --- Extended language configuration ---
# The "boundary_nodes" key lists the node types that an upward search for an
# ancestor node must not cross.
# Per-language settings consumed by LanguageManager. Each entry holds:
#   "toplevel_definitions" / "class_methods": query strings returned by
#       LanguageManager.get_queries() (described there as tree-sitter queries)
#   "function_definitions": query string returned by get_function_queries()
#   "boundary_nodes": node-type names returned by get_boundary_nodes()
#   "str": comment string returned by get_comment_str() ("#" or "//")
LANGUAGE_CONFIG: Dict[Language, Dict[str, Any]] = {
    # Python
    Language.PYTHON: {
        "toplevel_definitions": """
            (function_definition) @func
            (class_definition) @cls
        """,
        "class_methods": "(function_definition) @meth",
        "boundary_nodes": {'function_definition', 'class_definition'},
        "function_definitions": "(function_definition) @func",
        "str": "#"
    },
    # JavaScript
    Language.JAVASCRIPT: {
        "toplevel_definitions": """
            (function_declaration) @func
            (lexical_declaration (variable_declarator value: (arrow_function))) @func
            (class_declaration) @cls
        """,
        "class_methods": "(method_definition) @meth",
        "boundary_nodes": {'function_declaration', 'arrow_function', 'class_declaration', 'method_definition'},
        # NOTE(review): method_definition is captured as @meth here, while the
        # TypeScript, Go and PHP "function_definitions" queries capture their
        # methods as @func -- confirm whether consumers expect a single capture name.
        "function_definitions": """
            (function_declaration) @func
            (lexical_declaration (variable_declarator value: (arrow_function))) @func
            (method_definition) @meth
        """,
        "str": "//"
    },
    # TypeScript
    Language.TYPESCRIPT: {
        "toplevel_definitions": """
            (function_declaration) @func
            (lexical_declaration (variable_declarator value: (arrow_function))) @func
            (class_declaration) @cls
            (interface_declaration) @cls 
        """,
        "class_methods": "(method_definition) @meth",
        "boundary_nodes": {'function_declaration', 'arrow_function', 'class_declaration', 'method_definition', 'interface_declaration'},
        "function_definitions": """
            (function_declaration) @func
            (lexical_declaration (variable_declarator value: (arrow_function))) @func
            (method_definition) @func
        """,
        "str": "//"
    },
    # Go
    Language.GO: {
        "toplevel_definitions": """
            (function_declaration) @func 
            (type_declaration (type_spec name:(_) @cls))
            (method_declaration) @meth 
        """,
        "class_methods": "",  # Go methods are not defined inside the struct body
        "boundary_nodes": {'function_declaration', 'method_declaration', 'type_spec'},
        "function_definitions": """
            (function_declaration) @func
            (method_declaration) @func
        """,
        "str": "//"
    },
    # Rust
    Language.RUST: {
        "toplevel_definitions": """
            (function_item) @func
            (struct_item) @cls
            (enum_item) @cls
            (trait_item) @cls
            (impl_item) @cls 
        """,
        "class_methods": "(function_item) @meth", # looked up inside impl blocks
        "boundary_nodes": {'function_item', 'struct_item', 'enum_item', 'impl_item', 'trait_item'},
        "function_definitions": "(function_item) @func",
        "str": "//"
    },
    # C
    Language.C: {
        "toplevel_definitions": """
            (function_definition) @func
            (struct_specifier) @cls
            (union_specifier) @cls
            (enum_specifier) @cls
        """,
        "class_methods": "", # C has no classes or methods
        "boundary_nodes": {'function_definition', 'struct_specifier', 'union_specifier', 'enum_specifier'},
        "function_definitions": "(function_definition) @func",
        "str": "//"
    },
    # C++
    Language.CPP: {
        "toplevel_definitions": """
            (function_definition) @func
            (class_specifier) @cls
            (struct_specifier) @cls
            (namespace_definition) @cls 
        """,
        "class_methods": "(function_definition) @meth",
        "boundary_nodes": {'function_definition', 'class_specifier', 'struct_specifier', 'namespace_definition'},
        "function_definitions": "(function_definition) @func",
        "str": "//"
    },
    # Java
    Language.JAVA: {
        "toplevel_definitions": """
            (class_declaration) @cls
            (interface_declaration) @cls
            (enum_declaration) @cls
        """,
        "class_methods": "(method_declaration) @meth",
        "boundary_nodes": {'method_declaration', 'class_declaration', 'interface_declaration', 'enum_declaration'},
        "function_definitions": "(method_declaration) @func",
        "str": "//"
    },
    # Ruby
    Language.RUBY: {
        "toplevel_definitions": """
            (method) @func
            (singleton_method) @func
            (class) @cls
            (module) @cls
        """,
        "class_methods": "(method) @meth",
        "boundary_nodes": {'method', 'singleton_method', 'class', 'module'},
        "function_definitions": "(method) @func",
        "str": "#"
    },
    # C#
    Language.C_SHARP: {
        "toplevel_definitions": """
            (namespace_declaration) @cls
            (class_declaration) @cls
            (struct_declaration) @cls
            (interface_declaration) @cls
            (enum_declaration) @cls
            (method_declaration) @func
        """,
        "class_methods": "(method_declaration) @meth",
        "boundary_nodes": {'namespace_declaration', 'class_declaration', 'struct_declaration', 'interface_declaration', 'enum_declaration', 'method_declaration'},
        "function_definitions": "(method_declaration) @func",
        "str": "//"
    },
    # PHP
    Language.PHP: {
        "toplevel_definitions": """
            (function_definition) @func
            (class_declaration) @cls
            (trait_declaration) @cls
            (interface_declaration) @cls
        """,
        "class_methods": "(method_declaration) @meth",
        "boundary_nodes": {'function_definition', 'class_declaration', 'trait_declaration', 'interface_declaration', 'method_declaration'},
        "function_definitions": """
            (function_definition) @func
            (method_declaration) @func
        """,
        "str": "//"
    }
}

class LanguageManager:
    """
    Manages language-specific configurations and operations,
    such as identifying test files.

    An instance is bound to one ``Language`` and reads that language's entries
    from ``LANGUAGE_CONFIG`` and ``LANGUAGE_TO_EXTENSIONS``.
    """

    # Config keys whose values make up the mapping returned by get_queries().
    _QUERY_KEYS = ("toplevel_definitions", "class_methods")

    def __init__(self, language: Language | str):
        """
        Bind the manager to a language and load its configuration.

        Args:
            language: A ``Language`` member, or the string value of one
                (for example ``"python"``).

        Raises:
            ValueError: If ``language`` is a string that is not the value of
                any ``Language`` member.
            TypeError: If ``language`` is neither a ``Language`` nor a ``str``.
        """
        if isinstance(language, Language):
            self.language = language
        elif isinstance(language, str):
            try:
                self.language = Language(language)
            except ValueError as exc:
                valid_languages = [lang.value for lang in Language]
                # Chain explicitly so the original enum lookup failure stays visible.
                raise ValueError(f"'{language}' is not a supported language. Please use one of: {valid_languages}") from exc
        else:
            raise TypeError("language must be an instance of Language enum or a corresponding string value.")
        # Fall back to an empty configuration when the language has no entry.
        self.config = LANGUAGE_CONFIG.get(self.language, {})

    def get_queries(self) -> Dict[str, str]:
        """Return the current language's tree-sitter query strings, keyed by query name.

        Only the "toplevel_definitions" and "class_methods" entries are included.
        """
        return {key: query for key, query in self.config.items() if key in self._QUERY_KEYS}

    def get_function_queries(self) -> str:
        """Return the function-definition query of the current language ('' if not configured)."""
        return self.config.get("function_definitions", "")

    def get_comment_str(self) -> str:
        """Return the comment string of the current language ('' if not configured).

        Uses ``.get`` like the sibling getters so that the empty fallback
        configuration does not surface as a bare ``KeyError``.
        """
        return self.config.get("str", "")

    def get_boundary_nodes(self) -> Set[str]:
        """Return the scope-boundary node types of the current language.

        A fresh set is returned on every call so that callers cannot mutate
        the shared module-level configuration by accident.
        """
        return set(self.config.get("boundary_nodes", ()))

    def _is_test_by_convention(self, path: pathlib.Path, keywords: Set[str]) -> bool:
        """
        Generic test file identification based on keywords in path parts.
        Checks if any directory or the filename starts with a given keyword.

        Args:
            path: Path whose components are inspected (compared lowercased).
            keywords: Lowercase prefixes that mark a component as test-related.

        Returns:
            True if at least one path component starts with one of the keywords.
        """
        # str.startswith accepts a tuple; build it once instead of per component.
        prefixes = tuple(keywords)
        return any(part.lower().startswith(prefixes) for part in path.parts)

    def code_suffix_set(self) -> Tuple[str, ...]:
        """Return the file suffixes registered for the current language.

        Despite the method name, the value is the tuple stored in
        ``LANGUAGE_TO_EXTENSIONS``.
        """
        return LANGUAGE_TO_EXTENSIONS[self.language]

    def is_test_path(self, path_str: str) -> bool:
        """
        Checks if the given path is a test-related file for the specified language.

        Args:
            path_str: The file path string.

        Returns:
            True if it's a test file, False otherwise.
        """
        path = pathlib.Path(path_str)
        name = path.name
        lowered_name = name.lower()
        language = self.language

        # --- Python ---
        if language == Language.PYTHON:
            # pytest collects both ``test_*.py`` and ``*_test.py``. The prefix
            # form is covered by the keyword check; the suffix form is not.
            if lowered_name.endswith('_test.py'):
                return True
            return self._is_test_by_convention(path, {"test", "tests", "e2e", "testing"})

        # --- Go ---
        if language == Language.GO:
            return lowered_name.endswith('_test.go')

        # --- JavaScript / TypeScript ---
        if language in (Language.JAVASCRIPT, Language.TYPESCRIPT):
            if any(part.lower() == '__tests__' for part in path.parts):
                return True
            if '.test.' in lowered_name or '.spec.' in lowered_name:
                return True
            return self._is_test_by_convention(path, {"test", "tests"})

        # --- Java ---
        if language == Language.JAVA:
            if any(part.lower() == "test" for part in path.parts):
                return True
            stem = path.stem
            return stem.startswith("Test") or stem.endswith("Test")

        # --- PHP ---
        if language == Language.PHP:
            # PHPUnit convention: test files end with `Test.php` (e.g. "UserTest.php")
            # and usually live under a `tests` directory. The suffix is matched
            # case-sensitively so that ordinary words ending in "test"
            # ("latest.php", "contest.php") are not misclassified; the
            # snake_case form `_test.php` is still accepted in any case.
            if name.endswith('Test.php') or lowered_name.endswith('_test.php'):
                return True
            return self._is_test_by_convention(path, {"tests"})

        # --- Ruby ---
        if language == Language.RUBY:
            if lowered_name.endswith('_spec.rb') or lowered_name.startswith('test_'):
                return True
            return self._is_test_by_convention(path, {"test", "spec", "specs"})

        # --- Rust ---
        if language == Language.RUST:
            return "tests" in path.parts

        # --- C / C++ / C# (shared rule) ---
        if language in (Language.C, Language.CPP, Language.C_SHARP):
            return self._is_test_by_convention(path, {"test", "tests"})

        return False

    def get_imports_pattern_old(self) -> re.Pattern[str] | None:
        """
        Return the compiled regex matching import / include / require statements
        of the current language, using the older ``re.VERBOSE`` pattern layout.

        Returns:
            A pattern compiled with ``re.MULTILINE | re.VERBOSE``, or ``None``
            when no pattern is defined for the language.
        """
        # Patterns are written for re.VERBOSE: whitespace is ignored and an
        # unescaped '#' starts a comment, so a literal '#' must be written '\#'.
        patterns: dict[Language, str] = {
            Language.PYTHON: r"""
                ^(?:from\s+[\w\.]+\s+)?          # optional from
                import\s+                        # 关键字
                (?:[\w\.]+(?:\s+as\s+\w+)?      # 单模块 / 别名
                   (?:\s*,\s*[\w\.]+(?:\s+as\s+\w+)?)* |
                   \([\s\S]*?\))
                """,

            Language.JAVASCRIPT: r"""
                ^(?:import\s+(?:[\w*{}][^;'"]*\s+from\s+)?['"`][^'"]+['"`] |
                   (?:const|let|var)\s+\w+\s*=\s*require\s*\(\s*['"`][^'"]+['"`]\s*\))
                """,

            Language.TYPESCRIPT: r"""
                ^(?:import\s+(?:type\s+)?[\w*{}][^;'"]*\s+from\s+['"`][^'"]+['"`] |
                   import\s*['"`][^'"]+['"`] |
                   (?:const|let|var)\s+\w+\s*=\s*require\s*\(\s*['"`][^'"]+['"`]\s*\))
                """,

            Language.JAVA: r"""
                ^import\s+(?:static\s+)?[\w\.]+\s*;
                """,

            Language.GO: r"""
                ^import\s+(?:\w+\s+)?["`][^"`]+["`]
                """,

            Language.PHP: r"""
                ^(?:use\s+(?:function|const)?\s*[\w\\]+(?:\s+as\s+\w+)?\s*; |
                   require(?:_once)?\s*\(\s*['"][^'"]+['"]\s*\)\s*; |
                   include(?:_once)?\s*\(\s*['"][^'"]+['"]\s*\)\s*;)
                """,

            Language.RUBY: r"""
                ^(?:require\s+['"][^'"]+['"] |
                   require_relative\s+['"][^'"]+['"] |
                   include\s+\w+ |
                   import\s+\w+)
                """,

            Language.RUST: r"""
                ^(?:use\s+[\w:{}*]+(?:\s+as\s+\w+)?\s*; |
                   extern\s+crate\s+\w+(?:\s+as\s+\w+)?\s*;)
                """,

            # The leading '#' is escaped: unescaped, VERBOSE mode would discard
            # the rest of the line and leave a pattern matching every line start.
            Language.C: r"""
                ^\#\s*include\s+(?:<[^>]+>|"[^"]+")
                """,

            Language.CPP: r"""
                ^\#\s*include\s+(?:<[^>]+>|"[^"]+")
                """,

            Language.C_SHARP: r"""
                ^(?:using\s+(?:static\s+|namespace\s+)?[\w\.]+(?:\s*=\s*[\w\.]+)?\s*; |
                   extern\s+alias\s+\w+\s*;)
                """,
        }

        pat = patterns.get(self.language)
        return re.compile(pat, re.MULTILINE | re.VERBOSE) if pat else None

    def get_imports_pattern(self) -> re.Pattern[str] | None:
        """
        Return the compiled regex matching import / include / require statements
        of the current language.

        Returns:
            A pattern compiled with ``re.MULTILINE`` (so ``^`` anchors at every
            line start), or ``None`` when no pattern is defined for the language.
        """
        # Single-line (non-VERBOSE) pattern strings, one per language.
        patterns: dict[Language, str] = {
            Language.PYTHON: r"^(?:from\s+[\w\.]+\s+)?import\s+(?:[\w\.]+(?:\s+as\s+\w+)?(?:\s*,\s*[\w\.]+(?:\s+as\s+\w+)?)*|\([\s\S]*?\))",

            Language.JAVASCRIPT: r"^(?:import\s+(?:[\w*{}][^;'\"]*\s+from\s+)?['`\"][^'\"]+['`\"]|(?:const|let|var)\s+\w+\s*=\s*require\s*\(\s*['`\"][^'\"]+['`\"]\s*\))",

            Language.TYPESCRIPT: r"^(?:import\s+(?:type\s+)?[\w*{}][^;'\"]*\s+from\s+['`\"][^'\"]+['`\"]|import\s*['`\"][^'\"]+['`\"]|(?:const|let|var)\s+\w+\s*=\s*require\s*\(\s*['`\"][^'\"]+['`\"]\s*\))",

            Language.JAVA: r"^import\s+(?:static\s+)?[\w\.]+\s*;",

            Language.GO: r"^import\s+(?:\w+\s+)?[`\"][^`\"]+[`\"]",

            Language.PHP: r"^(?:use\s+(?:function|const)?\s*[\w\\]+(?:\s+as\s+\w+)?\s*;|require(?:_once)?\s*\(\s*['\"][^'\"]+['\"]\s*\)\s*;|include(?:_once)?\s*\(\s*['\"][^'\"]+['\"]\s*\)\s*;)",

            Language.RUBY: r"^(?:require\s+['\"][^'\"]+['\"]|require_relative\s+['\"][^'\"]+['\"]|include\s+\w+|import\s+\w+)",

            Language.RUST: r"^(?:use\s+[\w:{}*]+(?:\s+as\s+\w+)?\s*;|extern\s+crate\s+\w+(?:\s+as\s+\w+)?\s*;)",

            Language.C: r"^#\s*include\s+(?:<[^>]+>|\"[^\"]+\")",

            Language.CPP: r"^#\s*include\s+(?:<[^>]+>|\"[^\"]+\")",

            Language.C_SHARP: r"^(?:using\s+(?:static\s+|namespace\s+)?[\w\.]+(?:\s*=\s*[\w\.]+)?\s*;|extern\s+alias\s+\w+\s*;)",
        }

        pat = patterns.get(self.language)
        return re.compile(pat, re.MULTILINE) if pat else None