"""
Path utilities for dictionary learning.
"""
import os
from pathlib import Path
from typing import List, Optional, Tuple


def validate_data_paths(key_path: str, value_path: str) -> Tuple[bool, str]:
    """
    Check that the key and value data files are present and usable.

    The checks run in three passes, each applied to the key file first and
    then to the value file: existence, being a regular file, and read
    permission. The first failing check determines the result.

    Args:
        key_path: Path to key data file
        value_path: Path to value data file

    Returns:
        tuple: (is_valid, error_message); error_message is "" when valid
    """
    # Both Path objects are built up front so that an unusable argument
    # fails before any filesystem check runs.
    labelled = (
        ("Key", key_path, Path(key_path)),
        ("Value", value_path, Path(value_path)),
    )

    # Pass 1: both paths must exist.
    for label, raw, resolved in labelled:
        if not resolved.exists():
            return False, f"{label} data file not found: {raw}"

    # Pass 2: both paths must refer to files rather than directories.
    for label, raw, resolved in labelled:
        if not resolved.is_file():
            return False, f"{label} path is not a file: {raw}"

    # Pass 3: both files must be readable by the current process.
    for label, raw, _ in labelled:
        if not os.access(raw, os.R_OK):
            return False, f"{label} data file is not readable: {raw}"

    return True, ""


def find_data_file(base_dir: Path, patterns: List[str]) -> Optional[Path]:
    """
    Return the first candidate under ``base_dir`` that is an existing file.

    Each entry of ``patterns`` is joined onto ``base_dir`` as-is (no glob
    expansion is performed) and tested in the order given.

    Args:
        base_dir: Base directory to search in
        patterns: List of filename patterns to try

    Returns:
        Path to found file or None if no file found
    """
    candidates = (base_dir / name for name in patterns)
    return next(
        (path for path in candidates if path.exists() and path.is_file()),
        None,
    )


def construct_wiki_data_paths(
    model_name: str,
    concat_layers: int = 1,
    base_dir: str = '/data/llm/tmp'
) -> Tuple[str, str]:
    """
    Construct paths for wiki data files based on model name and configuration.

    Everything after the first "wiki" in ``model_name`` is used as the data
    capacity tag embedded in the filenames. Two candidate key filenames are
    looked up in ``base_dir``; when ``concat_layers`` is greater than 1 and
    nothing is found there, ``base_dir/wiki_samples/concat_features`` is
    searched as well. If no file exists, the first candidate filename is
    returned in the directory that matches ``concat_layers``, so the
    returned paths are not guaranteed to exist.

    The value path is the key path with ``_key_`` replaced by ``_value_``
    in the filename only; directory components are left untouched.

    Args:
        model_name: Model name containing wiki information
        concat_layers: Number of concatenated layers
        base_dir: Base directory for data files

    Returns:
        tuple: (key_data_path, value_data_path)

    Raises:
        ValueError: If model name format is invalid
    """
    # Extract data capacity from model name
    wiki_index = model_name.find('wiki')
    if wiki_index == -1:
        raise ValueError(f"Invalid model_name, 'wiki' not found: {model_name}")

    data_capa = model_name[wiki_index + len('wiki'):]
    base_path = Path(base_dir)

    # Candidate filenames, tried in this order
    model_prefix = "Qwen2.5-7B-Instruct-1M"
    patterns = [
        f"{model_prefix}_2nd_wiki_samples_1m_key_{data_capa}_all_{concat_layers}layer.pt",
        f"{model_prefix}_wiki_samples_1m_key_{data_capa}_all_{concat_layers}layer.pt"
    ]

    use_concat_dir = concat_layers > 1
    concat_base = base_path / 'wiki_samples' / 'concat_features'

    key_file = find_data_file(base_path, patterns)

    if key_file is None:
        # Nothing directly under base_dir: fall back to the concat_features
        # subdirectory when concatenated layers are requested.
        if use_concat_dir:
            key_file = find_data_file(concat_base, patterns)
    elif use_concat_dir:
        # A file was found directly under base_dir. If that location sits
        # inside a wiki_samples directory, redirect to its concat_features
        # subdirectory. The redirected path is not checked for existence.
        # NOTE(review): this match uses a literal '/' separator, so it only
        # fires for POSIX-style path strings -- confirm that is intended.
        key_str = str(key_file)
        if '/wiki_samples/' in key_str and '/concat_features/' not in key_str:
            key_file = Path(key_str.replace('/wiki_samples/', '/wiki_samples/concat_features/'))

    # Use first pattern as default if no file found
    if key_file is None:
        default_dir = concat_base if use_concat_dir else base_path
        key_file = default_dir / patterns[0]

    # Derive the value path from the filename alone. Replacing on the full
    # path string would also rewrite any directory component that happens
    # to contain '_key_', pointing the value file at a different directory.
    value_file = key_file.with_name(key_file.name.replace('_key_', '_value_'))

    return str(key_file), str(value_file)


def ensure_path_exists(path: Path, is_file: bool = False) -> None:
    """
    Create the directory needed for ``path``, including missing parents.

    Args:
        path: Path to ensure exists
        is_file: If True, ``path`` is treated as a file and only its parent
            directory is created; if False, ``path`` itself is created as a
            directory
    """
    # For a file path only the containing directory is created; the file
    # itself is never touched.
    directory = path.parent if is_file else path
    directory.mkdir(parents=True, exist_ok=True)


def get_relative_path(path: Path, base: Path) -> Path:
    """
    Express ``path`` relative to ``base`` when possible.

    Args:
        path: Target path
        base: Base directory

    Returns:
        ``path`` relative to ``base``, or ``path`` unchanged when it cannot
        be expressed relative to ``base``
    """
    try:
        relative = path.relative_to(base)
    except ValueError:
        # relative_to() rejected the pair; hand the input back untouched
        # (no conversion to an absolute path is performed).
        return path
    return relative


def safe_path_join(*parts: str) -> Path:
    """
    Join path parts into a single Path, skipping falsy parts.

    Args:
        *parts: Path parts to join; None and empty values are ignored and
            the remaining parts are converted with ``str()``

    Returns:
        Joined path, or an empty ``Path()`` when no usable part remains
    """
    # filter(None, ...) drops every falsy part (None, "", ...).
    segments = map(str, filter(None, parts))

    # Path() with no segments is the empty path, so no special case is needed.
    return Path(*segments)


def normalize_model_name(model_name: str) -> str:
    """
    Make a model name usable as part of a filename.

    Path separators and the characters ``< > : " | ? *`` are each replaced
    with an underscore; every other character is kept as-is.

    Args:
        model_name: Raw model name

    Returns:
        Normalized model name safe for filenames
    """
    separators = "/" + "\\"
    invalid_chars = '<>:"|?*'

    # One translation table maps every unwanted character to "_".
    unwanted = separators + invalid_chars
    table = str.maketrans(unwanted, "_" * len(unwanted))
    return model_name.translate(table)
