"""
Data management module for puzzle benchmark system
"""
import os
import pandas as pd
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import logging

# NOTE(review): `os` is already imported at the top of this file; the repeat
# below is redundant but harmless.
import sys
import os
# Append the parent of this file's directory to the import search path,
# presumably so that the `config` module imported below can be resolved —
# TODO confirm against the project layout.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import DATA_DIR, DOMAINS

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


@dataclass
class PuzzleData:
    """One puzzle record as read from a domain workbook.

    Attributes:
        name: Puzzle name (text of the ``Name`` column, whitespace-stripped
            by the loader).
        description: Puzzle statement (text of the ``Description`` column,
            whitespace-stripped by the loader).
        domain: Name of the domain the puzzle was loaded for.
        source_file: File name (not the full path) of the workbook the row
            came from.
    """

    name: str
    description: str
    domain: str
    source_file: str


class DataManager:
    """Load puzzle test data from per-domain Excel workbooks and cache it.

    Data for a domain is read from ``DATA_DIR/<domain>/``. Only ``.xlsx``
    files whose file name contains the domain name are considered, and each
    must provide ``Name`` and ``Description`` columns.
    """

    # Columns every puzzle workbook must provide.
    _REQUIRED_COLUMNS = ('Name', 'Description')

    def __init__(self):
        # Maps domain name -> puzzles loaded for it; filled on first request.
        self.data_cache: Dict[str, List[PuzzleData]] = {}

    def load_domain_data(self, domain: str) -> List[PuzzleData]:
        """Return all puzzles for ``domain``, loading them on first use.

        Args:
            domain: One of the names listed in ``DOMAINS``.

        Returns:
            The puzzles found for the domain, ordered by workbook file name
            and then by row. The list is the cached object itself, so callers
            should not mutate it. An empty list is returned (and not cached)
            when the domain directory does not exist.

        Raises:
            ValueError: If ``domain`` is not listed in ``DOMAINS``.
        """
        if domain not in DOMAINS:
            raise ValueError(f"Unsupported domain: {domain}. Supported: {DOMAINS}")

        cached = self.data_cache.get(domain)
        if cached is not None:
            return cached

        domain_dir = os.path.join(DATA_DIR, domain)
        # isdir rather than exists: a regular file at this path cannot be
        # listed and must be treated the same as a missing directory.
        if not os.path.isdir(domain_dir):
            logger.warning(f"Domain directory not found: {domain_dir}")
            return []

        puzzles: List[PuzzleData] = []
        # os.listdir returns entries in arbitrary order; sort so the puzzle
        # order is reproducible across machines and runs.
        for filename in sorted(os.listdir(domain_dir)):
            if self._is_domain_workbook(filename, domain):
                puzzles.extend(self._load_workbook(domain_dir, filename, domain))

        self.data_cache[domain] = puzzles
        logger.info(f"Total {len(puzzles)} puzzles loaded for domain: {domain}")

        return puzzles

    @staticmethod
    def _is_domain_workbook(filename: str, domain: str) -> bool:
        """Tell whether ``filename`` is a puzzle workbook for ``domain``."""
        if not filename.endswith('.xlsx') or domain not in filename:
            return False
        # Excel keeps a "~$<name>.xlsx" lock file next to an open workbook;
        # it is not a readable workbook and would only produce a load error.
        return not filename.startswith('~$')

    def _load_workbook(self, domain_dir: str, filename: str, domain: str) -> List[PuzzleData]:
        """Read one workbook; return its puzzles, or ``[]`` if it is unusable."""
        file_path = os.path.join(domain_dir, filename)
        try:
            df = pd.read_excel(file_path)

            missing_columns = [col for col in self._REQUIRED_COLUMNS if col not in df.columns]
            if missing_columns:
                logger.error(f"Missing columns in {filename}: {missing_columns}")
                return []

            # Iterate the two columns directly instead of df.iterrows():
            # iterrows builds one Series per row and may upcast values
            # (e.g. an integer name becoming "1.0").
            loaded = [
                PuzzleData(
                    name=str(name).strip(),
                    description=str(description).strip(),
                    domain=domain,
                    source_file=filename,
                )
                for name, description in zip(df['Name'], df['Description'])
                # Rows lacking either field are skipped.
                if pd.notna(name) and pd.notna(description)
            ]
        except Exception as e:
            # Deliberately broad: one unreadable or corrupt workbook must not
            # prevent the remaining workbooks of the domain from loading.
            logger.error(f"Error loading {file_path}: {e}")
            return []

        # Report the rows actually kept, not the raw row count of the sheet.
        logger.info(f"Loaded {len(loaded)} puzzles from {filename}")
        return loaded

    def get_all_domains(self) -> List[str]:
        """Return the domains from ``DOMAINS`` that have a data directory."""
        return [
            domain
            for domain in DOMAINS
            if os.path.isdir(os.path.join(DATA_DIR, domain))
        ]

    def get_puzzle_by_name(self, domain: str, name: str) -> Optional[PuzzleData]:
        """Return the first puzzle in ``domain`` whose name equals ``name``.

        Args:
            domain: One of the names listed in ``DOMAINS``.
            name: Exact (case-sensitive) puzzle name to look for.

        Returns:
            The matching puzzle, or ``None`` if there is no match.

        Raises:
            ValueError: If ``domain`` is not listed in ``DOMAINS``.
        """
        candidates = self.load_domain_data(domain)
        return next((puzzle for puzzle in candidates if puzzle.name == name), None)

    def refresh_cache(self):
        """Drop all cached data so the next request reloads from disk."""
        self.data_cache.clear()
        logger.info("Data cache cleared")