import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../..')))


import logging               
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from langdetect import detect, LangDetectException
from fortress.config import get_config               
from typing import List, Dict, Any, Optional, Union              

logger = logging.getLogger(__name__)                          

class NLPAnalyzer:
    """Lightweight text analyzer that does not depend on spaCy.

    Provides VADER sentiment scores, langdetect-based language detection and
    simple character-level statistics. What happens when a feature cannot be
    computed is governed by ``unsupported_language_fallback``, read from the
    ``nlp`` section of the project configuration.
    """

    def __init__(self):
        """Load the fallback policy from config and build the VADER analyzer."""
        self.logger = logging.getLogger("fortress.core.nlp_analyzer")
        config = get_config()['nlp']
        self.unsupported_language_fallback = config.get(
            'unsupported_language_fallback', 'log_warning_return_none'
        )
        # Built once and reused by get_sentiment_scores(); constructing the
        # analyzer re-reads the VADER lexicon, so it should not happen per call.
        self.sentiment_analyzer = SentimentIntensityAnalyzer()

    @staticmethod
    def _preview(text: Any, limit: int = 50) -> str:
        """Return at most *limit* characters of *text* for use in log lines.

        Goes through ``str()`` so that a non-string input (e.g. ``None``)
        cannot raise a second error while a failure is being reported.
        """
        return str(text)[:limit]

    def _handle_unsupported_language(self, feature_name: str, default_return_value: Any = None):
        """Apply the configured fallback policy for a feature that failed.

        Args:
            feature_name: Name of the feature, used in the log/exception text.
            default_return_value: Value handed back when the policy does not
                raise.

        Returns:
            ``default_return_value``. Any policy other than the two known ones
            returns it silently, without logging.

        Raises:
            ValueError: If the policy is ``"log_error_raise_exception"``.
        """
        policy = self.unsupported_language_fallback
        if policy == "log_error_raise_exception":
            message = f"{feature_name} extraction failed due to unsupported language or spaCy model not loaded."
            self.logger.error(message)
            raise ValueError(message)
        if policy == "log_warning_return_none":
            self.logger.warning(
                f"{feature_name} extraction not possible (unsupported language/model not loaded). Returning {default_return_value}."
            )
        return default_return_value

    def get_sentiment_scores(self, text: str) -> Optional[Dict[str, float]]:
        """Return the VADER polarity scores for *text*.

        No language check is performed here; the text is passed straight to
        the shared VADER analyzer created in ``__init__``.

        Args:
            text: Text to score.

        Returns:
            The dict produced by ``polarity_scores``, or the result of the
            fallback policy (``None`` unless the policy raises) if scoring
            fails.

        Raises:
            ValueError: If scoring fails and the fallback policy is
                ``"log_error_raise_exception"``.
        """
        try:
            return self.sentiment_analyzer.polarity_scores(text)
        except Exception as e:
            self.logger.warning(
                "VADER sentiment analysis failed for text: '%s...'. Error: %s",
                self._preview(text),
                e,
            )
            return self._handle_unsupported_language("sentiment_scores", default_return_value=None)

    def get_char_level_stats(self, text: str) -> Dict[str, Union[float, int]]:
        """Compute basic character-level statistics for *text*.

        Args:
            text: Text to measure. A falsy value yields all-zero statistics.

        Returns:
            A dict with:
              - ``alphanum_ratio``: alphanumeric characters / total length
              - ``special_char_count``: characters that are neither
                alphanumeric nor whitespace
              - ``whitespace_ratio``: whitespace characters / total length
              - ``avg_word_length``: mean length of whitespace-separated words
              - ``length``: total number of characters
        """
        if not text:
            return {
                "alphanum_ratio": 0.0,
                "special_char_count": 0,
                "whitespace_ratio": 0.0,
                "avg_word_length": 0.0,
                "length": 0,
            }

        length = len(text)

        # Classify every character in one pass; the three classes are
        # mutually exclusive (no character is both alphanumeric and space).
        alnum_count = whitespace_count = special_count = 0
        for char in text:
            if char.isalnum():
                alnum_count += 1
            elif char.isspace():
                whitespace_count += 1
            else:
                special_count += 1

        words = text.split()
        # Whitespace-only text is truthy but splits into zero words.
        avg_word_length = sum(map(len, words)) / len(words) if words else 0.0

        return {
            "alphanum_ratio": alnum_count / length,
            "special_char_count": special_count,
            "whitespace_ratio": whitespace_count / length,
            "avg_word_length": avg_word_length,
            "length": length,
        }

    def get_dominant_language(self, text: str) -> Optional[str]:
        """Detect the dominant language of *text* with langdetect.

        NOTE: langdetect documents its detection as non-deterministic unless
        ``DetectorFactory.seed`` is set; this method does not set a seed.

        Args:
            text: Text to inspect.

        Returns:
            The language code returned by ``langdetect.detect``. For missing,
            non-string or blank input, and when langdetect cannot decide, the
            result of the fallback policy (``None`` unless the policy raises).
            ``None`` on any other unexpected error.

        Raises:
            ValueError: If detection is not possible and the fallback policy
                is ``"log_error_raise_exception"``.
        """
        # Guard against None / non-string input before calling .strip().
        if not isinstance(text, str) or not text.strip():
            return self._handle_unsupported_language("dominant_language", default_return_value=None)
        try:
            return detect(text)
        except LangDetectException:
            self.logger.warning(
                "Language detection failed for text: '%s...'.", self._preview(text)
            )
            return self._handle_unsupported_language("dominant_language", default_return_value=None)
        except Exception as e:
            # Unexpected failures bypass the fallback policy and never raise.
            self.logger.error(f"Error detecting language: {e}")
            return None

    def extract_all_features(self, text: str) -> Dict[str, Any]:
        """Run every available (non-spaCy) extractor on *text*.

        Args:
            text: Text to analyze.

        Returns:
            A dict with the keys ``dominant_language``, ``sentiment_scores``
            and ``char_level_stats``, holding the result of the corresponding
            ``get_*`` method.
        """
        return {
            'dominant_language': self.get_dominant_language(text),
            'sentiment_scores': self.get_sentiment_scores(text),
            'char_level_stats': self.get_char_level_stats(text),
        }

def _print_feature_table(console, title: str, features: Dict[str, Any],
                         keys: Optional[List[str]] = None) -> None:
    """Print *features* as a two-column (Feature / Value) rich table.

    Args:
        console: The ``rich.console.Console`` to print to.
        title: Table title.
        features: Mapping of feature name to value; values are shown via
            ``str()``.
        keys: If given, only these keys are shown, in this order, and a key
            missing from *features* is rendered as ``"N/A"``. If omitted,
            every entry of *features* is shown.
    """
    from rich.table import Table

    table = Table(title=title)
    table.add_column("Feature", style="cyan")
    table.add_column("Value", style="magenta")

    if keys is None:
        for key, value in features.items():
            table.add_row(key, str(value))
    else:
        for key in keys:
            table.add_row(key, str(features[key]) if key in features else "N/A")

    console.print(table)


def main() -> None:
    """Manual smoke test: analyze three sample texts and print the results."""
    from rich.console import Console

    console = Console()
    console.print("[bold green]Starting NLPAnalyzer test...[/bold green]")

    try:
        analyzer = NLPAnalyzer()
        test_text_en = "This is a complex English sentence with modals like can and should, and some pronouns like I and you. It will be analyzed."
        test_text_fr = "Ceci est une phrase en français."
        test_text_long = """This is a very long text. """ * 500

        console.print(f"\n[bold]Analyzing English text:[/bold] '{test_text_en}'")
        _print_feature_table(
            console,
            "NLP Features (English)",
            analyzer.extract_all_features(test_text_en),
        )

        console.print(f"\n[bold]Analyzing French text (expect some None for spaCy dependent features if model is en_core_web_sm):[/bold] '{test_text_fr}'")
        _print_feature_table(
            console,
            "NLP Features (French)",
            analyzer.extract_all_features(test_text_fr),
        )

        console.print(f"\n[bold]Analyzing very long English text (first 50 chars):[/bold] '{test_text_long[:50]}...'")
        # 'sentence_complexity_score' is not among the keys that
        # extract_all_features() returns in this file, so it shows as "N/A".
        selected_features_to_show = ['dominant_language', 'sentence_complexity_score', 'sentiment_scores', 'char_level_stats']
        _print_feature_table(
            console,
            "NLP Features (Long English Text) - Selected",
            analyzer.extract_all_features(test_text_long),
            keys=selected_features_to_show,
        )
        console.print("Note: POS tags and dependency relations for long text might be truncated or None based on internal limits.")

    except ImportError as ie:
        console.print(f"[bold red]ImportError during NLPAnalyzer test:[/bold red] {ie}. Please ensure all dependencies are installed.")
    except FileNotFoundError as fe:
        console.print(f"[bold red]FileNotFoundError during NLPAnalyzer test:[/bold red] {fe}. Check model paths or data files.")
    except Exception as e:
        # Top-level boundary of a manual test script: report and fall through.
        console.print(f"[bold red]NLPAnalyzer test failed unexpectedly:[/bold red] {e}")

    console.print("\n[bold green]NLPAnalyzer test finished.[/bold green]")


if __name__ == '__main__':
    main()
