import json                                                     
import os                        
import yaml                          
import re                                    

# Main YAML settings file, kept in the ``config`` directory that sits next to
# this module's own directory.
CONFIG_PATH = os.path.join(
    os.path.dirname(__file__), os.pardir, "config", "settings.yaml"
)
# Default location of the optimized perplexity parameters JSON, in that same
# ``config`` directory.
OPTIMIZED_PERPLEXITY_PARAMS_DEFAULT_PATH = os.path.join(
    os.path.dirname(__file__),
    os.pardir,
    "config",
    "optimized_perplexity_params.json",
)


def _replace_base_project_dir(obj, base_path):
    """Return a copy of *obj* with ``<BASE_PROJECT_DIR>`` expanded to *base_path*.

    Dicts and lists are walked recursively and rebuilt; dict keys are kept
    untouched and only values are processed. Strings get every occurrence of
    the placeholder substituted. Any other kind of value (numbers, ``None``,
    tuples, ...) is handed back unchanged.
    """
    if isinstance(obj, str):
        return obj.replace("<BASE_PROJECT_DIR>", base_path)

    if isinstance(obj, dict):
        expanded = {}
        for key, value in obj.items():
            expanded[key] = _replace_base_project_dir(value, base_path)
        return expanded

    if isinstance(obj, list):
        expanded_items = []
        for item in obj:
            expanded_items.append(_replace_base_project_dir(item, base_path))
        return expanded_items

    return obj


def get_config():
    """Load, post-process and cache the project configuration.

    On the first call this function:

    1. parses the YAML file at ``CONFIG_PATH``;
    2. replaces every ``<BASE_PROJECT_DIR>`` placeholder with the absolute
       path of this module's parent directory;
    3. looks, in the same directory as ``CONFIG_PATH``, for the JSON file
       named by ``perplexity_analyzer.optimized_params_file`` (default
       ``optimized_perplexity_params.json``) and, if it exists, merges its
       ``category_specific_settings`` (replacing) and
       ``default_engine_settings`` (key-by-key update) into the
       ``perplexity_analyzer`` section.

    The result is stored as an attribute on the function object, so later
    calls return the very same dict without reading any file again.

    Returns:
        dict: the merged configuration.

    Raises:
        OSError: if the YAML settings file cannot be opened.
    """
    if hasattr(get_config, "config"):
        return get_config.config

    with open(CONFIG_PATH, "r", encoding="utf-8") as f:
        # safe_load yields None for an empty document; fall back to an empty
        # mapping so the lookups below keep working.
        config_data = yaml.safe_load(f) or {}

    base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    config_data = _replace_base_project_dir(config_data, base_path)

    # A section key written without a body loads as None, hence ``or {}``
    # instead of a ``.get`` default.
    perplexity_analyzer_config = config_data.get("perplexity_analyzer") or {}
    optimized_params_file_name = perplexity_analyzer_config.get(
        "optimized_params_file", "optimized_perplexity_params.json"
    )
    # The parameters file is resolved relative to the settings file's folder.
    optimized_params_path = os.path.join(
        os.path.dirname(CONFIG_PATH), optimized_params_file_name
    )

    if os.path.exists(optimized_params_path):
        with open(optimized_params_path, "r", encoding="utf-8") as f_params:
            loaded_json_content = json.load(f_params)

        # Category-specific settings from the JSON replace the YAML ones.
        if "category_specific_settings" in loaded_json_content:
            perplexity_analyzer_config["category_specific_settings"] = (
                loaded_json_content["category_specific_settings"]
            )

        # Engine defaults from the JSON override the YAML ones key by key.
        if "default_engine_settings" in loaded_json_content:
            engine_defaults = (
                perplexity_analyzer_config.get("default_engine_settings") or {}
            )
            engine_defaults.update(loaded_json_content["default_engine_settings"])
            perplexity_analyzer_config["default_engine_settings"] = engine_defaults

    elif "default_engine_settings" not in perplexity_analyzer_config:
        print(f"Warning: Optimized perplexity parameters file '{optimized_params_path}' not found, and default_engine_settings might be incomplete.")

    config_data["perplexity_analyzer"] = perplexity_analyzer_config
    get_config.config = config_data
    return get_config.config

                                      
                                        
                                                            

class Config:
    """Attribute-style view over the dict returned by ``get_config()``.

    Every settings section is read once at construction time and its values
    are copied onto the instance. When a key is absent, the literal default
    written next to it in ``__init__`` is used instead.
    """

    def __init__(self):
        """Populate the instance attributes from the cached project settings."""
        settings = get_config()

        def section(name):
            # A YAML section declared without a body loads as None. Treat it
            # like a missing section so the defaults below apply instead of
            # failing with AttributeError on ``None.get``.
            return settings.get(name) or {}

        # Embedding model.
        self.model_name = section("embedding_model").get("model_name", "default_model")

        # NLP behaviour.
        self.unsupported_language_fallback = section("nlp").get(
            "unsupported_language_fallback", "log_warning_return_none"
        )

        # CSV splitting.
        csv_splitting_settings = section("csv_splitting")
        self.default_split_ratio = csv_splitting_settings.get("default_split_ratio", 0.8)
        self.split_column_name = csv_splitting_settings.get("split_column_name", "split")
        self.database_split_value = csv_splitting_settings.get("database_split_value", "database")
        self.benchmark_split_value = csv_splitting_settings.get("benchmark_split_value", "benchmark")
        self.cleared_split_value = csv_splitting_settings.get("cleared_split_value", "")

        # Detection pipeline.
        detection_pipeline_settings = section("detection_pipeline")
        # The configured value may be a string or otherwise non-integer;
        # fall back to 10 when it cannot be converted.
        try:
            self.top_k_semantic_search = int(
                detection_pipeline_settings.get("top_k_semantic_search", 10)
            )
        except (ValueError, TypeError):
            self.top_k_semantic_search = 10

        self.ensemble_strategy = detection_pipeline_settings.get("ensemble_strategy", "vector_dominant")
        self.perplexity_dominant_unsafe_threshold = detection_pipeline_settings.get(
            "perplexity_dominant_unsafe_threshold", 0.75
        )
        self.vector_dominant_safe_distance_threshold = detection_pipeline_settings.get(
            "vector_dominant_safe_distance_threshold", 0.1
        )
        self.vector_dominant_safe_perplexity_threshold = detection_pipeline_settings.get(
            "vector_dominant_safe_perplexity_threshold", 0.25
        )
        self.sentence_perplexity_unsafe_threshold = detection_pipeline_settings.get(
            "sentence_perplexity_unsafe_threshold", 0.6
        )

        # Weighted-majority ensemble parameters.
        self.weighted_majority_mixed_label_ratio_threshold = detection_pipeline_settings.get(
            "weighted_majority_mixed_label_ratio_threshold", 0.3
        )
        self.weighted_majority_vote_weights = detection_pipeline_settings.get(
            "weighted_majority_vote_weights",
            {"default_primary": 0.6, "default_perplexity": 0.4, "mixed_primary": 0.3, "mixed_perplexity": 0.7},
        )

        # Clustering.
        clustering_settings = section("clustering")
        self.cluster_assignment_top_k = clustering_settings.get("cluster_assignment_top_k", None)
        self.cluster_field_name_for_assignment = clustering_settings.get(
            "cluster_field_name_for_assignment", "prompt_category"
        )

        # Perplexity analyzer.
        perplexity_analyzer_settings = section("perplexity_analyzer")
        # ``or {}`` guards against an explicit null value, which would
        # otherwise break the ``.get`` call on this mapping further down.
        self.default_perplexity_engine_settings = (
            perplexity_analyzer_settings.get("default_engine_settings") or {}
        )
        self.category_specific_perplexity_settings = perplexity_analyzer_settings.get(
            "category_specific_settings", {}
        )

        self.dynamic_perplexity_enabled = perplexity_analyzer_settings.get("dynamic_perplexity_enabled", True)
        self.dynamic_perplexity_top_k = perplexity_analyzer_settings.get("dynamic_perplexity_top_k", None)

        # The log-prob model is taken from the engine defaults first, then
        # from the analyzer section itself, then from the hard-coded default.
        self.model_for_log_probs = self.default_perplexity_engine_settings.get(
            "model_for_log_probs",
            perplexity_analyzer_settings.get("model_for_log_probs", "google/gemma-3-1b-it"),
        )

        # NOTE: the engine settings are expected to carry the keys
        # adversarial_token_uniform_log_prob, lambda_smoothness_penalty,
        # mu_adversarial_token_prior, apply_first_token_neutral_bias,
        # sentence_adversarial_probability_threshold and model_for_log_probs.
        # Their presence is not validated here; missing keys are tolerated.


class GlobalConfig(Config):
    """Singleton variant of ``Config``.

    Every ``GlobalConfig()`` call hands back the same object. The inherited
    ``Config.__init__`` runs only until it has completed once; afterwards
    construction is a no-op.
    """

    # The one shared instance, created lazily by ``__new__``.
    _instance = None

    def __new__(cls):
        """Create the shared instance on first use, then keep returning it."""
        if cls._instance is not None:
            return cls._instance

        created = super().__new__(cls)
        # Flag consulted by ``__init__`` to skip repeated initialisation.
        created._initialized = False
        cls._instance = created
        return cls._instance

    def __init__(self):
        """Run ``Config.__init__`` unless it has already completed."""
        if not self._initialized:
            super().__init__()
            self._initialized = True

def get_global_config():
    """Return the shared ``GlobalConfig`` instance, building it on first use."""
    shared_config = GlobalConfig()
    return shared_config
