"""
Rule generation utilities for ABBR value experiments.
"""

import numpy as np
import pandas as pd
import random
from typing import List, Tuple
import sys
import os

# Add parent directory to path to import rules
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from rules import Rule, Condition, Operator
from datasets import Dataset

def _profile_feature(column: pd.Series) -> Tuple[bool, pd.Series, object]:
    """
    Collect the per-column facts needed to sample a condition.

    Args:
        column: One feature column of the feature matrix

    Returns:
        Tuple of (is_categorical, non_null_values, unique_non_null_values)
    """
    # A column is treated as categorical when it holds strings/objects,
    # booleans, at most two distinct non-null values, or a pandas categorical.
    is_categorical = (
        column.dtype == 'object'
        or column.dtype == 'bool'
        or column.nunique() <= 2
        or str(column.dtype).startswith('category')
    )
    non_null = column.dropna()
    return is_categorical, non_null, non_null.unique()


def generate_random_rules(
    X: pd.DataFrame,
    y_quantile: pd.Series,
    num_rules: int,
    max_conditions: int,
    min_support: float,
    max_valid_rules: int,
    verbose: bool = False
) -> List["Rule"]:
    """
    Generate random rules with up to max_conditions and at least min_support.

    Each attempt draws 1..max_conditions distinct features. Categorical/binary
    features get an equality condition on a randomly chosen observed value;
    other features get a LESS/GREATER threshold at a random quantile in
    [0.1, 0.9] of their non-null values. Randomness comes from the global
    ``random`` module, so seed it beforehand for reproducible output.

    Args:
        X: Feature matrix
        y_quantile: Target quantile values (not used by the generator itself)
        num_rules: Total number of rules to attempt generating
        max_conditions: Maximum number of conditions per rule
        min_support: Minimum support (coverage) required for a rule
        max_valid_rules: Stop generating after this many valid rules;
            a value <= 0 yields an empty list
        verbose: Whether to print debugging information

    Returns:
        List of valid rules
    """
    rules = []

    # Nothing may be kept, so do not generate (or return) anything.
    if max_valid_rules <= 0:
        return rules

    features = X.columns.tolist()

    # Column statistics do not change between attempts; compute them lazily
    # once per feature instead of rescanning the column for every condition.
    profiles = {}

    # Debug: Track support values
    all_support_values = []
    rejected_count = 0

    for _ in range(num_rules):
        # Randomly choose number of conditions (1 to max_conditions)
        num_conditions = random.randint(1, max_conditions)

        # Randomly select features without replacement
        selected_features = random.sample(features, min(num_conditions, len(features)))

        conditions = []
        for feature in selected_features:
            if feature not in profiles:
                profiles[feature] = _profile_feature(X[feature])
            is_categorical, non_null, unique_values = profiles[feature]

            use_equality = is_categorical
            if not is_categorical:
                # Numerical feature - use a quantile-based threshold to get
                # reasonable splits.
                try:
                    quantile = random.uniform(0.1, 0.9)
                    value = np.quantile(non_null, quantile)
                    operator = random.choice([Operator.LESS, Operator.GREATER])
                except (TypeError, ValueError):
                    # If quantile calculation fails, treat as categorical
                    use_equality = True

            if use_equality:
                if len(unique_values) == 0:
                    continue  # Skip features with no valid values
                value = random.choice(unique_values)
                operator = Operator.EQUAL

            conditions.append(Condition(feature, operator, value))

        # Skip if no valid conditions were created
        if not conditions:
            continue

        rule = Rule(conditions)

        # Check if rule has sufficient support (fraction of rows in the mask)
        support = np.mean(rule.get_mask(X))
        all_support_values.append(support)

        if support >= min_support:
            rules.append(rule)
        else:
            rejected_count += 1

        # Stop if we have enough valid rules
        if len(rules) >= max_valid_rules:
            break

    # Debug: Print support statistics
    if verbose and len(all_support_values) > 0:
        print(f"  Support statistics for {len(all_support_values)} generated rules:")
        print(f"    Min support: {np.min(all_support_values):.4f}")
        print(f"    Max support: {np.max(all_support_values):.4f}")
        print(f"    Mean support: {np.mean(all_support_values):.4f}")
        print(f"    Median support: {np.median(all_support_values):.4f}")
        print(f"    Rules rejected (support < {min_support}): {rejected_count}")
        print(f"    Percentage rejected: {rejected_count / len(all_support_values) * 100:.1f}%")

    return rules

def calculate_rule_metrics(
    rules: List["Rule"],
    X: pd.DataFrame,
    y_quantile: pd.Series,
    y_binary: pd.Series
) -> pd.DataFrame:
    """
    Calculate ABBR and consistency metrics for each rule.

    Rules whose mask covers no rows of X are left out of the result. The
    returned frame always has the columns 'rule', 'support', 'abbr' and
    'consistency', even when no rule qualifies, so callers can select those
    columns without checking for emptiness first.

    Args:
        rules: List of rules to evaluate
        X: Feature matrix
        y_quantile: Target quantile values
        y_binary: Binary target values (for consistency calculation)

    Returns:
        DataFrame with one row per covering rule: the rule object, its
        support (mean of the mask), its ABBR (mean of y_quantile over covered
        rows) and its consistency (mean of y_binary over covered rows)
    """
    columns = ['rule', 'support', 'abbr', 'consistency']
    rule_metrics = []

    for rule in rules:
        # presumably a boolean row mask aligned with X - confirm against Rule
        mask = rule.get_mask(X)
        support = np.mean(mask)

        # A rule covering nothing has no defined averages; skip it.
        if support > 0:
            rule_metrics.append({
                'rule': rule,
                'support': support,
                # ABBR: Average quantile of covered points
                'abbr': np.mean(y_quantile[mask]),
                # Consistency: Proportion of covered points that are positive
                'consistency': np.mean(y_binary[mask]),
            })

    # Passing the columns explicitly keeps the schema stable for empty results.
    return pd.DataFrame(rule_metrics, columns=columns)

def evaluate_rule_on_test(
    rule: Rule,
    X_test: pd.DataFrame,
    y_test_binary: pd.Series
) -> Tuple[float, float]:
    """
    Score one rule against held-out data.

    Args:
        rule: Rule to evaluate
        X_test: Test feature matrix
        y_test_binary: Test binary target values

    Returns:
        Tuple of (test_consistency, test_support): the mean of y_test_binary
        over the rows selected by the rule's mask, and the mean of the mask
        itself. Both are 0 when the mask selects no rows.
    """
    covered = rule.get_mask(X_test)

    # Guard clause: a rule that selects nothing scores zero on both metrics.
    if not np.sum(covered) > 0:
        return 0, 0

    return np.mean(y_test_binary[covered]), np.mean(covered)

def _is_numeric_dtype(dtype) -> bool:
    """Return True if dtype is numeric, without failing on pandas extension dtypes."""
    if isinstance(dtype, np.dtype):
        return bool(np.issubdtype(dtype, np.number))
    # np.issubdtype raises TypeError for extension dtypes (categorical,
    # nullable integers, ...), so defer to pandas for those.
    return bool(pd.api.types.is_numeric_dtype(dtype))


def analyze_feature_types(X: pd.DataFrame, verbose: bool = True) -> dict:
    """
    Analyze feature types in the dataset.

    Each column is counted in the first matching bucket, in this order:
    'object' dtype, 'bool' dtype, at most two distinct non-null values
    ('binary_numeric'), numeric dtype ('numeric'), anything else ('other').

    Args:
        X: Feature matrix
        verbose: Whether to print analysis

    Returns:
        Dictionary with feature type counts
    """
    feature_types = {
        'object': 0,
        'bool': 0,
        'binary_numeric': 0,
        'numeric': 0,
        'other': 0
    }

    # Iterating (name, column) pairs also copes with duplicate column names.
    for _, column in X.items():
        dtype = column.dtype
        if dtype == 'object':
            bucket = 'object'
        elif dtype == 'bool':
            bucket = 'bool'
        elif column.nunique() <= 2:
            bucket = 'binary_numeric'
        elif _is_numeric_dtype(dtype):
            bucket = 'numeric'
        else:
            bucket = 'other'
        feature_types[bucket] += 1

    if verbose:
        print("Feature type analysis:")
        for ftype, count in feature_types.items():
            if count > 0:
                print(f"  {ftype}: {count} features")
        print(f"  Sample feature dtypes: {dict(X.dtypes.value_counts().head().items())}")

    return feature_types