"""
NHL-specific feature engineering and adjustments.

This module provides functions for NHL-specific feature engineering and
adjustments to account for the unique characteristics of hockey data.
"""

import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression

def _fit_mixed_effects_model(model_df, group_labels):
    """Fit a random-intercept linear mixed model; return (fit result, {group label: effect})."""
    # statsmodels/patsy formulas have no lme4-style "(1|group)" term. The
    # per-goalie random intercept is requested through ``groups`` instead.
    formula = ("save ~ shot_distance + shot_angle + C(shot_type) + "
               "rebound + rush_shot + score_differential + "
               "period_seconds_remaining")
    result = smf.mixedlm(formula, model_df, groups=group_labels).fit(reml=True)

    # ``random_effects`` maps each group label to its random-effect values;
    # with only a random intercept there is a single value per group.
    effects = {
        label: float(np.asarray(values).ravel()[0])
        for label, values in result.random_effects.items()
    }
    return result, effects


def _fit_logistic_fallback(model_df):
    """Fit global and per-goalie logistic regressions; return (global model, {goalie_id: effect})."""
    # One-hot encode the categorical shot_type column; numeric columns pass through.
    features = pd.get_dummies(
        model_df[['shot_distance', 'shot_angle', 'shot_type',
                  'rebound', 'rush_shot',
                  'score_differential', 'period_seconds_remaining']],
        drop_first=True,
    ).astype(float)
    target = model_df['save']

    global_model = LogisticRegression(penalty='l2', C=10.0,
                                      solver='liblinear', random_state=42)
    global_model.fit(features, target)
    global_intercept = global_model.intercept_[0]

    effects = {}
    # ``indices`` gives positional row numbers per goalie, so each goalie's
    # rows are selected once rather than by re-scanning the frame with a mask.
    for goalie_id, rows in model_df.groupby('goalie_id', sort=False).indices.items():
        goalie_target = target.iloc[rows]
        # LogisticRegression cannot be fit on a single class (for example a
        # goalie who saved every shot); such goalies get no effect entry.
        if goalie_target.nunique() < 2:
            continue

        goalie_model = LogisticRegression(penalty='l2', C=10.0,
                                          solver='liblinear', random_state=42)
        goalie_model.fit(features.iloc[rows], goalie_target)

        # Effect is the goalie's intercept relative to the global intercept.
        effects[goalie_id] = goalie_model.intercept_[0] - global_intercept

    return global_model, effects


def model_goalie_save_probability(shots_df, min_shots_per_goalie=50):
    """
    Fit a mixed effects model for goalie save probability.

    A random-intercept mixed model (statsmodels ``mixedlm``) is tried first.
    If it raises, the function falls back to L2-regularised logistic
    regressions: one global model plus one model per goalie.

    Parameters:
    -----------
    shots_df : pandas.DataFrame
        DataFrame containing shot data. The columns read here are
        ``goalie_id``, ``is_goal``, ``shot_distance``, ``shot_angle``,
        ``shot_type``, ``rebound``, ``rush_shot``, ``score_differential``
        and ``period_seconds_remaining``. The input is not modified.
    min_shots_per_goalie : int, optional (default=50)
        Minimum number of shots faced by a goalie to be included in the model

    Returns:
    --------
    model : fitted model object or None
        The fitted mixed-model result, or the global logistic regression
        when the fallback was used. ``None`` when no goalie meets
        ``min_shots_per_goalie``.
    goalie_effects : dict
        Dictionary mapping goalie IDs (as they appear in ``shots_df``) to
        their effects. Mixed-model effects are random intercepts of a
        linear model; fallback effects are differences between a goalie's
        logistic intercept and the global one. Goalies below the shot
        threshold, and in the fallback goalies whose shots all have the
        same outcome, are absent.

    Notes:
    ------
    Exceptions raised by the fallback fit itself are not caught and
    propagate to the caller.
    """
    # Filter goalies with sufficient data
    goalie_counts = shots_df['goalie_id'].value_counts()
    valid_goalies = goalie_counts[goalie_counts >= min_shots_per_goalie].index
    filtered_df = shots_df[shots_df['goalie_id'].isin(valid_goalies)].copy()

    # Nothing to fit: return an empty result instead of failing inside a fit.
    if filtered_df.empty:
        return None, {}

    # 1 = save, 0 = goal. Subtracting from 1 is correct for both boolean and
    # 0/1 integer columns, whereas bitwise ``~`` on integers gives -1/-2.
    filtered_df['save'] = 1 - filtered_df['is_goal'].astype(int)

    try:
        # Group on string labels for statsmodels, then translate the labels
        # back so the returned keys match the ids stored in ``shots_df``.
        group_labels = filtered_df['goalie_id'].astype(str)
        model, labelled_effects = _fit_mixed_effects_model(filtered_df, group_labels)
        original_ids = dict(zip(group_labels, filtered_df['goalie_id']))
        goalie_effects = {
            original_ids[label]: effect
            for label, effect in labelled_effects.items()
        }
    except Exception as exc:
        print(f"Mixed effects model failed: {exc}")
        print("Falling back to logistic regression...")
        model, goalie_effects = _fit_logistic_fallback(filtered_df)

    return model, goalie_effects

def adjust_shooter_performance(shots_df, goalie_effects):
    """
    Attach a goalie-adjusted goal rate for each shooter to every shot row.

    Parameters:
    -----------
    shots_df : pandas.DataFrame
        DataFrame containing shot data. The columns read here are
        ``goalie_id``, ``shooter_id``, ``is_goal`` and ``shot_id``.
        The input is copied, not modified.
    goalie_effects : dict
        Dictionary mapping goalie IDs to their effects. Goalies missing
        from the dictionary are given an effect of 0.

    Returns:
    --------
    adjusted_df : pandas.DataFrame
        The shot rows with added columns ``goalie_effect``,
        ``expected_save_prob``, ``expected_goal`` and
        ``adjusted_goal_rate``.
    """
    shots = shots_df.copy()

    # Look up each shot's goalie effect; unknown goalies default to 0.
    shots['goalie_effect'] = shots['goalie_id'].map(goalie_effects).fillna(0)

    # Logistic transform of the effect alone, so an effect of 0 corresponds
    # to an expected save probability of 0.5.
    shots['expected_save_prob'] = 1 / (1 + np.exp(-shots['goalie_effect']))
    shots['expected_goal'] = 1 - shots['expected_save_prob']

    # Per-shooter totals: goals scored, expected goals, and shots taken
    # (counted as non-null shot_id values).
    per_shooter = (
        shots.groupby('shooter_id')
        .agg(
            goals=('is_goal', 'sum'),
            expected_goals=('expected_goal', 'sum'),
            shots=('shot_id', 'count'),
        )
        .reset_index()
    )

    raw_rate = per_shooter['goals'] / per_shooter['shots']
    per_shooter['raw_goal_rate'] = raw_rate

    # Goals relative to expected goals, rescaled by the mean raw goal rate
    # across shooters.
    goals_vs_expected = per_shooter['goals'] / per_shooter['expected_goals']
    per_shooter['adjusted_goal_rate'] = goals_vs_expected * np.mean(raw_rate)

    # Broadcast the per-shooter rate back onto the individual shot rows.
    rate_lookup = per_shooter[['shooter_id', 'adjusted_goal_rate']]
    return shots.merge(rate_lookup, on='shooter_id', how='left')

def calculate_goalie_quality_index(shots_df, goalie_effects):
    """
    Calculate Goalie Quality Index (GQI) for each player.

    The index is the mean goalie effect over a shooter's shots,
    standardised across shooters.

    Parameters:
    -----------
    shots_df : pandas.DataFrame
        DataFrame containing shot data. The columns read here are
        ``goalie_id``, ``shooter_id`` and ``shot_id``. The input is
        copied, not modified.
    goalie_effects : dict
        Dictionary mapping goalie IDs to their effects. Goalies missing
        from the dictionary are given an effect of 0.

    Returns:
    --------
    player_gqi : pandas.DataFrame
        One row per shooter with columns ``shooter_id``,
        ``goalie_quality_index`` and ``shots``. The index is z-scored
        across shooters only when its standard deviation is greater
        than 0; otherwise the raw means are returned.
    """
    working = shots_df.copy()

    # Attach each shot's goalie effect; unknown goalies default to 0.
    working['goalie_effect'] = working['goalie_id'].map(goalie_effects).fillna(0)

    # Average effect faced and number of shots (non-null shot_id) per shooter.
    player_gqi = (
        working.groupby('shooter_id')
        .agg(
            goalie_quality_index=('goalie_effect', 'mean'),
            shots=('shot_id', 'count'),
        )
        .reset_index()
    )

    raw_index = player_gqi['goalie_quality_index']
    centre = raw_index.mean()
    spread = raw_index.std()

    # Standardise only when there is spread. A zero or NaN standard
    # deviation fails this comparison and leaves the raw means in place.
    if spread > 0:
        player_gqi['goalie_quality_index'] = (raw_index - centre) / spread

    return player_gqi

def integrate_goalie_adjustments(shots_df, min_shots_per_goalie=50):
    """
    Run the full goalie-adjustment pipeline on a shot table.

    Fits the goalie model, computes shooter-level adjusted goal rates,
    computes the Goalie Quality Index, and joins both per-shooter results
    onto the shot rows.

    Parameters:
    -----------
    shots_df : pandas.DataFrame
        DataFrame containing shot data
    min_shots_per_goalie : int, optional (default=50)
        Minimum number of shots faced by a goalie to be included in the model

    Returns:
    --------
    adjusted_df : pandas.DataFrame
        Output of ``adjust_shooter_performance`` with a
        ``goalie_quality_index`` column merged in by ``shooter_id``
    model : fitted model object
        The model returned by ``model_goalie_save_probability``
    goalie_effects : dict
        Dictionary mapping goalie IDs to their effects
    """
    # Step 1: estimate per-goalie effects.
    fitted_model, effects_by_goalie = model_goalie_save_probability(
        shots_df, min_shots_per_goalie
    )

    # Step 2: shooter metrics adjusted for the goalies they faced.
    shooter_adjusted = adjust_shooter_performance(shots_df, effects_by_goalie)

    # Step 3: per-shooter Goalie Quality Index.
    gqi_table = calculate_goalie_quality_index(shots_df, effects_by_goalie)

    # Step 4: left-join the index so every shot row is kept.
    gqi_columns = gqi_table[['shooter_id', 'goalie_quality_index']]
    combined = shooter_adjusted.merge(gqi_columns, on='shooter_id', how='left')

    return combined, fitted_model, effects_by_goalie


