"""
Feature engineering utilities for sports data.

This module provides functions for generating advanced features from sports data.
"""

import pandas as pd
import numpy as np
from scipy.spatial.distance import euclidean

def add_time_features(df, time_col='game_seconds'):
    """
    Add time-related features.

    Adds a ``normalized_time`` column, a categorical ``time_bin`` column
    ('early', 'middle', 'late') and one dummy column per bin
    (``time_early``, ``time_middle``, ``time_late``).

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing sports data
    time_col : str, optional (default='game_seconds')
        Name of the column containing game time in seconds

    Returns:
    --------
    df : pandas.DataFrame
        Copy of the input with added time features. Rows whose normalized
        time falls outside [0, 1] get a missing ``time_bin`` and all-zero
        dummy columns.
    """
    # Make a copy to avoid modifying the original
    df = df.copy()

    # Calculate normalized game time (0 to 1)
    if 'period' in df.columns:
        # For sports with periods/quarters: the period length is guessed from
        # the highest period number seen in the data.
        max_period = df['period'].max()
        period_seconds = 1200 if max_period <= 3 else 720  # NHL: 1200, NBA: 720
        max_seconds = max_period * period_seconds
        df['normalized_time'] = df[time_col] / max_seconds
    else:
        # For sports without periods (e.g., baseball): scale by the largest
        # observed time value.
        df['normalized_time'] = df[time_col] / df[time_col].max()

    # Create time bin features (early, middle, late).
    # include_lowest=True closes the first interval on the left so that events
    # at exactly time 0 land in 'early' instead of becoming NaN.
    df['time_bin'] = pd.cut(
        df['normalized_time'],
        bins=[0, 0.33, 0.67, 1],
        labels=['early', 'middle', 'late'],
        include_lowest=True,
    )

    # Create dummy variables for time bins
    time_dummies = pd.get_dummies(df['time_bin'], prefix='time')
    df = pd.concat([df, time_dummies], axis=1)

    return df

def add_score_differential_features(df, score_diff_col='score_differential'):
    """
    Add score differential features.

    Always adds the boolean columns ``leading``, ``tied``, ``trailing``,
    ``close_game``, ``medium_margin`` and ``large_margin``. When ``period``
    and remaining-time information are available, also adds
    ``is_late_game`` and a ``pressure_index`` scaled to [0, 1].

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing sports data
    score_diff_col : str, optional (default='score_differential')
        Name of the column containing score differential

    Returns:
    --------
    df : pandas.DataFrame
        Copy of the input with added score differential features. If
        ``score_diff_col`` is missing, a warning is printed and the copy is
        returned unchanged.
    """
    # Make a copy to avoid modifying the original
    df = df.copy()

    # Ensure score differential column exists
    if score_diff_col not in df.columns:
        print(f"Warning: {score_diff_col} column not found. Skipping score differential features.")
        return df

    score_diff = df[score_diff_col]
    margin = score_diff.abs()

    # Create score situation features
    df['leading'] = score_diff > 0
    df['tied'] = score_diff == 0
    df['trailing'] = score_diff < 0

    # Create score margin features
    df['close_game'] = margin <= 2
    df['medium_margin'] = (margin > 2) & (margin <= 5)
    df['large_margin'] = margin > 5

    has_period = 'period' in df.columns

    # Derive remaining seconds if not already present. The period length is
    # guessed from the highest period number, so this needs 'period' too;
    # without it the derivation (and the pressure index) is skipped rather
    # than raising a KeyError.
    if (
        has_period
        and 'period_seconds_remaining' not in df.columns
        and 'period_seconds' in df.columns
    ):
        period_seconds = 1200 if df['period'].max() <= 3 else 720  # NHL: 1200, NBA: 720
        df['period_seconds_remaining'] = period_seconds - df['period_seconds']

    if has_period and 'period_seconds_remaining' in df.columns:
        # Higher pressure when:
        # - Later in the game
        # - Score is close
        # - Less time remaining
        max_period = df['period'].max()
        df['is_late_game'] = df['period'] >= (max_period - 1)

        raw_pressure = (
            (df['is_late_game'].astype(int) * 2) +
            (df['close_game'].astype(int) * 3) +
            (1 / (df['period_seconds_remaining'] + 1) * 100)
        )

        # Normalize pressure index to 0-1. When every row has the same raw
        # value (e.g. a single-row frame) the range is zero; report 0.0
        # instead of dividing by zero and producing NaN.
        lowest = raw_pressure.min()
        spread = raw_pressure.max() - lowest
        if spread > 0:
            df['pressure_index'] = (raw_pressure - lowest) / spread
        else:
            df['pressure_index'] = 0.0

    return df

def add_spatial_features(df, x_col='x', y_col='y', sport='nhl'):
    """
    Derive location-based features from x, y coordinates.

    Missing coordinates are replaced with 0 before any feature is computed.
    The columns added depend on ``sport`` (matched case-insensitively):

    * 'nhl': ``distance_to_net``, ``shot_angle``, ``is_slot``, ``is_point``,
      ``is_behind_net`` (all relative to a net fixed at x=89, y=0)
    * 'nba': ``distance_to_basket``, ``shot_angle``, ``is_paint``,
      ``is_corner_three``, ``is_above_break_three``, ``is_midrange``
      (basket at the origin)
    * 'mlb': ``distance_from_plate``, ``is_infield``, ``is_outfield``;
      ``launch_angle`` when it is absent but ``launch_speed`` is present;
      ``is_pull``, ``is_opposite``, ``is_center`` when ``stand`` is present

    Any other ``sport`` value adds no feature columns.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing sports data
    x_col : str, optional (default='x')
        Name of the column containing x-coordinates
    y_col : str, optional (default='y')
        Name of the column containing y-coordinates
    sport : str, optional (default='nhl')
        Sport name ('nhl', 'mlb', or 'nba')

    Returns:
    --------
    df : pandas.DataFrame
        Copy of the input with added spatial features. If either coordinate
        column is missing, a warning is printed and the copy is returned
        unchanged.
    """
    # Work on a copy so the caller's frame is never modified
    features = df.copy()

    # Bail out early when the coordinates are unavailable
    have_coords = x_col in features.columns and y_col in features.columns
    if not have_coords:
        print(f"Warning: Coordinate columns ({x_col}, {y_col}) not found. Skipping spatial features.")
        return features

    # Missing coordinates are treated as the origin
    for coord in (x_col, y_col):
        features[coord] = features[coord].fillna(0)

    xs = features[x_col]
    ys = features[y_col]
    league = sport.lower()

    if league == 'nhl':
        # Every shot is measured against the right-hand net at (89, 0)
        net_x = 89
        to_net = np.sqrt((xs - net_x) ** 2 + ys ** 2)
        angle = np.abs(np.degrees(np.arctan2(ys, net_x - xs)))

        features['distance_to_net'] = to_net
        features['shot_angle'] = angle

        # Zone flags
        features['is_slot'] = (to_net < 30) & (angle < 30)
        features['is_point'] = to_net > 50
        features['is_behind_net'] = xs > net_x

    elif league == 'nba':
        # Basket sits at the origin
        to_basket = np.sqrt(xs ** 2 + ys ** 2)
        features['distance_to_basket'] = to_basket
        features['shot_angle'] = np.abs(np.degrees(np.arctan2(ys, xs)))

        # Zone flags
        # NOTE(review): the paint test treats x as the lateral axis while the
        # corner-three test thresholds |y| -- confirm the intended convention.
        in_paint = (np.abs(xs) < 8) & (ys < 19) & (ys > 0)
        corner_three = (to_basket > 22) & (np.abs(ys) > 22)
        above_break = (to_basket > 23.75) & ~corner_three

        features['is_paint'] = in_paint
        features['is_corner_three'] = corner_three
        features['is_above_break_three'] = above_break
        # Midrange is whatever is left once the other three zones are excluded
        features['is_midrange'] = ~(in_paint | corner_three | above_break)

    elif league == 'mlb':
        # Home plate sits at the origin
        from_plate = np.sqrt(xs ** 2 + ys ** 2)
        features['distance_from_plate'] = from_plate

        # Only fill in launch_angle when it is missing and launch_speed is
        # available; the value is the angle of the (x, y) point in degrees.
        needs_angle = 'launch_angle' not in features.columns
        if needs_angle and 'launch_speed' in features.columns:
            features['launch_angle'] = np.degrees(np.arctan2(ys, xs))

        # Field zone flags
        infield = from_plate < 95
        features['is_infield'] = infield
        features['is_outfield'] = ~infield

        # Pull / opposite-field flags depend on batter handedness
        if 'stand' in features.columns:
            righty = features['stand'] == 'R'
            lefty = features['stand'] == 'L'
            pulled = (righty & (xs < 0)) | (lefty & (xs > 0))
            opposite = (righty & (xs > 0)) | (lefty & (xs < 0))

            features['is_pull'] = pulled
            features['is_opposite'] = opposite
            features['is_center'] = ~(pulled | opposite)

    return features

def add_rolling_stats(df, player_id_col, outcome_col, window_sizes=(5, 10, 20)):
    """
    Add rolling statistics for each player.

    For every window size ``w`` two columns are added:

    * ``rolling_{outcome_col}_{w}``: mean of the player's last ``w`` outcomes,
      including the current row (fewer rows are used at the start of a
      player's history).
    * ``streak_{w}``: the player's current run length, positive for
      consecutive successes and negative for consecutive failures, clipped
      to the range [-w, w].

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing sports data. Must contain ``date_time``, or
        otherwise both ``game_id`` and ``game_seconds``, for ordering.
    player_id_col : str
        Name of the column containing player IDs
    outcome_col : str
        Name of the column containing outcome (1 for success, 0 for failure)
    window_sizes : iterable of int, optional (default=(5, 10, 20))
        Window sizes for rolling statistics

    Returns:
    --------
    df : pandas.DataFrame
        Copy of the input, sorted by player and time, with added rolling
        statistics. The original index labels are kept.
    """
    def _signed_streak(outcomes):
        # Label each run of identical consecutive outcomes, then count the
        # position inside the run: +k for the k-th straight success, -k for
        # the k-th straight failure.
        run_id = (outcomes != outcomes.shift()).cumsum()
        misses = 1 - outcomes
        hit_run = outcomes.groupby(run_id).cumsum() * outcomes
        miss_run = misses.groupby(run_id).cumsum() * misses
        return hit_run - miss_run

    # Make a copy to avoid modifying the original
    df = df.copy()

    # Sort by player and time
    if 'date_time' in df.columns:
        df = df.sort_values([player_id_col, 'date_time'])
    else:
        df = df.sort_values([player_id_col, 'game_id', 'game_seconds'])

    windows = list(window_sizes)
    if not windows:
        return df

    per_player = df.groupby(player_id_col)[outcome_col]

    # The raw streak does not depend on the window size, so compute it once
    # and only vary the clipping bound inside the loop.
    streak = per_player.transform(_signed_streak)

    for window in windows:
        # `window=window` binds the current value at definition time
        df[f'rolling_{outcome_col}_{window}'] = per_player.transform(
            lambda x, window=window: x.rolling(window, min_periods=1).mean()
        )
        df[f'streak_{window}'] = streak.clip(-window, window)

    return df

def add_opponent_features(df, opponent_id_col, outcome_col):
    """
    Add opponent-related features.

    Adds three columns:

    * ``opponent_success_rate``: mean of ``outcome_col`` over all rows that
      share the row's opponent ID
    * ``opponent_count``: number of non-missing outcomes for that opponent
    * ``opponent_quality_index``: ``opponent_success_rate`` standardized
      (z-score) across all rows, or 0 when the rate has no spread

    Existing columns with these names are overwritten, so the function can
    be applied repeatedly to the same frame.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing sports data
    opponent_id_col : str
        Name of the column containing opponent IDs (e.g., goalie_id, pitcher_id)
    outcome_col : str
        Name of the column containing outcome (1 for success, 0 for failure)

    Returns:
    --------
    df : pandas.DataFrame
        Copy of the input with added opponent features. Row order and index
        are preserved. If ``opponent_id_col`` is missing, a warning is
        printed and the copy is returned unchanged.
    """
    # Make a copy to avoid modifying the original
    df = df.copy()

    # Check if opponent column exists
    if opponent_id_col not in df.columns:
        print(f"Warning: {opponent_id_col} column not found. Skipping opponent features.")
        return df

    # Per-opponent aggregates broadcast back onto every row. transform()
    # keeps the caller's index and row order, whereas a merge would replace
    # the index and suffix any same-named columns from an earlier call.
    outcomes_by_opponent = df.groupby(opponent_id_col)[outcome_col]
    df['opponent_success_rate'] = outcomes_by_opponent.transform('mean')
    df['opponent_count'] = outcomes_by_opponent.transform('count')

    # Calculate opponent quality index (standardized success rate)
    mean_success_rate = df['opponent_success_rate'].mean()
    std_success_rate = df['opponent_success_rate'].std()

    # std is NaN for fewer than two rows and 0 when all rates are equal;
    # both cases fall through to the constant 0.
    if std_success_rate > 0:
        df['opponent_quality_index'] = (df['opponent_success_rate'] - mean_success_rate) / std_success_rate
    else:
        df['opponent_quality_index'] = 0

    return df

def add_game_context_features(df):
    """
    Add game context features.

    Each feature is added only when the columns it needs are present:

    * ``is_home``: ``team_id == home_team_id`` (skipped if ``is_home``
      already exists)
    * ``is_playoff``: ``game_type`` is 'P' or 'Playoffs' (skipped if
      ``is_playoff`` already exists)
    * ``is_night_game``: ``day_night == 'N'``
    * ``date``, ``days_since_last_game``, ``is_back_to_back``: derived from
      ``date_time`` and ``team_id``

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing sports data. For the back-to-back features each
        row is compared with the previous row of the same team in the
        frame's current order, so rows should already be in chronological
        order.

    Returns:
    --------
    df : pandas.DataFrame
        Copy of the input with added game context features
    """
    # Make a copy to avoid modifying the original
    df = df.copy()

    # Add home/away indicator if not present
    if 'is_home' not in df.columns and 'team_id' in df.columns and 'home_team_id' in df.columns:
        df['is_home'] = df['team_id'] == df['home_team_id']

    # Add playoff indicator if not present
    if 'is_playoff' not in df.columns and 'game_type' in df.columns:
        df['is_playoff'] = df['game_type'].isin(['P', 'Playoffs'])

    # Add day/night indicator for MLB
    if 'day_night' in df.columns:
        df['is_night_game'] = df['day_night'] == 'N'

    # Add back-to-back indicator for NBA/NHL
    if 'date_time' in df.columns and 'team_id' in df.columns:
        # Keep the calendar day as datetime64 (midnight-normalized) for the
        # arithmetic: subtracting object-dtype datetime.date columns yields
        # an object result on which `.dt.days` is not reliably available.
        game_day = pd.to_datetime(df['date_time']).dt.normalize()
        df['date'] = game_day.dt.date

        # Calendar day of the same team's previous row
        prev_game_day = game_day.groupby(df['team_id']).shift(1)

        # Whole days since that previous row (NaN for a team's first row)
        df['days_since_last_game'] = (game_day - prev_game_day).dt.days

        # Back-to-back is when days_since_last_game = 1
        df['is_back_to_back'] = df['days_since_last_game'] == 1

    return df


