"""
Multimodal data integration utilities.

This module provides functions for integrating multiple data modalities
(spatiotemporal, biomechanical, physiological) into a unified feature representation.
"""

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

def integrate_spatiotemporal_features(df, spatial_cols, temporal_cols):
    """
    Extract and normalize spatiotemporal features.

    The spatial and temporal columns are selected from ``df`` (spatial
    columns first, then temporal columns) and each resulting column is
    standardized with a freshly fitted ``StandardScaler``.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing sports data
    spatial_cols : sequence of str
        Spatial feature column names (list, tuple, pandas Index, ...)
    temporal_cols : sequence of str
        Temporal feature column names (list, tuple, pandas Index, ...)

    Returns:
    --------
    X_spatiotemporal : ndarray
        Array of normalized spatiotemporal features, one row per row of
        ``df`` and one column per requested feature
    """
    # Coerce both arguments to lists before joining: ``+`` only concatenates
    # list with list, fails for list + tuple, and is element-wise arithmetic
    # for pandas Index objects. A list is also what DataFrame indexing needs
    # in order to treat the labels as multiple columns.
    feature_cols = list(spatial_cols) + list(temporal_cols)
    X = df[feature_cols].values

    # Normalize features (the scaler is fitted on this data only)
    scaler = StandardScaler()
    X_normalized = scaler.fit_transform(X)

    return X_normalized

def integrate_biomechanical_features(biomech_data, player_ids, timestamps):
    """
    Extract and normalize biomechanical features.

    One row is built per (player_id, timestamp) pair by looking the pair up
    in ``biomech_data``; the resulting matrix is then standardized column by
    column with a freshly fitted ``StandardScaler``.

    Parameters:
    -----------
    biomech_data : dict
        Dictionary mapping (player_id, timestamp) to biomechanical features.
        The feature count is taken from the first value in the dictionary.
    player_ids : ndarray
        Array of player IDs
    timestamps : ndarray
        Array of timestamps, same length as ``player_ids``

    Returns:
    --------
    X_biomechanical : ndarray
        Array of normalized biomechanical features

    Raises:
    -------
    ValueError
        If ``biomech_data`` is empty, or if ``player_ids`` and ``timestamps``
        have different lengths.

    Notes:
    ------
    Pairs that are not present in ``biomech_data`` are left as all-zero rows
    before normalization, so they take part in the fitted mean and variance.
    Lookups use ordinary dict hashing/equality, so the ID and timestamp
    values must match the dictionary keys exactly.
    """
    # Fail early with a clear message instead of leaking StopIteration from
    # next(iter(...)) when there is nothing to infer the feature count from.
    if not biomech_data:
        raise ValueError(
            "biomech_data is empty; cannot infer the number of features")

    n_samples = len(player_ids)
    if len(timestamps) != n_samples:
        raise ValueError(
            "player_ids and timestamps must have the same length "
            "(got %d and %d)" % (n_samples, len(timestamps)))

    # Get number of features from first item
    n_features = len(next(iter(biomech_data.values())))
    X = np.zeros((n_samples, n_features))

    # Fill feature array; rows without a matching key stay zero
    for row, key in enumerate(zip(player_ids, timestamps)):
        if key in biomech_data:
            X[row] = biomech_data[key]

    # Normalize features
    scaler = StandardScaler()
    X_normalized = scaler.fit_transform(X)

    return X_normalized

def integrate_physiological_features(physio_data, player_ids, timestamps):
    """
    Extract and normalize physiological features.

    One row is built per (player_id, timestamp) pair by looking the pair up
    in ``physio_data``; the resulting matrix is then standardized column by
    column with a freshly fitted ``StandardScaler``.

    Parameters:
    -----------
    physio_data : dict
        Dictionary mapping (player_id, timestamp) to physiological features.
        The feature count is taken from the first value in the dictionary.
    player_ids : ndarray
        Array of player IDs
    timestamps : ndarray
        Array of timestamps, same length as ``player_ids``

    Returns:
    --------
    X_physiological : ndarray
        Array of normalized physiological features

    Raises:
    -------
    ValueError
        If ``physio_data`` is empty, or if ``player_ids`` and ``timestamps``
        have different lengths.

    Notes:
    ------
    Pairs that are not present in ``physio_data`` are left as all-zero rows
    before normalization, so they take part in the fitted mean and variance.
    Lookups use ordinary dict hashing/equality, so the ID and timestamp
    values must match the dictionary keys exactly.
    """
    # Fail early with a clear message instead of leaking StopIteration from
    # next(iter(...)) when there is nothing to infer the feature count from.
    if not physio_data:
        raise ValueError(
            "physio_data is empty; cannot infer the number of features")

    n_samples = len(player_ids)
    if len(timestamps) != n_samples:
        raise ValueError(
            "player_ids and timestamps must have the same length "
            "(got %d and %d)" % (n_samples, len(timestamps)))

    # Get number of features from first item
    n_features = len(next(iter(physio_data.values())))
    X = np.zeros((n_samples, n_features))

    # Fill feature array; rows without a matching key stay zero
    for row, key in enumerate(zip(player_ids, timestamps)):
        if key in physio_data:
            X[row] = physio_data[key]

    # Normalize features
    scaler = StandardScaler()
    X_normalized = scaler.fit_transform(X)

    return X_normalized

def integrate_multimodal_features(X_spatiotemporal, X_biomechanical=None, X_physiological=None,
                                weights=(1.0, 0.5, 0.3)):
    """
    Integrate features from multiple modalities.

    Each supplied modality is multiplied by its weight and the results are
    concatenated column-wise, in the order spatiotemporal, biomechanical,
    physiological. Modalities passed as ``None`` are skipped.

    Parameters:
    -----------
    X_spatiotemporal : array-like, 2-D
        Array of normalized spatiotemporal features (required)
    X_biomechanical : array-like, 2-D, optional
        Array of normalized biomechanical features
    X_physiological : array-like, 2-D, optional
        Array of normalized physiological features
    weights : tuple, optional
        Weights for each modality (spatiotemporal, biomechanical, physiological).
        Only the entries for modalities that are actually supplied are read.

    Returns:
    --------
    X_integrated : ndarray
        Array of integrated features; its column count is the sum of the
        column counts of the supplied modalities

    Raises:
    -------
    ValueError
        Raised by ``np.concatenate`` when the supplied modalities do not have
        the same number of rows.
    """
    # Convert with np.asarray before weighting so that nested lists are
    # scaled numerically: ``float * list`` is a TypeError and ``int * list``
    # repeats the list instead of multiplying its values.
    weighted_features = [weights[0] * np.asarray(X_spatiotemporal)]

    # Optional modalities, paired with the position of their weight
    optional_modalities = ((1, X_biomechanical), (2, X_physiological))
    for weight_index, features in optional_modalities:
        if features is not None:
            weighted_features.append(weights[weight_index] * np.asarray(features))

    # Concatenate features along the column axis
    X_integrated = np.concatenate(weighted_features, axis=1)

    return X_integrated

def extract_multimodal_features(df, biomech_data=None, physio_data=None,
                              spatial_cols=None, temporal_cols=None,
                              player_id_col='player_id', timestamp_col='date_time',
                              weights=(1.0, 0.5, 0.3)):
    """
    Extract and integrate features from multiple modalities.

    Spatiotemporal features are always computed from ``df``. Biomechanical
    and physiological features are computed only when the corresponding
    dictionary is supplied; the player-ID and timestamp columns of ``df``
    are read only in that case. All available modalities are then weighted
    and concatenated by ``integrate_multimodal_features``.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing sports data
    biomech_data : dict, optional
        Dictionary mapping (player_id, timestamp) to biomechanical features
    physio_data : dict, optional
        Dictionary mapping (player_id, timestamp) to physiological features
    spatial_cols : list of str, optional
        List of spatial feature column names; defaults to
        ['x', 'y', 'distance_to_net', 'shot_angle']
    temporal_cols : list of str, optional
        List of temporal feature column names; defaults to
        ['game_seconds', 'period_seconds_remaining', 'score_differential']
    player_id_col : str, optional
        Name of the column containing player IDs
    timestamp_col : str, optional
        Name of the column containing timestamps
    weights : tuple, optional
        Weights for each modality (spatiotemporal, biomechanical, physiological)

    Returns:
    --------
    X_integrated : ndarray
        Array of integrated features
    """
    # Fall back to the default column sets when none are given
    spatial = (['x', 'y', 'distance_to_net', 'shot_angle']
               if spatial_cols is None else spatial_cols)
    temporal = (['game_seconds', 'period_seconds_remaining', 'score_differential']
                if temporal_cols is None else temporal_cols)

    # Spatiotemporal modality is always present
    spatiotemporal = integrate_spatiotemporal_features(df, spatial, temporal)

    # Biomechanical modality: only touch the id/timestamp columns when needed
    biomechanical = None
    if biomech_data is not None:
        biomechanical = integrate_biomechanical_features(
            biomech_data, df[player_id_col].values, df[timestamp_col].values)

    # Physiological modality: same lookup columns, separate data source
    physiological = None
    if physio_data is not None:
        physiological = integrate_physiological_features(
            physio_data, df[player_id_col].values, df[timestamp_col].values)

    # Weight and concatenate whatever modalities were produced
    return integrate_multimodal_features(
        spatiotemporal, biomechanical, physiological, weights)


