"""
Preprocessing Pipeline for Multi-Modal Biometric Data
Generated by AI Research Agent for Agents4Science 2025
"""

import numpy as np
import cv2
import librosa
from scipy.signal import butter, lfilter
import torch

class Preprocessor:
    """Turn raw face, voice and behavioral inputs into model-ready tensors.

    Each modality has its own ``preprocess_*`` method; calling the instance on
    a sample dict runs all three. Random augmentation is applied only when
    both the instance-level ``augment`` flag and the per-call ``training``
    flag are true.

    Args:
        face_size: Target face size, handed to OpenCV as ``dsize`` and
            therefore interpreted as ``(width, height)``.
        voice_sr: Sampling rate (Hz) of the raw audio. It drives both the
            band-pass design and the MFCC extraction, and must be high enough
            for the 3400 Hz upper band edge to lie below Nyquist.
        n_mfcc: Number of MFCC coefficients per frame.
        mfcc_length: Number of MFCC frames kept per clip.
        behavioral_dim: Expected number of behavioral features. Stored for
            callers; this class does not enforce it.
        augment: Enable random augmentation for training-time calls.
    """

    # Pass band (Hz) and order of the Butterworth filter applied to raw audio.
    _VOICE_BAND_HZ = (300, 3400)
    _VOICE_FILTER_ORDER = 5

    def __init__(self, face_size=(224, 224), voice_sr=16000, n_mfcc=13, mfcc_length=100,
                 behavioral_dim=30, augment=False):
        self.face_size = face_size
        self.voice_sr = voice_sr
        self.n_mfcc = n_mfcc
        self.mfcc_length = mfcc_length
        self.behavioral_dim = behavioral_dim
        self.augment = augment

    # ------------------------------------------------------------------ face

    def preprocess_face(self, image, training=True) -> torch.Tensor:
        """Resize a face image, scale it to [-1, 1] and return a CHW tensor.

        Args:
            image: Numpy array shaped ``(H, W, C)`` or ``(H, W)``. Floating
                point images are assumed to already lie in [0, 1]; integer
                images (e.g. uint8) are first divided by their dtype's
                maximum value.
            training: When true (and ``self.augment`` is set) random
                augmentation is applied.

        Returns:
            A float32 tensor with the channel axis first.
        """
        image = cv2.resize(image, self.face_size, interpolation=cv2.INTER_AREA)
        image = self._scale_to_signed_unit(image)
        if training and self.augment:
            image = self._augment_face(image)
        return self._to_chw_tensor(image)

    @staticmethod
    def _scale_to_signed_unit(image) -> np.ndarray:
        """Map pixel values onto [-1, 1], rescaling integer images first."""
        if np.issubdtype(image.dtype, np.integer):
            # Without this step a uint8 image would land in roughly
            # [-1, 509] rather than [-1, 1].
            image = image.astype(np.float32) / np.iinfo(image.dtype).max
        return (image - 0.5) * 2

    @staticmethod
    def _to_chw_tensor(image) -> torch.Tensor:
        """Convert an HWC (or single-channel HW) array to a float32 CHW tensor."""
        if image.ndim == 2:
            # OpenCV returns single-channel images without a channel axis;
            # restore it so the permute below is valid.
            image = image[:, :, np.newaxis]
        return torch.from_numpy(image).permute(2, 0, 1).float()

    def _augment_face(self, image):
        """Apply a random rotation, a random shift and Gaussian pixel noise.

        Expects an image that has already been resized to ``self.face_size``
        and scaled to [-1, 1]; the result is clipped back into that range.
        Pixels uncovered by the warps receive OpenCV's default constant
        border value (0), which is mid-gray in the [-1, 1] range.
        """
        width, height = self.face_size

        # Rotation of up to +/-15 degrees about the image centre.
        angle = np.random.uniform(-15, 15)
        rotation = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1)
        image = cv2.warpAffine(image, rotation, self.face_size)

        # Shift of up to +/-10% per axis: x scales with the width and y with
        # the height, so non-square targets are shifted proportionally.
        tx, ty = np.random.uniform(-0.1, 0.1, 2) * np.asarray(self.face_size)
        shift = np.float32([[1, 0, tx], [0, 1, ty]])
        image = cv2.warpAffine(image, shift, self.face_size)

        # Low-amplitude Gaussian noise.
        noise = np.random.normal(0, 0.01, image.shape).astype(np.float32)
        return np.clip(image + noise, -1, 1)

    # ----------------------------------------------------------------- voice

    def preprocess_voice(self, audio_raw, training=True) -> torch.Tensor:
        """Band-pass filter raw audio and return fixed-length, normalized MFCCs.

        Args:
            audio_raw: Numpy array of raw audio samples recorded at
                ``self.voice_sr``.
            training: When true (and ``self.augment`` is set) Gaussian noise
                is added to the normalized features.

        Returns:
            A float32 tensor of MFCC features; for 1-D audio its shape is
            ``(n_mfcc, mfcc_length)``.
        """
        # 1. Band-pass filter; band edges are expressed relative to Nyquist.
        nyquist = 0.5 * self.voice_sr
        low_hz, high_hz = self._VOICE_BAND_HZ
        b, a = butter(self._VOICE_FILTER_ORDER,
                      [low_hz / nyquist, high_hz / nyquist], btype='band')
        audio_filtered = lfilter(b, a, audio_raw)

        # 2. MFCC extraction.
        mfccs = librosa.feature.mfcc(y=audio_filtered, sr=self.voice_sr, n_mfcc=self.n_mfcc)

        # 3. Pad or truncate along the frame axis, then 4. standardize.
        # NOTE(review): the statistics are computed after padding, so the
        # zero frames of short clips contribute to the mean/std. Kept as-is
        # because changing it would alter features seen by existing models.
        voice_data = self._standardize(self._fit_length(mfccs))

        # 5. Augmentation (training only).
        if training and self.augment:
            voice_data = self._augment_voice(voice_data)

        return torch.from_numpy(voice_data).float()

    def _fit_length(self, mfccs):
        """Zero-pad or truncate axis 1 of ``mfccs`` to ``self.mfcc_length``."""
        missing = self.mfcc_length - mfccs.shape[1]
        if missing > 0:
            return np.pad(mfccs, ((0, 0), (0, missing)), 'constant')
        return mfccs[:, :self.mfcc_length]

    @staticmethod
    def _standardize(data):
        """Apply global mean/variance normalization; only center if std ~ 0."""
        mean = np.mean(data)
        std = np.std(data)
        if std > 1e-6:
            return (data - mean) / std
        return data - mean  # avoid dividing by a (near-)zero deviation

    def _augment_voice(self, mfccs):
        """Add low-amplitude Gaussian noise to the MFCC features."""
        noise = np.random.normal(0, 0.005, mfccs.shape).astype(np.float32)
        return mfccs + noise

    # ------------------------------------------------------------ behavioral

    def preprocess_behavioral(self, features, training=True) -> torch.Tensor:
        """Min-max scale behavioral features to [-1, 1] and return a tensor.

        The minimum and maximum are taken over the whole ``features`` array.
        A small epsilon in the denominator keeps constant inputs finite (they
        map to -1).

        Args:
            features: Numpy array of behavioral features.
            training: When true (and ``self.augment`` is set) Gaussian noise
                is added and the result is clipped back to [-1, 1].

        Returns:
            A float32 tensor with the same shape as ``features``.
        """
        lowest = np.min(features)
        span = np.max(features) - lowest + 1e-6
        features = (features - lowest) / span * 2 - 1

        if training and self.augment:
            features = self._augment_behavioral(features)

        return torch.from_numpy(features).float()

    def _augment_behavioral(self, features):
        """Add very small Gaussian noise and clip back into [-1, 1]."""
        noise = np.random.normal(0, 0.001, features.shape).astype(np.float32)
        return np.clip(features + noise, -1, 1)

    # -------------------------------------------------------------- pipeline

    def __call__(self, sample, training=True) -> dict:
        """Preprocess every modality of one sample.

        Args:
            sample: Mapping with the keys ``'face'``, ``'behavioral'``,
                ``'label'`` and the audio under ``'voice_raw'`` (preferred)
                or ``'voice'``.
            training: Forwarded to each ``preprocess_*`` method.

        Returns:
            A dict with the keys ``'face'``, ``'voice'``, ``'behavioral'``
            (tensors) and ``'label'`` (passed through unchanged).
        """
        return {
            'face': self.preprocess_face(sample['face'], training),
            # Raw audio is expected under 'voice_raw'; 'voice' is the fallback.
            'voice': self.preprocess_voice(
                sample['voice_raw'] if 'voice_raw' in sample else sample['voice'],
                training,
            ),
            'behavioral': self.preprocess_behavioral(sample['behavioral'], training),
            'label': sample['label'],
        }

if __name__ == '__main__':
    # Smoke test: push one synthetic sample through the full pipeline with
    # augmentation enabled and report the resulting tensor shapes.
    print("Testing Preprocessor...")
    preprocessor = Preprocessor(augment=True)

    # Synthetic stand-ins for each modality, drawn uniformly from [0, 1).
    sample_rate = 16000
    clip_seconds = 2
    synthetic_face = np.random.rand(256, 256, 3).astype(np.float32)
    synthetic_audio = np.random.rand(sample_rate * clip_seconds).astype(np.float32)
    synthetic_behavior = np.random.rand(30).astype(np.float32)

    # Raw audio goes under 'voice_raw', the key the preprocessor looks up first.
    processed_sample = preprocessor(
        {
            'face': synthetic_face,
            'voice_raw': synthetic_audio,
            'behavioral': synthetic_behavior,
            'label': 0,
        },
        training=True,
    )

    print(f"Processed Face shape: {processed_sample['face'].shape}")
    print(f"Processed Voice shape: {processed_sample['voice'].shape}")
    print(f"Processed Behavioral shape: {processed_sample['behavioral'].shape}")
    print("Preprocessor test complete.")
