import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch.utils.data import TensorDataset, DataLoader

def load_data(file_path):
    """Read the CSV at ``file_path`` and return it as a pandas DataFrame.

    ``file_path`` is handed to ``pandas.read_csv`` unchanged, with all
    parsing options left at their defaults.
    """
    return pd.read_csv(file_path)

def preprocess_data(df, dynamic_predictors, target, early_prediction):
    """Right-align each visit, drop its final window, and standardize predictors.

    Rows are split into cases (``df[target] == 1``) and controls
    (``df[target] == 0``); rows with any other target value are discarded.
    Within each ``VisitIdentifier`` group, only rows lying at least
    ``early_prediction * 60`` before the visit's latest
    ``MinutesFromArrival`` value are kept.

    Args:
        df: DataFrame containing the columns ``'VisitIdentifier'``,
            ``'MinutesFromArrival'``, ``target`` and ``dynamic_predictors``.
        dynamic_predictors: Column labels selecting the feature columns
            (expected to be a list so the selection stays 2-D for the scaler).
        target: Name of the outcome column, compared against 1 and 0.
        early_prediction: Prediction horizon. It is multiplied by 60 before
            being compared with ``MinutesFromArrival`` differences, so it is
            presumably expressed in hours.

    Returns:
        Tuple ``(X, y)``. ``X`` is the NumPy array returned by
        ``StandardScaler.fit_transform`` (no index or column labels); ``y`` is
        a pandas Series of integer 0/1 labels. Both follow the same row order:
        all case rows first, then all control rows.

    NOTE: the scaler is fitted on every row that survives filtering. If the
    result is split into train/test sets afterwards, the scaling statistics
    will have been computed from the test rows as well.
    """
    cutoff_minutes = early_prediction * 60

    def _drop_final_window(visit):
        # Measure each row against the visit's latest timestamp. max() is
        # used instead of the positionally last row so the cutoff stays
        # correct when a visit's rows are not sorted by time (max() also
        # ignores missing timestamps). On sorted data the two are identical.
        minutes = visit['MinutesFromArrival']
        return visit[(minutes.max() - minutes) >= cutoff_minutes]

    df_cases = df[df[target] == 1]
    df_controls = df[df[target] == 0]
    df_cases_right_aligned = df_cases.groupby('VisitIdentifier').apply(
        _drop_final_window)
    df_controls_right_aligned = df_controls.groupby('VisitIdentifier').apply(
        _drop_final_window)

    df_filtered = pd.concat([df_cases_right_aligned, df_controls_right_aligned])

    df_filtered['label'] = np.where(df_filtered[target] == 1, 1, 0)

    X = df_filtered[dynamic_predictors]
    y = df_filtered['label']

    # fit_transform returns a plain ndarray; row order still matches y.
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    return X, y


def prepare_data(X, y, batch_size=32):
    """Wrap features and labels in a shuffling ``DataLoader``.

    Both inputs may be pandas objects (DataFrame / Series) or NumPy arrays
    and other array-likes. The latter matters because ``preprocess_data``
    returns its features as the ndarray produced by the scaler, which has no
    ``.values`` attribute.

    Args:
        X: Features, one row per sample. A Series or 1-D array is treated as
            a single feature column.
        y: Labels, one row per sample. A Series or 1-D array is treated as a
            single label column.
        batch_size: Number of samples per batch.

    Returns:
        A ``DataLoader`` over a ``TensorDataset`` of float32 tensors, with
        ``shuffle=True``. Each tensor has a singleton axis inserted at
        position 1, so 2-D features of shape ``(N, F)`` are yielded as
        ``(batch, 1, F)`` and a label column as ``(batch, 1, 1)``.
    """
    def _as_float32_matrix(data):
        # Series become one-column frames, matching the original pandas path.
        if isinstance(data, pd.Series):
            data = data.to_frame()
        if isinstance(data, pd.DataFrame):
            return data.values.astype(np.float32)
        values = np.asarray(data, dtype=np.float32)
        # Give 1-D arrays the same column shape a Series would get, so the
        # resulting tensors do not depend on the container type.
        if values.ndim == 1:
            values = values.reshape(-1, 1)
        return values

    X_tensor = torch.tensor(_as_float32_matrix(X)).unsqueeze(1)
    y_tensor = torch.tensor(_as_float32_matrix(y)).unsqueeze(1)
    dataset = TensorDataset(X_tensor, y_tensor)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    return dataloader

def create_padding_mask(tensor, pad_value=0):
    """Flag positions whose entire last-axis vector equals ``pad_value``.

    The result is a boolean tensor with the last dimension of ``tensor``
    reduced away: an entry is True only when every element along that
    dimension equals ``pad_value``.
    """
    matches_pad = tensor.eq(pad_value)
    return matches_pad.all(dim=-1)
