import pandas as pd
import numpy as np


def load_data(file_path):
    """Read a CSV source into a pandas DataFrame.

    Args:
        file_path: Path or file-like object, forwarded unchanged to
            ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with default options.
    """
    return pd.read_csv(file_path)


def _rows_before_horizon(frame, horizon_minutes):
    """Keep rows recorded at least *horizon_minutes* before their visit's last row."""
    if frame.empty:
        return frame

    # Broadcast the MinutesFromArrival of each visit's positionally last row
    # (not the maximum) onto every row of that visit.  Using transform instead
    # of DataFrameGroupBy.apply keeps 'VisitIdentifier' as an ordinary column
    # regardless of how the installed pandas treats grouping columns in apply.
    last_minute = frame.groupby('VisitIdentifier')['MinutesFromArrival'].transform(
        lambda minutes: minutes.iloc[-1])
    return frame[(last_minute - frame['MinutesFromArrival']) >= horizon_minutes]


def preprocess_data(df, dynamic_predictors, target, early_prediction):
    """Build per-row features and labels restricted to an early-prediction window.

    Rows are split into cases (``target == 1``) and controls (``target == 0``);
    rows with any other target value are dropped.  Within each subset and each
    ``VisitIdentifier``, only rows whose ``MinutesFromArrival`` lies at least
    ``early_prediction * 60`` before that of the visit's last row (in input
    order, within the subset) are retained.

    Args:
        df: DataFrame containing the columns ``VisitIdentifier``,
            ``MinutesFromArrival``, ``target`` and every name in
            ``dynamic_predictors``.
        dynamic_predictors: Sequence of feature column names to return.
        target: Name of the binary outcome column.
        early_prediction: Size of the excluded window; it is multiplied by 60
            before being compared with ``MinutesFromArrival`` differences
            (i.e. hours, given that column is expressed in minutes).

    Returns:
        A tuple ``(X, y)``, both indexed by ``VisitIdentifier`` and sorted by
        visit and then ``MinutesFromArrival``:

        * ``X`` holds the ``dynamic_predictors`` columns plus ``TimeStep``,
          the descending rank of ``MinutesFromArrival`` within the visit
          (1.0 for the latest retained row; ties share the lowest rank).
        * ``y`` is a Series named ``label`` with 1 where ``target == 1`` and
          0 otherwise.
    """
    horizon_minutes = early_prediction * 60

    cases = _rows_before_horizon(df[df[target] == 1], horizon_minutes)
    controls = _rows_before_horizon(df[df[target] == 0], horizon_minutes)

    # Cases are concatenated first so that rows tying on (visit, minutes)
    # keep the cases-before-controls order when sorted.
    df_filtered = pd.concat([cases, controls])
    df_filtered = df_filtered.sort_values(by=['VisitIdentifier', 'MinutesFromArrival'])

    df_filtered['TimeStep'] = df_filtered.groupby('VisitIdentifier')['MinutesFromArrival'].rank(
        method='min', ascending=False)
    df_filtered['label'] = np.where(df_filtered[target] == 1, 1, 0)
    df_filtered = df_filtered.set_index('VisitIdentifier')

    # list(...) lets callers pass a tuple or other sequence of column names.
    X = df_filtered[list(dynamic_predictors) + ['TimeStep']]
    y = df_filtered['label']

    return X, y


def create_sequences(input_data, labels, sequence_length):
    """Slice row-ordered data into overlapping fixed-length windows.

    Every window of ``sequence_length`` consecutive rows is emitted, paired
    with the label at the window's last row.

    Args:
        input_data: Array-like of rows (first axis is the row/time axis).
        labels: Array-like with one label per row of ``input_data``.
            Indexed by position, so a pandas Series with a non-default
            index is handled correctly.
        sequence_length: Number of rows per window; must be >= 1.

    Returns:
        A tuple ``(data_seq, label_seq)`` of NumPy arrays.  ``data_seq`` has
        one entry per window (``len(input_data) - sequence_length + 1`` of
        them) and ``label_seq`` the matching labels.  Both are empty arrays
        when the input is shorter than ``sequence_length``.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
        IndexError: If ``labels`` has fewer entries than ``input_data``.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be a positive integer")

    # Convert once so that slicing and label lookup are purely positional.
    rows = np.asarray(input_data)
    targets = np.asarray(labels)

    # +1 so the final full window (ending on the last row) is included.
    window_count = len(rows) - sequence_length + 1

    data_seq = [rows[start:start + sequence_length] for start in range(window_count)]
    label_seq = [targets[start + sequence_length - 1] for start in range(window_count)]
    return np.array(data_seq), np.array(label_seq)
