import numpy as np

from sklearn.metrics import f1_score, roc_auc_score, average_precision_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler

import lightgbm
from sklearn.linear_model import LogisticRegression


# Classifier configuration shared by the fits further down in this script.
lgb = lightgbm.LGBMClassifier(
    objective="binary",         # two-class target
    class_weight="balanced",    # weight classes inversely to their frequency
    random_state=2023,          # fixed seed so repeated runs are comparable
    n_jobs=20,                  # number of parallel threads
)

# Load the pre-processed ECG arrays from disk: features and labels for the
# training split, then features and labels for the test split.
X_train, y_train, X_test, y_test = [
    np.load(npy_path)
    for npy_path in (
        "../proc_data/ecg/raw_train_data.npy",
        "../proc_data/ecg/train_label.npy",
        "../proc_data/ecg/raw_test_data.npy",
        "../proc_data/ecg/test_label.npy",
    )
]

# Baseline: fit on the real training split and score on the real test split.
# fit() hands back the estimator it was called on, so `lgb_train` and `lgb`
# refer to the same fitted model.
lgb_train = lgb.fit(X_train, y_train)
# Hard class predictions for the test split; the metrics below do not use them.
pred_train = lgb_train.predict(X_test)

# Run probability inference once and reuse the column-1 scores for both
# metrics instead of predicting again for every print statement.
proba_real = lgb_train.predict_proba(X_test)[:, 1]
print(f"AUROC (real data): {np.around(roc_auc_score(y_test, proba_real), 4)}")
print(f"AUPRC (real data): {np.around(average_precision_score(y_test, proba_real), 4)}")
print("**********************************")


# Train on the generated samples stored in EHRDiff.npy, then score the
# resulting model on the real test split.
syn_data = np.load("EHRDiff.npy")

# Clamp every generated value into [0, 1].
syn_data = np.clip(syn_data, 0, 1)
# Round column 0 to {0, 1}; the original author marked this column as the label.
# NOTE(review): the column is still passed to fit() as a feature and the
# targets below are assigned by row position rather than read from it --
# confirm the column layout and row ordering of EHRDiff.npy.
syn_data[:, 0] = np.rint(syn_data[:, 0])  # label

# Targets are positional: the first half of the rows is class 0, the second
# half class 1. An odd row count cannot be split this way, so fail with a
# clear message instead of a length-mismatch error raised inside fit().
num_each_label = syn_data.shape[0] // 2
if syn_data.shape[0] != 2 * num_each_label:
    raise ValueError(
        "EHRDiff.npy must contain an even number of rows to assign "
        f"half/half labels, got {syn_data.shape[0]}"
    )
labels = np.repeat([0, 1], num_each_label)

# Fit a fresh, unfitted estimator built from the same hyperparameters rather
# than calling lgb.fit() again: fit() returns the estimator itself, so
# refitting `lgb` would silently overwrite the real-data model that
# `lgb_train` refers to.
lgb_syn = type(lgb)(**lgb.get_params()).fit(syn_data, labels)
# Hard class predictions for the test split; the metrics below do not use them.
pred_syn = lgb_syn.predict(X_test)

# Run probability inference once and reuse the column-1 scores for both metrics.
proba_syn = lgb_syn.predict_proba(X_test)[:, 1]
print(f"AUROC (syn data): {np.around(roc_auc_score(y_test, proba_syn), 4)}")
print(f"AUPRC (syn data): {np.around(average_precision_score(y_test, proba_syn), 4)}")
print("**********************************")


