import numpy as np
import pandas as pd


def flchain():
    """Load the flchain dataset and split it into features and targets.

    Reads ``./data/R/flchain.csv``, discards the first column together with
    ``sample.yr`` and ``chapter``, removes every row that still contains a
    missing value, and renumbers the remaining rows from 0.  The ``sex``
    column is recoded as ``F`` -> 0 and ``M`` -> 1.

    Returns:
        tuple: ``(df_x, df_y)`` where ``df_x`` holds the columns ``age``,
        ``sex``, ``kappa``, ``lambda``, ``creatinine``, ``mgus`` and
        ``flc.grp``, and ``df_y`` holds ``futime`` and ``death``.
    """
    # load the raw table
    csv_path = './data/R/flchain.csv'
    print('Read %s' % csv_path)
    table = pd.read_csv(csv_path)

    # discard unused columns first so that missing values in them do not
    # cause otherwise complete rows to be dropped
    table = (
        table.drop(columns=table.columns[[0]])
        .drop(columns=['sample.yr', 'chapter'])
        .dropna()
    )
    table.index = np.arange(len(table))

    # recode sex as a number
    table = table.replace({'sex': {'F': 0, 'M': 1}})

    feature_cols = ['age', 'sex', 'kappa', 'lambda', 'creatinine', 'mgus',
                    'flc.grp']
    target_cols = ['futime', 'death']

    return table[feature_cols], table[target_cols]

def prostateSurvival():
    """Load the prostateSurvival dataset and split it into features and targets.

    Reads ``./data/R/prostateSurvival.csv``, discards the first column and
    renumbers the rows from 0.

    Returns:
        tuple: ``(df_x, df_y)`` where ``df_x`` is the table with ``grade``,
        ``stage`` and ``ageGroup`` one-hot encoded and the target columns
        removed, and ``df_y`` holds ``survTime`` and ``status``.  In
        ``status`` the codes 1 and 2 are both mapped to 1 and 0 stays 0.
    """
    # read feature vectors
    filename = './data/R/prostateSurvival.csv'
    print('Read %s' % filename)
    df = pd.read_csv(filename)

    # delete unnecessary columns
    df = df.drop(df.columns[[0]], axis=1)
    df.index = np.arange(0, len(df))

    # extract feature vectors
    df_x = pd.get_dummies(df, columns=['grade', 'stage', 'ageGroup'])
    df_x = df_x.drop(['survTime', 'status'], axis=1)

    # extract target vectors
    # read_csv parses a numeric status column as integers, so string keys
    # alone never match and a status of 2 would be returned unchanged.  The
    # integer key covers that case; the string keys are kept for a column
    # that was read as text.
    status_binary = {'0': 0, '1': 1, '2': 1, 2: 1}
    df = df.replace({'status': status_binary})
    df_y = df[['survTime', 'status']]

    return df_x, df_y

def support():
    """Load the support dataset from its separate feature and target files.

    Features come from ``./data/R/support_x.csv`` with the first column
    discarded and the rows renumbered from 0.  Targets come from
    ``./data/R/support_y.csv``, restricted to ``d.time`` and ``death``, and
    are given the same index as the features.

    Returns:
        tuple: ``(df_x, df_y)`` with the feature table and the target table.
    """
    # feature table
    path_x = './data/R/support_x.csv'
    print('Read %s' % path_x)
    features = pd.read_csv(path_x)
    features = features.drop(columns=features.columns[0])
    features.index = np.arange(len(features))

    # target table, aligned row-for-row with the features
    path_y = './data/R/support_y.csv'
    print('Read %s' % path_y)
    targets = pd.read_csv(path_y)[['d.time', 'death']]
    targets.index = features.index

    return features, targets
