'''
Date: 2021-12-06 13:02:59
LastEditors: yuhhong
LastEditTime: 2021-12-06 13:07:06
'''
import pandas as pd
from sklearn.model_selection import train_test_split

import argparse



def _parse_args(argv=None):
    """Parse and validate the command-line options of the split script.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        The parsed ``argparse.Namespace`` with ``input``, ``output_train``,
        ``output_test``, ``test_size`` and ``seed`` attributes.

    Raises:
        SystemExit: With exit status 2 when a required path is missing or
            ``--test_size`` is outside the open interval (0, 1).
    """
    parser = argparse.ArgumentParser(description='Preprocess the Data')
    # The three paths are required: an empty path can neither be read nor
    # written, so failing here gives a usage message instead of a traceback
    # from pandas later on.
    parser.add_argument('--input', type=str, required=True,
                        help='path to input data')
    parser.add_argument('--output_train', type=str, required=True,
                        help='path to output training data')
    parser.add_argument('--output_test', type=str, required=True,
                        help='path to output test data')
    parser.add_argument('--test_size', type=float, default=0.1,
                        help='fraction of rows placed in the test split')
    parser.add_argument('--seed', type=int, default=None,
                        help='random seed for a reproducible split')
    args = parser.parse_args(argv)

    if not 0.0 < args.test_size < 1.0:
        parser.error('--test_size must be strictly between 0 and 1')
    return args


def main(argv=None):
    """Read a CSV file, split its rows into train/test sets and save both.

    Args:
        argv: Optional argument list; ``None`` means use the process
            command line.
    """
    args = _parse_args(argv)

    df = pd.read_csv(args.input)
    print(df)

    # Random, shuffled row-level split. Rows are NOT grouped by any column,
    # so rows sharing a value (e.g. the same SMILES string) may end up in
    # both the train and the test set.
    train, test = train_test_split(df, test_size=args.test_size,
                                   random_state=args.seed)
    print(train)
    print(test)

    # index=False keeps the shuffled row index out of the written files.
    train.to_csv(args.output_train, index=False)
    test.to_csv(args.output_test, index=False)


if __name__ == '__main__':
    main()