import pandas as pd
import csv

# Fraction of the de-duplicated rows written to the train file; the remaining
# rows are written to the test file.
TRAIN_SPLIT = 0.9
# Fixed seed so that re-running the script reproduces the same shuffle and
# therefore the same train/test split.
RANDOM_SEED = 42

# The input file has no header row. keep_default_na=False stops pandas from
# converting literal field values such as "NA", "n/a" or "null" into NaN,
# which to_csv would then write back out as empty strings.
mnli_df = pd.read_csv(
    "mnli_full.csv",
    sep=",",
    quoting=csv.QUOTE_ALL,
    header=None,
    keep_default_na=False,
)

# Keep only the columns at positions 1 and 2, then shuffle all rows so the
# positional split below is a random split.
mnli_small_df = mnli_df.iloc[:, 1:3]
mnli_small_df = mnli_small_df.sample(frac=1.0, random_state=RANDOM_SEED)

total_size = mnli_small_df.shape[0]
print("Initial size: ", total_size)

# Duplicates should already have been removed during export, but there are
# duplicates in the train data, so drop them again here to be safe. Doing this
# before splitting ensures the same row cannot end up in both output files.
mnli_small_df = mnli_small_df.drop_duplicates()
total_size = mnli_small_df.shape[0]
print("Size after dropping duplicates: ", total_size)

# Number of leading (shuffled) rows that go to the train file.
train_split_size = round(TRAIN_SPLIT * total_size)

train_data = mnli_small_df.iloc[:train_split_size]
test_data = mnli_small_df.iloc[train_split_size:]

# Write both parts without index or header, quoting every field.
train_data.to_csv('mnli_train-train.csv', index=False, quoting=csv.QUOTE_ALL, header=False)
test_data.to_csv('mnli_train-test.csv', index=False, quoting=csv.QUOTE_ALL, header=False)
