import os
import pandas as pd
from sklearn.model_selection import train_test_split
from ... import const
import numpy as np


# Path of the reviews CSV that split_food_review loads with pandas.
REVIEWS_PATH =  const.REVIEWS_PATH
# Directory that must already exist before split_food_review will run.
FR_RAW_DATA_DIR = const.FR_RAW_DATA_DIR
# Forwarded to train_test_split as its train_size argument.
TRAINING_SIZE = const.TRAINING_SIZE

# Maximum number of keyword-filtered reviews kept before the train/test split.
FOOD_REVIEW_SIZE = 20000


# A review is kept only if it contains at least one keyword from EVERY group.
_KEYWORD_GROUPS = (
    ("The ", "Be ", " the ", " be "),
    ("A ", " a ", "To ", " to "),
)


def _contains_any(text, keywords):
    """Return a boolean mask marking entries of *text* that contain any keyword."""
    mask = pd.Series(False, index=text.index, dtype=bool)
    for keyword in keywords:
        # Keywords are literal substrings, so regex matching is unnecessary;
        # na=False makes a missing review text count as "no match".
        mask |= text.str.contains(keyword, regex=False, na=False)
    return mask


def split_food_review(random_seed):
    """Load the food reviews, filter them by keyword and split train/test.

    The reviews CSV at ``REVIEWS_PATH`` is read once, rows whose ``Text``
    column lacks a keyword from any group in ``_KEYWORD_GROUPS`` are dropped,
    the first ``FOOD_REVIEW_SIZE`` remaining rows are kept, and the result is
    split with ``train_test_split``.

    Args:
        random_seed: Seed applied to NumPy's global random state and passed
            to ``train_test_split`` as ``random_state``.

    Returns:
        A ``(train_df, test_df)`` tuple of DataFrames.

    Raises:
        FileNotFoundError: If ``FR_RAW_DATA_DIR`` does not exist.
    """
    # Seeding the global NumPy RNG is kept because code running after this
    # call may rely on it; the split itself is driven by random_state below.
    np.random.seed(random_seed)

    # Fail early, before the (potentially large) CSV is parsed.
    if not os.path.exists(FR_RAW_DATA_DIR):
        raise FileNotFoundError(f'Directory {FR_RAW_DATA_DIR} does not exist.')

    # Load the raw food review dataset (once).
    reviews = pd.read_csv(REVIEWS_PATH)

    # Keep only reviews that match at least one keyword in every group.
    for keywords in _KEYWORD_GROUPS:
        reviews = reviews[_contains_any(reviews['Text'], keywords)]

    # Cap the dataset size; the cap is applied after filtering, in file order.
    reviews = reviews[:FOOD_REVIEW_SIZE]

    # Split train and test sets reproducibly.
    train_df, test_df = train_test_split(
        reviews, train_size=TRAINING_SIZE, random_state=random_seed
    )

    return train_df, test_df