'''
- tokenization.py
- This file handles tokenizing data for use in training NER techniques for VIDS
'''

# External imports

# Internal imports

'''
----------tokenize_query----------
- Tokenizes the given query (For use in formatting)
-----Inputs-----
- query - The query to tokenize
-----Output-----
- tokenized_query - The tokenized query, as an array
'''
def tokenize_query(query):
    '''
    Split a query string into a flat list of tokens.

    The query is split on single space characters, and each resulting word
    is split again on every apostrophe it contains. All pieces are kept, in
    their original order.

    -----Inputs-----
    - query - The query to tokenize (a string)
    -----Output-----
    - tokenized_query - The tokens, as a list of strings. Consecutive spaces
      and leading/trailing apostrophes produce empty-string tokens, because
      splitting is done on the exact separator character.
    '''
    tokenized_query = []
    # Split on a literal single space (not arbitrary whitespace) so that the
    # token positions stay exactly as callers have always received them.
    for word in query.split(" "):
        # str.split returns [word] when there is no apostrophe, so one call
        # covers both cases. Extending with *all* pieces keeps the text after
        # a second apostrophe (e.g. "rock'n'roll"), which indexing only
        # pieces [0] and [1] would silently drop.
        tokenized_query.extend(word.split("'"))

    return tokenized_query