import nltk
# nltk.download('punkt')
# nltk.download('stopwords')
from collections import Counter
from nltk.corpus import stopwords
# from word_cloud.word_cloud_generator import WordCloud
# from IPython.core.display import HTML
# from nltk.corpus import reuters
import nltk
# import pandas as pd


# Read the whole corpus. Newlines are replaced with a space (not removed) so
# the last word of one line is not fused with the first word of the next.
with open('PATH/.txt', 'r') as file:
    texts = file.read().replace('\n', ' ')

# Tokenize, lower-case, and drop punctuation-only tokens.
words = nltk.word_tokenize(texts)
words_lower = [word.lower() for word in words]
# NOTE: isalnum() keeps purely numeric and mixed alphanumeric tokens too.
words_alpha = [word for word in words_lower if word.isalnum()]

# Start from NLTK's English stop-word list and extend it with extra words
# that should not be counted.
stop_words = set(stopwords.words('english'))
add_to_stopwords = {
    'next', 'front', 'rear', 'besides', 'below', 'under', 'near', 'back',
    'side', 'background', 'foreground', 'behind', 'along',
    # 'top' and 'small' must be separate entries; without the comma Python
    # concatenates adjacent string literals into the single word 'topsmall'.
    'top', 'small', 'large', 'sitting', 'driving', 'riding', 'laying',
    'standing', 'looking', 'holding', 'wearing',
    'outside', 'inside', 'another', 'together', 'old', 'playing', 'open',
    'close', 'new', 'one',
    'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine', 'ten',
    'like', 'looks', 'owner',
}
stop_words.update(add_to_stopwords)

# Count every remaining token.
filtered_words = [word for word in words_alpha if word not in stop_words]
word_counts = Counter(filtered_words)

# most_common() orders by descending count; ties keep first-seen order.
word_counts_sorted = dict(word_counts.most_common())
top10words = word_counts.most_common(10)

# Print the ten most frequent words as (word, count) pairs
print(top10words)
