


from rank_bm25 import BM25Okapi
from nltk.tokenize import word_tokenize



class BM25:
	"""Rank candidate quotes against a sentence using Okapi BM25 scores."""

	def __init__(self):
		"""Create the ranker; it keeps no state between calls."""
		pass

	def attribute(self, sentence, quotes):
		"""Return nested prefixes of the quote indexes ranked by BM25 score.

		The quotes are lower-cased, tokenized and indexed with ``BM25Okapi``;
		the sentence is lower-cased and tokenized the same way and used as
		the query.

		Args:
			sentence: Query text (a string).
			quotes: Iterable of candidate quote strings.

		Returns:
			A list of three lists of integer positions into ``quotes``,
			ordered by descending score: the first 0, the first 1 and the
			first 2 ranked indexes. Equal scores keep their original
			relative order. When ``quotes`` is empty, ``[[], [], []]`` is
			returned.
		"""
		# Tokenize documents
		tokenized_quotes = [word_tokenize(quote.lower()) for quote in quotes]

		# BM25Okapi divides by the corpus size when computing the average
		# document length, so an empty corpus must be handled before the
		# index is built. The result keeps the usual three-slice shape.
		if not tokenized_quotes:
			return [[], [], []]

		index = BM25Okapi(tokenized_quotes)

		# Tokenize query
		tokenized_query = word_tokenize(sentence.lower())
		scores = index.get_scores(tokenized_query)

		# Rank scores (highest first); sorted() is stable, so ties keep the
		# original quote order.
		ranked = sorted(range(len(scores)), key=scores.__getitem__, reverse=True)

		# NOTE(review): the first cut-off is 0, so the first returned list is
		# always empty. Confirm with callers whether top-1/2/3 was intended;
		# the original cut-offs are preserved here.
		cutoffs = (0, 1, 2)
		return [ranked[:k] for k in cutoffs]





