
from scipy.spatial import distance


import nltk
from nltk.tokenize import sent_tokenize

# Download the NLTK 'punkt' and 'punkt_tab' tokenizer resources.
# These calls run as a side effect every time this module is imported.
# NOTE(review): sent_tokenize is imported above but not used in the visible part of this file -- confirm the downloads are still required.
nltk.download('punkt')
nltk.download('punkt_tab')


# 
# By using this file, you are agreeing to this product's EULA
#
# This product can be obtained at https://anonymous.4open.science/r/SAFE-ICLR
#
# Copyright ©2024-2025 XXXX-1
#


def myDistance(t1, t2):
	"""
	Compute the cosine distance between the vectors `t1` and `t2`.

	This is a thin wrapper around `scipy.spatial.distance.cosine`; both
	vectors must have the same length.
	"""
	cosine_dist = distance.cosine(t1, t2)
	return cosine_dist

def getDistances(sentence, quoteDB):
	"""
	Return the list of distances from `sentence` to every entry of `quoteDB`.

	The result keeps the iteration order of `quoteDB`; each distance is
	computed with `myDistance(sentence, quote)`.
	"""
	distances = []
	for quote in quoteDB:
		distances.append(myDistance(sentence, quote))
	return distances



class SelectClosestAttributor:
	"""
	Attribute a sentence to the closest quote, using the distances returned
	by `getDistances` between embeddings.

	Every attribution method reports the position of the candidate with the
	smallest distance to the query; on ties the earliest candidate wins.
	"""
	# Class-level defaults; __init__ overrides both on every instance.
	model = None
	embDB = None

	def __init__(self, model, embDB = None, sentences = None):
		"""
		Store the embedding model and the quote-embedding database.

		model     -- embedding model; the methods call `model.encode(text)` and/or `model(text)`
		embDB     -- precomputed quote embeddings, stored as given when provided
		sentences -- texts encoded with `model.encode` to build the database when `embDB` is None
		"""
		self.model = model
		if embDB is None and sentences is not None:
			embDB = [list(model.encode(text)) for text in sentences]
		self.embDB = embDB

	@staticmethod
	def _closest_index(distances):
		"""Return the position of the smallest value in `distances` (the first one on ties)."""
		return distances.index(min(distances))

	def attribute(self, sentence, quotes):
		"""
		Embed `sentence` and every entry of `quotes` by calling the model
		directly, then return `[index]` of the closest quote.

		NOTE(review): the sentence embedding is the whole `model(...)[-1][-1]`
		sequence, whereas each quote embedding is only its first element --
		confirm that this asymmetry is intended.
		"""
		query = list(self.model(sentence)[-1][-1])
		candidates = []
		for quote in quotes:
			candidates.append(list(self.model(quote)[-1][-1])[0])
		return [self._closest_index(getDistances(query, candidates))]

	def attribute_PC(self, sentence_emb, quotes_emb=None):
		"""
		Find the closest quote for an already computed sentence embedding.

		Distances are measured against `quotes_emb` when it is given, and
		against `self.embDB` otherwise. Returns `([], [index], [index])`.
		"""
		candidates = self.embDB if quotes_emb is None else quotes_emb
		best = self._closest_index(getDistances(sentence_emb, candidates))
		return ([], [best], [best])

	def attribute_alreadyHaveTheQuotes(self, sentence):
		"""
		Encode `sentence` with `model.encode` and return `[index]` of the
		closest entry in `self.embDB`.
		"""
		query = list(self.model.encode(sentence))
		return [self._closest_index(getDistances(query, self.embDB))]


