
from scipy.spatial import distance


import nltk
from nltk.tokenize import sent_tokenize

# Fetch the NLTK tokenizer resources when this module is imported
for _resource in ('punkt', 'punkt_tab'):
	nltk.download(_resource)
del _resource  # keep the loop variable out of the module namespace


# 
# By using this file, you are agreeing to this product's EULA
#
# This product can be obtained from https://anonymous.4open.science/r/SAFE-ICLR
#
# Copyright ©2024-2025 XXXX-1
#


def myDistance(t1, t2):
	"""
	Compute the cosine distance separating two equally sized vectors.

	Thin wrapper around scipy.spatial.distance.cosine: t1 and t2 are
	handed to it unchanged and its result is returned as is.
	"""
	cosine_gap = distance.cosine(t1, t2)
	return cosine_gap

def getDistances(sentence, quoteDB):
	"""
	Measure how far `sentence` lies from every entry of `quoteDB`.

	Returns a list with one myDistance(sentence, quote) value per quote,
	in the iteration order of quoteDB (empty list for an empty quoteDB).
	"""
	gaps = []
	for candidate in quoteDB:
		gaps.append(myDistance(sentence, candidate))
	return gaps



class SelectClosestNAttributor:
	"""
	Attribute a sentence to the quote(s) whose embedding lies closest to it.

	Closeness is whatever the module-level getDistances / myDistance helpers
	return; the smallest value is treated as the best match.
	"""

	# Embedding callable used by attribute(); set per instance in __init__.
	model = None

	def __init__(self, model):
		"""
		Store the embedding callable.

		model: invoked as model(text); attribute() reads model(text)[-1][-1].
		"""
		self.model = model

	def attribute(self, sentence, quotes):
		"""
		Return the index of the quote closest to `sentence`, wrapped in a list.

		sentence: input handed to self.model.
		quotes: iterable of inputs, each handed to self.model.

		Returns a one-element list [index]; on ties the lowest index wins.
		Raises ValueError when `quotes` yields no element.
		"""
		emb = list(self.model(sentence)[-1][-1])

		# NOTE(review): each quote keeps only element [0] of the same model
		# output, whereas the sentence keeps the whole list. Presumably the
		# output carries a leading batch axis — confirm against the model used.
		target = [list(self.model(q)[-1][-1])[0] for q in quotes]
		if not target:
			# Fail with an explicit message rather than the opaque error
			# min() raises on an empty sequence (same exception type).
			raise ValueError("quotes must contain at least one quote")

		distances = getDistances(emb, target)

		return [distances.index(min(distances))]

	def attribute_PC(self, sentence_emb, quotes_emb):
		"""
		Attribute a pre-computed sentence embedding to the closest single
		quote and to the closest pair of quotes.

		sentence_emb: embedding of the sentence.
		quotes_emb: sized, indexable collection of quote embeddings; every
			embedding is read up to the length of the first one.

		Returns a 3-tuple ([], [best_single], best_pair):
		  - the first element is always an empty list;
		  - best_single is the index of the closest quote (lowest index on ties);
		  - best_pair lists the distinct indices of the pair (i, j), i <= j,
		    whose element-wise average is closest to the sentence. It holds a
		    single index when the best "pair" is a quote paired with itself.
		Raises ValueError when `quotes_emb` is empty.
		"""
		count = len(quotes_emb)
		if count == 0:
			# Same exception type min() would raise, with a clearer message.
			raise ValueError("quotes_emb must contain at least one embedding")

		single_distances = getDistances(sentence_emb, quotes_emb)
		ind_r1 = single_distances.index(min(single_distances))

		# Every embedding is read up to the length of the first one.
		dim = len(quotes_emb[0])

		# Track a running minimum over all pairs (i <= j) instead of storing
		# and sorting every candidate. The strict '<' keeps the first pair seen
		# on ties, i.e. the lexicographically smallest (i, j).
		best_distance = None
		best_pair = None
		for i in range(count):
			first = quotes_emb[i]
			for j in range(i, count):
				second = quotes_emb[j]
				avg = [(first[z] + second[z]) / 2 for z in range(dim)]
				candidate = myDistance(sentence_emb, avg)
				if best_distance is None or candidate < best_distance:
					best_distance = candidate
					best_pair = (i, j)

		# set() collapses (i, i) into a single index.
		ind_r2 = list(set(best_pair))

		return ([], [ind_r1], ind_r2)



