'''
- euclidean.py
- This file handles measuring various euclidean distances for a given embedding
'''

# External Imports
import re
import math
from scipy.spatial import distance

# Internal imports
import src.core.interface.ranking as ranking



'''
----------rank_by_euclidean----------
- This function generates a euclidean distance-based ranking based on the specified text using the specified embedding
-----Inputs-----
- text - the text to rank by
- features - the features to rank
- embedding - the current embedding to use
- configuration - the currently-active configuration
-----Output-----
- new_ranking - a new ranked list of the features based on the text and embedding
'''
def rank_by_euclidean(text, features, embedding, configuration):
    """Rank features by how close their embeddings are to the text's embedding.

    Each feature is compared to ``text`` twice: once through its description
    (with parenthesised segments removed) and once through its name (with
    underscores turned into spaces and lowercased). The smaller of the two
    euclidean distances is converted to a score with ``exp(-distance)``, so a
    distance of 0 scores 1.0 and larger distances score closer to 0.

    Args:
        text: The text to rank by.
        features: Iterable of mappings, each providing a ``"name"`` and a
            ``"description"`` string.
        embedding: The embedding handed through to
            ``ranking.get_avg_text_embedding``.
        configuration: The currently-active configuration, also handed
            through to ``ranking.get_avg_text_embedding``.

    Returns:
        A list of ``{"name": ..., "distribution": ...}`` dicts, one per
        feature, sorted by ``"distribution"`` from highest to lowest. The
        ``"name"`` value is the feature's original, un-normalised name.
    """
    # Matches one parenthesised segment without nested parentheses, e.g.
    # "(in cm)". Raw string avoids invalid-escape warnings; compiled once
    # because the pattern does not change between features.
    parenthetical = re.compile(r'\([^()]*\)')
    text_embedding = ranking.get_avg_text_embedding(text, embedding, configuration)

    new_ranking = []
    for feature in features:
        name = feature["name"].replace("_", " ").lower()
        description = parenthetical.sub('', feature["description"])
        description_dist = distance.euclidean(
            text_embedding,
            ranking.get_avg_text_embedding(description, embedding, configuration),
        )
        name_dist = distance.euclidean(
            text_embedding,
            ranking.get_avg_text_embedding(name, embedding, configuration),
        )
        # exp(-d) is strictly decreasing, so the better (larger) score always
        # comes from the smaller distance; pick that one before exponentiating.
        nearest = description_dist if description_dist < name_dist else name_dist
        # exp(-d) rather than 1 / exp(d): same value, but a very large distance
        # underflows to 0.0 instead of raising OverflowError.
        new_ranking.append({"name": feature["name"], "distribution": math.exp(-nearest)})
    return sorted(new_ranking, key=lambda entry: entry["distribution"], reverse=True)


'''
----------rank_by_euclidean_max_pull----------
- This function generates a max-pulled euclidean distance-based ranking based on the specified text using the specified embedding
-----Inputs-----
- text - the text to rank by
- features - the features to rank
- embedding - the current embedding to use
- configuration - the currently-active configuration
-----Output-----
- new_ranking - a new ranked list of the features based on the text and embedding
'''


'''
----------rank_by_euclidean_ngram_window----------
- This function generates an n-gram windowed euclidean distance-based ranking based on the specified text using the specified embedding
-----Inputs-----
- text - the text to rank by
- features - the features to rank
- embedding - the current embedding to use
- configuration - the currently-active configuration
- window_size - the window size to use (this is 3 by default)
-----Output-----
- new_ranking - a new ranked list of the features based on the text and embedding
'''