'''
- cosine.py
- This file handles measuring various cosine distances for a given embedding
'''

# External Imports
import re
from scipy.spatial import distance

# Internal imports
import src.core.interface.ranking as ranking



'''
----------rank_by_cosine----------
- This function ranks the specified features by the cosine similarity between the averaged embedding
  of the specified text and the averaged embedding of each feature's name plus description
-----Inputs-----
- text - the text to rank by
- features - the features to rank (each one is read through its "name" and "description" keys)
- embedding - the current embedding to use (passed through to ranking.get_avg_text_embedding)
- configuration - the currently-active configuration (passed through to ranking.get_avg_text_embedding)
-----Output-----
- ranks - a new list holding one {"name", "distribution"} dictionary per feature, where "distribution"
  is 1 - cosine distance (i.e. the cosine similarity), sorted from highest to lowest
'''
def rank_by_cosine(text, features, embedding, configuration):
    # Raw string so that \( and \) reach the regex engine untouched instead of being parsed as
    # (invalid) string escapes; compiled once up front because the pattern is the same for every feature
    parenthetical = re.compile(r'\([^()]*\)')
    # The query text is embedded a single time and compared against every feature
    text_embedding = ranking.get_avg_text_embedding(text, embedding, configuration)
    new_ranking = []
    for feature in features:
        # Replace underscores with spaces and lowercase the name before it is embedded
        name = feature["name"].replace("_", " ").lower()
        # Strip parenthesised spans that contain no nested parentheses from the description
        # NOTE(review): the description is not lowercased like the name is - confirm this is intended
        description = parenthetical.sub('', feature["description"])
        full_text = name + " " + description
        feature_embedding = ranking.get_avg_text_embedding(full_text, embedding, configuration)
        # distance.cosine returns a distance, so subtract it from 1 to obtain a similarity
        similarity = 1 - distance.cosine(text_embedding, feature_embedding)
        # The original (un-normalised) feature name is what gets reported back
        new_ranking.append({"name": feature["name"], "distribution": similarity})
    # Most similar features come first
    return sorted(new_ranking, key=lambda entry: entry["distribution"], reverse=True)


'''
----------rank_by_cosine_max_pull----------
- This function generates a max-pulled cosine distance-based ranking based on the specified text using the specified embedding
-----Inputs-----
- text - the text to rank by
- features - the features to rank
- embedding - the current embedding to use
- configuration - the currently-active configuration
-----Output-----
- new_ranking - a new ranked list of the features based on the text and embedding
'''


'''
----------rank_by_cosine_ngram_window----------
- This function generates an n-gram windowed cosine distance-based ranking based on the specified text using the specified embedding
-----Inputs-----
- text - the text to rank by
- features - the features to rank
- embedding - the current embedding to use
- configuration - the currently-active configuration
- window_size - the window size to use (this is 3 by default)
-----Output-----
- new_ranking - a new ranked list of the features based on the text and embedding
'''