import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tqdm
from tqdm import tqdm
import os
import torch
import pickle
from sklearn import metrics
import random
%matplotlib inline
import seaborn as sns
import matplotlib.lines as mlines
import matplotlib.pyplot as plt

def get_anisotropy(data, n, *, include_self=True):
    """Estimate anisotropy as the mean pairwise cosine similarity of a sample.

    Draws ``n`` items from ``data`` without replacement using
    ``random.sample`` (so ``random.seed`` controls reproducibility), flattens
    each item to a 1-D vector, and averages the cosine similarity over the
    ordered pairs of sampled vectors.

    Args:
        data: Iterable of array-likes (for example the rows of a 2-D tensor
            or array). Every item must be convertible with ``np.asarray``
            and flatten to the same length.
        n: Number of items to sample; must be at least 1 and no larger than
            the number of items in ``data``.
        include_self: When true (the default), the ``n`` self-pairs
            ``(i, i)`` are part of the average. Each non-zero vector has
            similarity 1 with itself, which shifts the estimate upward by
            about ``(1 - mean) / n``. Pass ``False`` to average over distinct
            pairs only; this requires ``n >= 2``.

    Returns:
        The mean cosine similarity as a Python float rounded to 3 decimals.

    Raises:
        ValueError: If ``n`` is negative or exceeds the population size
            (raised by ``random.sample``), if ``n`` is 0, or if
            ``include_self`` is false and fewer than two items were sampled.
    """
    sampled = random.sample(list(data), n)
    if not sampled:
        raise ValueError("n must be at least 1 to compute a mean similarity")

    # float64 keeps the n**2-term sum accurate even for float32 embeddings.
    vectors = np.stack(
        [np.asarray(item, dtype=np.float64).ravel() for item in sampled]
    )

    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    # All-zero vectors are divided by 1 instead of 0, so they score a
    # similarity of 0 against everything rather than NaN (the same convention
    # scikit-learn's cosine_similarity uses).
    norms[norms == 0.0] = 1.0
    unit = vectors / norms

    # sum_{i,j} <u_i, u_j> == ||sum_i u_i||**2, so the full n x n similarity
    # matrix never has to be materialised.
    total = float(np.sum(unit.sum(axis=0) ** 2))
    pair_count = len(sampled) ** 2

    if not include_self:
        if len(sampled) < 2:
            raise ValueError("include_self=False requires n >= 2")
        # Remove the diagonal terms <u_i, u_i> from both sum and count.
        total -= float(np.sum(unit * unit))
        pair_count -= len(sampled)

    return round(total / pair_count, 3)
        
    
# Per-layer anisotropy of the original vs. the approximated embeddings.
# For every layer, both embedding files are loaded, a random sample of each is
# scored with get_anisotropy, and the results are written to anisotropy.csv.

# Number of per-layer embedding files (layer indices 0..12).
# NOTE(review): presumably the embedding output plus the 12 encoder layers of
# BERT-base -- confirm against the code that produced the files.
NUM_LAYERS = 13
# Number of embeddings sampled per layer for each similarity estimate.
SAMPLE_SIZE = 1000

ORIGINAL_EMB_TEMPLATE = '/embedding_dir_path/ms_embs/bert/original/original_all_{}.pt'
APPROX_EMB_TEMPLATE = '/makesense_dir_path/data/approximated_embeddings/layer_{}.pt'

avg_sim_o = []  # mean cosine similarity per layer, original embeddings
avg_sim_a = []  # mean cosine similarity per layer, approximated embeddings

for layer in range(NUM_LAYERS):
    print("layer", layer)
    # SECURITY: torch.load unpickles the file contents; only load embedding
    # files that come from a trusted source.
    # map_location="cpu" lets files saved from a GPU be loaded on any machine
    # and keeps the tensors convertible to NumPy.
    data_o = torch.load(ORIGINAL_EMB_TEMPLATE.format(layer), map_location="cpu")
    data_a = torch.load(APPROX_EMB_TEMPLATE.format(layer), map_location="cpu")
    avg_sim_o.append(get_anisotropy(data_o, SAMPLE_SIZE))
    avg_sim_a.append(get_anisotropy(data_a, SAMPLE_SIZE))

anisotropy_df = pd.DataFrame(
    zip(range(NUM_LAYERS), avg_sim_o, avg_sim_a),
    columns=['layer', 'avg_sim_o', 'avg_sim_a'],
)
anisotropy_df.to_csv('/makesense_dir_path/results/anisotropy.csv', index=False)
