import math
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

from algorithms import across_distance
from data_reader import data_reader

# Script body: for each dataset, sample random cross-group pairs, evaluate
# across_distance on each pair with freshly drawn random weights, and save a
# count histogram of the resulting values to Plots/DD-<dataset>.png.

# Number of random (x, y) pairs drawn per dataset.
N_SAMPLES = 10000

# Figure size and font size are the same for every dataset, so set them once.
plt.rcParams['figure.figsize'] = (16, 10)
plt.rcParams.update({'font.size': 28})

# savefig does not create missing parent directories; make sure the output
# folder exists so the script does not fail after all the sampling work.
os.makedirs('Plots', exist_ok=True)

for dataset in ["credit_card_default", 'give_me_credit', 'adult']:

    # data_reader returns a pair of collections; random.choice below requires
    # each to be a non-empty, indexable sequence of feature vectors.
    (D_1, D_2) = data_reader(dataset)
    dist = []

    for _ in range(N_SAMPLES):
        # Draw one random vector from each group to compare.
        x = random.choice(D_1)
        y = random.choice(D_2)
        # Draw per-feature parameters for the distance function: a and b are
        # uniform in [0, 1), s is uniform in [-0.01, 0.01).
        # NOTE(review): the exact role of a, b and s is defined by
        # across_distance in the algorithms module — confirm there.
        d = len(x)
        a = list(np.random.uniform(0.0, 1, d))
        b = list(np.random.uniform(0.0, 1, d))
        s = list(np.random.uniform(-0.01, 0.01, d))
        # Record the value; binning is done later by the histogram plot.
        dist.append(across_distance(x, y, a, b, s))

    # Plot the count histogram of the sampled values and write it to disk.
    sns.histplot(dist, kde=False, stat='count', color='skyblue')
    plt.xlabel('Value for across-groups similarities')
    plt.ylabel('Frequency Count')
    plt.title('Frequency count for across-groups similarities')
    plt.savefig('Plots/DD-' + dataset + '.png')
    # Close the figure so the next dataset starts from a clean canvas.
    plt.close()