#########################################################################################################
# Experiment settings for the clustering run.
directory = "./"  # NOTE(review): not read anywhere in the visible part of this script
time_limit = 5 * 60 * 60  # forwarded to SGP.find_cluster; presumably seconds (5 h) - confirm
m = 100  # forwarded to SGP.learn_GP as m=
l_list = [2, 4]  # numbers of clusters to evaluate; one result CSV is written per entry
#########################################################################################################
import numpy as np
import pandas as pd
import GPy
from SGP import SurrogateGaussianProcess
from sklearn.cluster import KMeans
# Column layout of the dataset and grid / likelihood settings used further down.
likelihood_function = 'Gaussian'
grid1, grid2 = 1.0, 1.0  # cell size along col[0] and col[1] respectively
col = ["Latitude", "Longitude"]
y_col = "HousingPrices"

# Keep only the feature columns followed by the target column.
D = pd.read_csv("./data/california.csv")
D_ = D[col + [y_col]]

# X / y feed the GP fit; X_ is an independent copy of the same features that
# the rest of the script uses for gridding, posterior evaluation and k-means.
X = np.array(D_[col])
y = np.array(D_[y_col])
X_ = np.array(D_[col])
# Assign every sample to a grid1 x grid2 cell and build the cell adjacency.
# Everything is derived from integer cell indices so that the equality and
# adjacency tests are exact instead of relying on float comparisons.
_cell_size = np.array([grid1, grid2])

# np.floor rather than int(): int() truncates toward zero, which for negative
# coordinates (e.g. the longitudes here) yields a centre that lies one cell
# away from the sample and would merge the two cells around 0.
_cell_of_sample = np.floor(X_ / _cell_size).astype(int)
_cells, _inverse = np.unique(_cell_of_sample, axis=0, return_inverse=True)

# Z_: centre of the cell each sample falls in; Z__: distinct occupied centres
# (lexicographically sorted, same order np.unique would give on Z_).
Z_ = _cell_of_sample * _cell_size + _cell_size / 2
Z__ = _cells * _cell_size + _cell_size / 2

# label[i] is the row of Z__ that sample i belongs to. reshape(-1) keeps this
# one-dimensional regardless of how the NumPy version shapes the inverse.
label = np.asarray(_inverse).reshape(-1).tolist()

# neighbors_list[i] lists, in ascending order, the rows of Z__ whose cell is
# exactly one step away from cell i along a single axis (4-neighbourhood).
_row_of_cell = {cell: row for row, cell in enumerate(map(tuple, _cells.tolist()))}
neighbors_list = [
    sorted(
        _row_of_cell[adjacent]
        for adjacent in ((a - 1, b), (a + 1, b), (a, b - 1), (a, b + 1))
        if adjacent in _row_of_cell
    )
    for a, b in _cells.tolist()
]
# Fit the Gaussian process on the raw features and evaluate its posterior.
SGP = SurrogateGaussianProcess(col)

# Kernel = (Bias * ARD-RBF over all feature columns) + White.
n_features = X.shape[1]
signal_kernel = GPy.kern.Bias(1) * GPy.kern.RBF(n_features, ARD=True)
noise_kernel = GPy.kern.White(1)
kernel = signal_kernel + noise_kernel

SGP.learn_GP(
    X,
    y,
    kernel,
    m=m,
    likelihood_function=likelihood_function,
    tau=False,
)
SGP.make_posterior(X_)
# For every requested number of clusters, compare a plain k-means partition
# with the partition returned by SGP.find_cluster, write one per-sample CSV
# ("DAG_l<l>.csv") and finally one score table ("DAG_score.csv").
score_rows = []
for l in l_list:
    # NOTE(review): KMeans is created without random_state, so this baseline
    # may differ from run to run.
    omega_kmeans = KMeans(n_clusters=l).fit(X_).labels_
    score_kmeans = SGP.get_score(omega_kmeans)
    omega_DAG = SGP.find_cluster(l, time_limit=time_limit, surrogate_model="DAG", label=label, neighbors_list=neighbors_list)
    score_DAG = SGP.get_score(omega_DAG)
    print(l, score_kmeans, score_DAG)
    score_rows.append([l, score_kmeans, score_DAG])

    # Per-sample table: original columns plus the GP quantities and both
    # cluster assignments, joined on the row index in a single concat.
    D__ = pd.concat(
        [
            D_,
            pd.Series(np.asarray(SGP.mu), name="mu"),
            pd.Series(np.asarray(SGP.weight), name="weight"),
            pd.Series(np.asarray(omega_kmeans), name="omega_kmeans"),
            pd.Series(np.asarray(omega_DAG), name="omega_DAG"),
        ],
        axis=1,
    )
    D__.columns = col + [y_col, "mu", "weight", "omega_kmeans", "omega_DAG"]
    D__.to_csv("DAG_l" + str(l) + ".csv", index=False)

# Build the score table from the rows collected above. The previous call,
# pd.DataFrame(l, score_kmeans, score_DAG), passed three scalars as
# data/index/columns and threw the collected rows away.
result = pd.DataFrame(score_rows, columns=["l", "score_kmeans", "score_DAG"])
result.to_csv("DAG_score.csv", index=False)
#########################################################################################################
# Plot settings for the cluster maps.
directory = "./"  # NOTE(review): not read anywhere in the visible part of this script
l_list = [2, 4]  # cluster counts whose "DAG_l<l>.csv" files are plotted
ticks = [1.0, 1.5, 2.0, 2.5, 3.0]  # colorbar tick positions
s = 0.3  # scatter marker size
vmin, vmax = 1, 3  # limits of the shared colour normalisation
#########################################################################################################
import numpy as np 
import pandas as pd 
import seaborn as sns 
import matplotlib.pyplot as plt 
import matplotlib as mpl 
from matplotlib.colors import Normalize 
import matplotlib.gridspec as gridspec 
# Create the figure and draw the two reference panels of the first column:
# the observed target (top) and the GP mean "mu" (bottom). Both columns are
# identical in every per-l CSV, so the file of the first l is used.
D = pd.read_csv("DAG_l" + str(l_list[0]) + ".csv")
plt.rcParams["font.family"] = "Times New Roman"

# One reference column plus one column per entry of l_list, instead of a
# hard-coded 3 that only fits len(l_list) == 2. squeeze=False keeps `axes`
# two-dimensional for any column count.
fig, axes = plt.subplots(2, 1 + len(l_list), sharex="col", layout="constrained", squeeze=False)

for _ax, _value_column, _title in (
    (axes[0, 0], "HousingPrices", "Dataset"),
    (axes[1, 0], "mu", "Original Model"),
):
    _ax.scatter(D["Longitude"], D["Latitude"], c=D[_value_column], cmap='viridis', norm=Normalize(vmin=vmin, vmax=vmax), s=s)
    _ax.set_aspect('equal')
    _ax.axis("off")
    _ax.grid()
    _ax.set_title(_title)
# One column of panels per l: k-means on the top row, the DAG-based clustering
# on the bottom row. Every sample is coloured by the weighted mean of "mu"
# over its cluster (weights from the "weight" column).
_panels = (("omega_kmeans", "K-means"), ("omega_DAG", "Our Approach"))
for num, l in enumerate(l_list):
    D = pd.read_csv("DAG_l" + str(l) + ".csv")
    D["mu_weight"] = D["mu"] * D["weight"]
    for _row, (_omega_column, _method) in enumerate(_panels):
        # Weighted mean per cluster id 0..l-1; a cluster with no samples gets 0.
        _vhat = []
        for o in range(l):
            _members = D[D[_omega_column] == o]
            if len(_members) > 0:
                _vhat.append(_members["mu_weight"].to_numpy().sum() / _members["weight"].to_numpy().sum())
            else:
                _vhat.append(0)
        # Look up each sample's cluster value in one vectorised step.
        D["f"] = np.asarray(_vhat)[D[_omega_column].to_numpy()]
        _ax = axes[_row, num + 1]
        _ax.scatter(D["Longitude"], D["Latitude"], c=D["f"], cmap='viridis', norm=Normalize(vmin=vmin, vmax=vmax), s=s)
        _ax.set_aspect('equal')
        _ax.axis("off")
        _ax.grid()
        # Title built from l itself so it stays correct when l_list changes.
        _ax.set_title(f"{_method}  l = {l}")
# Shared colorbar for all panels, then save and display the figure.
# The colormap is passed explicitly so the bar matches the 'viridis' scatters
# even when the rcParams default colormap has been changed.
_color_scale = mpl.cm.ScalarMappable(norm=Normalize(vmin=vmin, vmax=vmax), cmap='viridis')
fig.colorbar(_color_scale, ax=axes.ravel().tolist(), ticks=ticks)
plt.savefig("MAP.pdf", dpi=400)
plt.show()