import copy
import math
import os
import pickle
import random
import time

import matplotlib.pyplot as plt
import numpy as np
import scipy
from matplotlib import ticker
from matplotlib.ticker import FuncFormatter
from scipy.spatial.distance import cdist
from scipy.spatial.distance import pdist
from sklearn.cluster import KMeans
from sklearn.neighbors import KDTree
from tqdm import tqdm
from ucimlrepo import fetch_ucirepo

from tools import dist
from Baseline import baseline
from Consistent_alg import constrained_kmeans
from Consistent_alg import Consistent_Clustering
from coreset import incremental_coreset

# Dataset ids passed to fetch_ucirepo(id=...):
#   skin      = 229
#   shuttle   = 148
#   covertype = 31

# Experiment driver: for every (dataset, k) pair run the baseline, build the
# incremental coreset, run the consistent-clustering algorithm on that coreset,
# and pickle the recorded time / consistency / cost series under ./<name>/.

# Ids handed to fetch_ucirepo, and the values of k to evaluate on each dataset.
database = [229, 148, 31]
candidate_k = [5, 10, 20]
# Dataset id -> name used for the output directory and the file-name prefix.
s = {229: 'skin', 148: 'shuttle', 31: 'covertype'}

# Sampling rate handed to incremental_coreset.init for every run.
sample_rate = 0.1


def _dump(obj, path):
    """Pickle ``obj`` to ``path``.

    The file is opened in a ``with`` block so the handle is flushed and closed
    as soon as the dump finishes (or fails), instead of being left open for
    the rest of the run.
    """
    with open(path, 'wb') as f:
        pickle.dump(obj, f)


for data_id in database:
    name = s[data_id]

    # Make sure the output directory exists before any expensive work, so a
    # missing directory cannot discard a finished computation.
    os.makedirs('./%s' % name, exist_ok=True)

    # The dataset does not depend on k, so fetch it once per dataset rather
    # than once per (dataset, k) pair.
    Data = fetch_ucirepo(id=data_id)

    for k in candidate_k:
        print('begin: %s, k=%d' % (name, k))

        # Fresh array for every k, exactly as the original per-iteration
        # conversion produced (features are a pandas dataframe).
        X = np.array(Data.data.features)

        # Common prefix of every output file for this (dataset, k) pair.
        prefix = './%s/%s-k-%d' % (name, name, k)

        # --- Baseline (results stored under the "LV17" tag) ---
        Base = baseline()
        Base.init_parameter(X, k)
        Base.sketch()

        _dump(Base.time, prefix + '-LV17-time.pkl')
        _dump((Base.index, Base.consistency), prefix + '-LV17-consistency.pkl')
        _dump((Base.index, Base.cost), prefix + '-LV17-cost.pkl')

        # --- Incremental coreset ---
        Coreset = incremental_coreset()
        Coreset.init(X, k, sample_rate)
        Coreset.fit()

        # The whole coreset object is stored, not just selected attributes.
        _dump(Coreset, prefix + '-coreset.pkl')

        # --- Consistent clustering on top of the coreset ---
        alg = Consistent_Clustering()
        alg.init(k, Coreset.coreset_points, Coreset.coreset_weight,
                 del_parameter=1.1, wsp_parameter=1e-4)
        alg.simulate()

        # Total time = coreset time + clustering time. Start from independent
        # copies of the coreset timings so Coreset.time itself is not modified.
        alg_time = copy.deepcopy(Coreset.time)
        brute_time = copy.deepcopy(Coreset.time)

        # Entry i of the clustering timings is added at position
        # Coreset.index[i] of the coreset timings.
        for i, elapsed in enumerate(alg.time):
            position = Coreset.index[i]
            alg_time[position] += elapsed
            brute_time[position] += alg.time_brute_force[i]

        _dump(brute_time, prefix + '-naive-time.pkl')
        _dump(alg_time, prefix + '-ours-time.pkl')

        _dump((Coreset.index, alg.baseline_consistency),
              prefix + '-naive-consistency.pkl')
        _dump((Coreset.index, alg.consistency),
              prefix + '-ours-consistency.pkl')

        _dump((Coreset.index, alg.cost_kmeans), prefix + '-naive-cost.pkl')
        _dump((Coreset.index, alg.cost_alg), prefix + '-ours-cost.pkl')

        print('finish %s, k=%d' % (name, k))
