#include "kmeans.h"

//local search
// Single-swap local search over the center set C.
//   X     : input points, passed through to rcost unchanged.
//   C     : initial centers; a locally improved copy is returned.
//   candi : candidate points that may replace a center.
//   m     : forwarded to rcost (presumably the outlier budget -- defined elsewhere).
//   eps   : a swap is accepted only if it lowers the cost below (1 - eps) * current cost.
//   maxT  : maximum number of accepted swaps (one per outer round).
// Each round scans candidates in order and, for each candidate, centers in order;
// the first swap that improves enough is kept and the scan restarts.
// The search stops when a full scan finds no such swap or after maxT rounds.
dataset one_swap_local_search(dataset X, dataset C, dataset candi, double m, double eps,int maxT){
	double cst = rcost(X, C, m, 2);
	for(int t = 1; t <= maxT; t++){
		bool improved = false;
		for(const datapoint &x : candi){
			for(datapoint &c : C){
				datapoint saved = c;
				c = x; // tentatively replace this center by the candidate
				// Evaluate the swapped solution once and reuse the value on acceptance.
				const double swappedCost = rcost(X, C, m, 2);
				if(swappedCost < (1 - eps) * cst){
					cst = swappedCost;
					improved = true;
					break;
				}
				c = saved; // not good enough: undo the swap
			}
			if(improved) break;
		}
		if(!improved) break; // local optimum w.r.t. single swaps
	}
	return C;
}


//Lloyd for k-Robust Means.
// Runs at most maxT Lloyd-style rounds starting from centers C and returns the
// final centers. Coordinate 0 of a point is used as its weight; coordinate 0 of
// a center is overwritten with the center's index so that NN(x, C)[0] can be
// used as a cluster id.
// Each round:
//   1. orders the points by decreasing dist(point, C);
//   2. removes a total weight of m from the front of that order (the farthest
//      points), possibly trimming one point only partially;
//   3. assigns the remaining points to their nearest center via NN;
//   4. replaces every non-empty cluster's center by its weighted mean.
// Centers whose cluster is empty are left unchanged. Iteration stops early as
// soon as a round does not change any center.
dataset Lloyd(dataset X, dataset C, double m, int maxT){
	dataset Y = X;          // untouched copy; X is reordered and trimmed every round
	const double budget = m; // weight to discard per round
	for(int iter = 1; iter <= maxT; iter++){
		m = budget;
		X = Y;
		parti _Clusters; _Clusters.resize(C.size());
		vector<int> id;
		vector<double> d;
		id.reserve(X.size());
		d.reserve(X.size());
		for(size_t i = 0; i < X.size(); i++){
			id.push_back(static_cast<int>(i));
			d.push_back(dist(X[i], C));
		}
		// Capture by reference: the comparator may be copied by sort, and a
		// by-value capture would copy the whole distance vector each time.
		sort(id.begin(), id.end(), [&d](int a, int b){return d[a] > d[b];});
		for(size_t i = 0; i < X.size(); i++) X[i] = Y[id[i]];
		for(size_t i = 0; i < C.size(); i++) C[i][0] = i;
		// Consume the budget from the farthest points onward.
		size_t l = 0;
		for(; l < X.size(); l++){
			double take = min(X[l][0], m);
			m -= take;
			X[l][0] -= take;
			if(m < 1e-7) break;
		}
		// Skip the last touched point if it was removed completely. The bounds
		// check covers the case where the budget outlasts all points (or X is
		// empty), in which l == X.size() and X[l] does not exist.
		if(l < X.size() && X[l][0] < 1e-7) l++;
		for(size_t i = l; i < X.size(); i++){
			datapoint x = X[i];
			_Clusters[NN(x, C)[0]].push_back(x);
		}
		dataset curC = C;
		for(size_t i = 0; i < C.size(); i++){
			// An empty cluster has zero total weight; keep its old center
			// rather than dividing by zero.
			if(_Clusters[i].empty()) continue;
			datapoint c;
			double sum = 0;
			for(size_t j = 0; j < C[i].size(); j++) c.push_back(0);
			for(datapoint x : _Clusters[i]){
				c = c + x * x[0];
				sum += x[0];
			}
			c = c / sum;
			c[0] = i; // restore the index tag clobbered by the averaging
			curC[i] = c;
		}
		if(curC == C) break; // converged
		C = curC;
	}
	return C;
}

//k-Robust means algorithm (Lloyd's style)
// Performs 5 independent restarts and returns the centers with the smallest
// rcost(X, ., m, 2) among them. Each restart:
//   1. draws k points of X at random (via randm) as a baseline seeding;
//   2. tries tmeanspp seedings with thresholds go / m for go = 1, 5, 25, ...
//      up to 5 times the baseline cost, keeping the cheapest seeding;
//   3. refines that seeding with Lloyd(X, ., m, maxT).
// OPT is accepted for interface compatibility but is not used here.
dataset kmeansm(dataset X, int k, double m, int maxT, double OPT){
	(void)OPT; // unused
	dataset C;
	double cst = -1; // negative: no restart has produced a solution yet
	for(int i = 1; i <= 5; i++){
		dataset C_cur;
		for(int j = 1; j <= k; j++) C_cur.push_back(X[(int)(randm() * X.size())]);
		double bestCost = rcost(X, C_cur, m, 2);
		const double LOPT = 1;
		const double UOPT = bestCost * 5;
		for(double go = LOPT; go <= UOPT; go *= 5){
			dataset CC = tmeanspp(X, k, go / m, 2);
			// Evaluate each seeding once and reuse the value if it is adopted.
			const double seedCost = rcost(X, CC, m, 2);
			if(seedCost < bestCost){
				bestCost = seedCost;
				C_cur = CC;
			}
		}
		C_cur = Lloyd(X, C_cur, m, maxT);
		const double refinedCost = rcost(X, C_cur, m, 2);
		if(cst < 0 || refinedCost < cst){
			cst = refinedCost;
			C = C_cur;
		}
	}
	return C;
}

//Tmeans ++ from Bhaskara et al. (2019)
// Picks k centers from X one at a time. Every point carries a sampling weight
// that starts at (point weight) * 1e18; before each pick the sampler is
// re-initialised with the current weights, and after each pick every weight is
// lowered to at most (point weight) * dist(point, new center)^z. When maxd > 0
// the powered distance is capped at maxd first. Coordinate 0 of a point is
// used as its weight.
dataset tmeanspp(dataset X, const int &k, const double &maxd, int z){
	dataset centers;
	sampler picker;
	const size_t n = X.size();
	vector<double> weight(n);
	for(size_t p = 0; p < n; ++p){
		weight[p] = X[p][0] * 1e18;
	}
	int chosen = 0;
	while(chosen < k){
		picker.init(weight);
		datapoint center = X[picker.sample()];
		centers.push_back(center);
		for(size_t p = 0; p < n; ++p){
			double contribution = Pow(dist(X[p], center), z);
			if(maxd > 0){
				contribution = min(contribution, maxd);
			}
			weight[p] = min(weight[p], X[p][0] * contribution);
		}
		++chosen;
	}
	return centers;
}


//vanilla k-Means algorithm
// Seeds k centers with tmeanspp (no distance cap; best of two seedings by
// cost(X, ., 2)) and then runs at most 10 Lloyd rounds, stopping early once a
// round leaves all centers unchanged. Coordinate 0 of a point is used as its
// weight; coordinate 0 of a center is overwritten with the center's index so
// that NN(x, C)[0] can be used as a cluster id. Centers whose cluster is empty
// are left unchanged. When the global `debug` flag is set, the input and the
// centers of every round are printed to cerr.
dataset Kmeans(dataset X, const int &k){
	if(debug){
		cerr<<"Init Data:"<<endl;
		for(size_t i = 0; i < X.size(); i++){
			cerr << "w: " << X[i][0] << "; ";
			for(size_t j = 1; j < X[i].size(); j++)
				cerr << X[i][j] << ' ';
			cerr << endl;
		}
	}
	dataset C = tmeanspp(X, k, -1, 2);
	// One additional seeding attempt; keep whichever is cheaper.
	for(int t = 1; t <= 1; t++){
		dataset curC = tmeanspp(X, k, -1, 2);
		if(cost(X,curC,2) < cost(X, C, 2)) C = curC;
	}
	for(int rd = 1; rd <= 10; ++rd){
		if(debug){
			cerr<<"Round: " << rd << endl;
			cerr <<"\t Centers: "<<endl;
			for(size_t i = 0; i < C.size(); i++){
				cerr<<"\t ";
				for(size_t j = 1; j < C[i].size(); j++)
					cerr << C[i][j] << ' ';
				cerr << endl;
			}
		}
		parti _Clusters; _Clusters.resize(k);
		for(size_t i = 0; i < C.size(); i++) C[i][0] = i;
		for(datapoint x : X)
			_Clusters[NN(x, C)[0]].push_back(x);
		dataset curC = C;
		for(size_t i = 0; i < C.size(); i++){
			// An empty cluster has zero total weight; keep its old center
			// rather than dividing by zero.
			if(_Clusters[i].empty()) continue;
			datapoint c;
			double sum = 0;
			for(size_t j = 0; j < C[i].size(); j++) c.push_back(0);
			for(datapoint x : _Clusters[i]){
				c = c + x * x[0];
				sum += x[0];
			}
			c = c / sum;
			c[0] = i; // restore the index tag clobbered by the averaging
			curC[i] = c;
		}
		if(curC == C) break; // converged
		C = curC;
	}
	return C;
}

//(alpha,beta,gamma)-approximation
// Returns the cheapest center set found by repeated tmeanspp seeding with
// xi * k centers, over a geometric sequence of guesses for the optimal cost.
//   OPT < 0 : no guess supplied. k random points of X form a baseline solution;
//             guesses run from baseline / (|X| - m) upward in factors of beta.
//   OPT >= 0: OPT is used as the starting guess and no baseline exists, so the
//             first seeding is always adopted.
//   T       : number of seedings tried per guess.
// Candidates are scored with rcost using the relaxed budget
// int((1+beta)*xi/(xi-1)*m). Whenever a candidate is adopted, the upper end of
// the guess range is lowered to its cost.
dataset Approx_KRMeans(dataset X, const int &k, const double &m, int z, double OPT,int T){ //(beta, (1+beta)xi/(xi-1))-approx. using xi times k centers.
	const double beta = 10;
	const int xi = 3;
	dataset C;
	double bestCost = 0;
	bool hasBest = false; // bestCost is meaningful only once this is true
	double LOPT, UOPT;
	if (OPT < 0){
		for(int i = 1; i <= k; i++) C.push_back(X[(int)(randm() * X.size())]);
		bestCost = rcost(X, C, m, z);
		hasBest = true;
		LOPT = bestCost / (double)(X.size() - m); UOPT = bestCost * beta;
	}
	else{
		LOPT = UOPT = OPT;
	}
	for(double go = LOPT; go <= UOPT; go *= beta){ //guess the OPT
		for(int t = 1; t <= T; t++){ //amplify the success prob.
			dataset curC = tmeanspp(X, k * xi, go / m, z);
			double curCost = rcost(X, curC, int((1+beta)*xi/(double)(xi-1)*m), z);
			if(!hasBest || curCost < bestCost){
				bestCost = curCost;
				hasBest = true;
				UOPT = bestCost;
				C = curC;
			}
		}
		// A non-positive guess never grows under multiplication by beta, so
		// the loop would otherwise never terminate; try it once and stop.
		if(go <= 0) break;
	}
	return C;
}
