//Coreset Construction

#include "coreset.h"


// Coreset of Feldman & Schulman (2012), built via sensitivity sampling
// Picks a single candidate center for X by exhaustive search over a small sample.
//
// If X has fewer than 1000 points the sample S is X itself; otherwise S holds
// 1000 points drawn from X (with replacement) at indices randm() * |X|.
// Every point s of S is scored with rcost(S, {s}, int(|S| - |S| / k), 1) and the
// point with the smallest score is returned; on ties the earliest one wins.
//
// Returns a default-constructed datapoint when X is empty.
// NOTE(review): randm() and rcost() are defined elsewhere; randm() is assumed to
// return a value in [0, 1] and rcost() to be deterministic -- confirm.
datapoint triMedian(dataset X, const int &k){
	if(X.empty()) return datapoint();
	dataset S;
	if(X.size() < 1000){
		S = X;
	}
	else{
		for(int i = 0; i < 1000; i++){
			int idx = (int)(randm() * X.size());
			// Guard against randm() returning exactly 1, which would index past the end.
			if(idx >= (int)X.size()) idx = (int)X.size() - 1;
			S.push_back(X[idx]);
		}
	}
	// Third argument of rcost is the same for every candidate, so compute it once.
	const int kept = int(S.size() - S.size() / (double)k);
	int best = 0;
	double cst = rcost(S, (dataset){S[0]}, kept, 1);
	for(int i = 1; i < (int)S.size(); i++){
		// Evaluate each candidate exactly once and reuse the value.
		const double cur = rcost(S, (dataset){S[i]}, kept, 1);
		if(cur < cst){
			cst = cur;
			best = i;
		}
	}
	return S[best];
}

// Repeatedly shrinks X around a candidate center.
//
// Runs at most T rounds (T < 0 means T = k). In each round a center q is chosen
// with triMedian(Q, k), the points of Q are sorted by dist(point, q), the
// ceil(|Q| / k / 2) closest points are kept as the new Q, and all other points
// are appended to Rem. The loop stops early once Q has at most one point.
//
// Returns {Q, Rem}: the points that survived every round and the points that
// were discarded along the way.
pair<dataset, dataset> RRMedian(dataset X, const int &k, int T){
	if(T < 0) T = k;
	dataset Q = X, Rem;
	for(int i = 1; i <= T; i++){
		// An empty Q must not reach triMedian; a single point cannot shrink further.
		if(Q.size() <= 1) break;
		const int n = (int)Q.size();
		datapoint q = triMedian(Q, k);
		vector<pair<double,int> > P;
		for(int j = 0; j < n; j++) P.push_back({dist(Q[j], q), j});
		sort(P.begin(),P.end());
		// Number of closest points to keep, clamped to [0, n] so that a
		// non-positive k cannot drive the index out of range.
		const double want = ceil(n / (double)k / 2);
		int keep = n;
		if(!(want > 0)) keep = 0;
		else if(want < n) keep = (int)want;
		dataset curQ;
		for(int j = 0; j < keep; j++){
			curQ.push_back(Q[P[j].second]);
		}
		for(int j = keep; j < n; j++){
			Rem.push_back(Q[P[j].second]);
		}
		Q = curQ;
	}
	return {Q, Rem};
}

// Builds a list of (point, value) pairs covering every point of X.
//
// While more than 100 points remain, RRMedian splits them into a surviving
// group and a remainder. Each survivor p is emitted with the value
// p[0] * 100 * k / (size of the surviving group), and the remainder becomes the
// working set of the next pass. The points left at the end are emitted with
// the value p[0]. The size of the working set is printed to cerr at the start of
// every pass.
// NOTE(review): the emitted values are consumed as sampling weights by
// Imp_Coreset in this file; p[0] appears to be the point's weight -- confirm.
vector<pair<datapoint, double> > GetDistri(dataset X, const int &k, int T){
	const double limit = 100;
	vector<pair<datapoint, double> > out;
	dataset pending = X;
	while(pending.size() > limit){
		cerr << pending.size()<< endl;
		auto split = RRMedian(pending, k, T);
		const dataset &dense = split.first;
		for(int i = 0; i < (int)dense.size(); i++){
			out.push_back({dense[i], dense[i][0] * limit * k / (double)dense.size()});
		}
		pending = split.second;
	}
	for(int i = 0; i < (int)pending.size(); i++){
		out.push_back({pending[i], pending[i][0]});
	}
	return out;
}

// Draws n points from Y using the second component of each pair as its
// sampling value.
//
// A sampler (defined elsewhere) is initialised with the values Y[i].second and
// queried n times. Each drawn point is copied and its element 0 is multiplied
// by (sum of all values) / (value of the drawn entry) / n before it is added to
// the result.
// NOTE(review): sampler::sample() presumably returns an index drawn with
// probability proportional to the supplied values -- confirm.
dataset Imp_Coreset(vector<pair<datapoint, double> > Y, const int &n){
	vector<double> values;
	double total = 0;
	for(const auto &entry : Y){
		values.push_back(entry.second);
		total += entry.second;
	}
	sampler sp;
	sp.init(values);
	dataset C;
	for(int drawn = 0; drawn < n; drawn++){
		const auto &picked = Y[sp.sample()];
		datapoint pt = picked.first;
		pt[0] *= total / picked.second / n;
		C.push_back(pt);
	}
	return C;
}


// Our Coreset
// Greedily merges consecutive datasets of _Light into groups whose accumulated
// cost(., c, z) does not exceed err.
//
// Datasets are visited in order. A dataset joins the current group while the
// running cost plus its own cost stays <= err; otherwise the current group is
// closed and the dataset starts a new one. The last group is always emitted,
// so the result contains an empty dataset when _Light is empty, and also when
// the very first dataset alone already exceeds err.
parti group(const parti &_Light, const datapoint &c, const double &err, int z){
	parti _G;
	dataset C;
	double cur = 0;
	for(const dataset &P : _Light){
		// cost() is evaluated once per dataset and reused below.
		const double pcost = cost(P, c, z);
		if(cur + pcost <= err){
			Union(C,P);
			cur += pcost;
		}
		else{
			_G.push_back(C);
			C = P;
			cur = pcost;
		}
	}
	_G.push_back(C);
	return _G;
}

// Splits X into distance rings around c and classifies the rings by cost.
//
// Ring 0 holds the points at distance 0 from c; a point at distance d > 0 goes
// to ring int(log2(d / mind)) + 1, where mind is the smallest positive distance
// in X. Rings are then scanned in order of increasing index:
//   - a ring with cost(ring, c, z) > err is appended to the first result (_W);
//   - runs of consecutive rings with cost <= err are merged by group() and the
//     resulting groups are appended to the second result (_Z).
// group() may contribute empty datasets to _Z (e.g. between two adjacent heavy
// rings).
//
// Returns {_W, _Z}.
pair<parti, parti> WZ_Decomposition(dataset X, const datapoint &c, const double &err, int z){
	parti _Rings, _W, _Z;
	double maxd = 1, mind = 1e18;
	bool anyPositive = false;
	for(const datapoint &x : X){
		const double d = dist(x, c);
		maxd = max(maxd, d);
		if(d > 0){
			mind = min(mind, d);
			anyPositive = true;
		}
	}
	// Without any positive distance mind is still the 1e18 sentinel and
	// int(log2(maxd/mind)) + 2 would be negative, which resize() would turn
	// into a huge unsigned count. Only ring 0 is needed in that case.
	if(anyPositive) _Rings.resize(int(log2(maxd/mind)) + 2);
	else _Rings.resize(1);
	for(const datapoint &x : X){
		const double d = dist(x, c);
		if(!(d > 0)) _Rings[0].push_back(x);
		else _Rings[int(log2(d/mind)) + 1].push_back(x);
	}
	parti _Light;
	for(const dataset &P : _Rings){
		if(cost(P, c, z) > err){
			_W.push_back(P);
			// A heavy ring ends the current run of light rings.
			Union(_Z, group(_Light, c, err, z));
			_Light.clear();
		}
		else{
			_Light.push_back(P);
		}
	}
	Union(_Z, group(_Light, c, err, z));
	return make_pair(_W, _Z);
}

// Replaces X by at most two of its own points: the one closest to c and the
// one farthest from c (the earliest such point in X in case of ties).
//
// Element 0 of the two returned points is overwritten:
//   - if the closest and farthest distances are equal, a single point is
//     returned whose element 0 is the sum of x[0] over X;
//   - otherwise each x contributes x[0] * (dist(x,c) - d_c) / (d_f - d_c) to
//     the far point and x[0] * (d_f - dist(x,c)) / (d_f - d_c) to the close
//     point, where d_c and d_f are the closest and farthest distances.
// An empty X is returned unchanged.
dataset Two_point_Coreset(dataset X, const datapoint &c){
	if(X.empty()) return X;
	const int count = (int)X.size();
	int nearIdx = 0, farIdx = 0;
	double d_c = dist(X[0], c);
	double d_f = d_c;
	for(int i = 1; i < count; i++){
		const double d = dist(X[i], c);
		if(d < d_c){
			d_c = d;
			nearIdx = i;
		}
		if(d > d_f){
			d_f = d;
			farIdx = i;
		}
	}
	datapoint p_c = X[nearIdx];
	datapoint p_f = X[farIdx];
	dataset result;
	p_c[0] = 0;
	if(d_c == d_f){
		// Every point is equally far from c: one representative carries it all.
		for(int i = 0; i < count; i++) p_c[0] += X[i][0];
		result.push_back(p_c);
		return result;
	}
	p_f[0] = 0;
	const double span = d_f - d_c;
	for(int i = 0; i < count; i++){
		const double d = dist(X[i], c);
		p_f[0] += X[i][0] * (d - d_c) / span;
		p_c[0] += X[i][0] * (d_f - d) / span;
	}
	result.push_back(p_c);
	result.push_back(p_f);
	return result;
}

// Draws n points from X with a sampler initialised from the values X[i][0].
//
// Every drawn point is copied and its element 0 is set to
// (sum of X[i][0] over X) / n, so the n returned points together carry the same
// total as X. Returns an empty dataset when X is empty or n <= 0.
// NOTE(review): sampler is defined elsewhere; sample() presumably returns an
// index drawn with probability proportional to the supplied values -- confirm.
dataset Uniform_Coreset(dataset X, const int &n){
	dataset Coreset;
	// Nothing can be sampled from an empty set; X[sp.sample()] would be out of range.
	if(X.empty()) return Coreset;
	vector<double> distri;
	double sum = 0;
	for(int i = 0; i < (int)X.size(); i++){
		distri.push_back(X[i][0]);
		sum += X[i][0];
	}
	sampler sp; sp.init(distri);
	for(int i = 1; i <= n; i++){
		datapoint x = X[sp.sample()];
		x[0] = sum / n;
		Coreset.push_back(x);
	}
	return Coreset;
}


// Builds a coreset of X with respect to the approximate centers C_approx.
//
// Steps:
//  1. X is sorted by dist(point, C_approx). Starting from the farthest point,
//     points are moved verbatim into the coreset and their element 0 is
//     subtracted from m, until m drops below 1e-7 or X is exhausted.
//  2. If x == 0 the result of step 1 is returned.
//  3. The remaining points are assigned to clusters via NN(point, C_approx);
//     element 0 of each center is overwritten with its index so that
//     NN(...)[0] can be used as the cluster index.
//  4. Each non-empty cluster is split by WZ_Decomposition with threshold
//     thr * cost(cluster, center, z). Every group of the second part is
//     replaced by Two_point_Coreset and the number of points produced is
//     deducted from x; the rings of the first part are collected in _H.
//  5. If x became negative, "Coreset construction failed." is printed to cerr
//     and the partial coreset is returned. Otherwise the remaining x samples
//     are divided evenly among the rings in _H (the remainder is handed out
//     through a sampler initialised with init_uniform) and each ring
//     contributes Uniform_Coreset(ring, share).
//
// NOTE(review): element 0 of a datapoint appears to be its weight and m the
// total weight to treat as outliers -- confirm against the callers.
dataset Our_Coreset(dataset X, dataset C_approx, double m, int x, int z, double thr){
	dataset Coreset;
	const int total = (int)X.size();
	vector<pair<double, int>> d;
	for(int i = 0; i < total; i++){
		d.push_back({dist(X[i], C_approx), i});
	}
	sort(d.begin(), d.end());
	dataset Y = X;
	for(int i = 0; i < total; i++) X[i]= Y[d[i].second];

	// Peel points off the far end while budget m remains. The bound keep > 0
	// prevents reading X[-1] when m exceeds what all points together provide.
	int keep = total;
	while(keep > 0 && !(m < 1e-7)){
		m -= X[keep-1][0];
		Coreset.push_back(X[keep-1]);
		keep--;
	}
	X.resize(keep);

	if(x == 0) return Coreset;
	parti _Clusters;
	_Clusters.resize(C_approx.size());
	// Element 0 of each center now carries the center's index.
	for(int i = 0; i < (int)C_approx.size(); i++) C_approx[i][0] = i;
	for(int i = 0; i < (int)X.size(); i++){
		_Clusters[NN(X[i],C_approx)[0]].push_back(X[i]);
	}
	parti _H;
	for(int i = 0; i < (int)C_approx.size(); i++){
		if(_Clusters[i].empty()) continue;
		auto D = WZ_Decomposition(_Clusters[i], C_approx[i], thr * cost(_Clusters[i], C_approx[i], z), z);
		for(const dataset &S : D.second){
			dataset tpCoreS = Two_point_Coreset(S, C_approx[i]);
			Union(Coreset, tpCoreS);
			x -= (int)tpCoreS.size();
		}
		Union(_H, D.first);
	}
	if(x < 0){
		cerr << "Coreset construction failed." << endl;
		return Coreset;
	}
	if(_H.empty()) return Coreset;
	const int rings = (int)_H.size();
	vector<int> sz;
	for(int i = 0; i < rings; i++) sz.push_back(x / rings);
	sampler sp; sp.init_uniform(_H.size());
	// Hand out the x % rings leftover samples one at a time.
	for(int i = 0; i < x % rings; i++) sz[sp.sample()] ++;
	for(int i = 0; i < rings; i++){
		Union(Coreset, Uniform_Coreset(_H[i], sz[i]));
	}
	return Coreset;
}

// Baseline coreset: far points are kept verbatim, the rest is sampled.
//
// X is sorted by dist(point, C_approx). Starting from the farthest point,
// points are moved verbatim into the coreset and their element 0 is subtracted
// from m, until m drops below 1e-7 or X is exhausted. The points that remain
// are then summarised by Uniform_Coreset(remaining, x). If no point remains,
// nothing is sampled, so points already emitted are not counted a second time.
//
// NOTE(review): element 0 of a datapoint appears to be its weight and m the
// total weight to treat as outliers -- confirm against the callers.
dataset Outlier_Uniform(dataset X, dataset C_approx,double m, int x){
	dataset Coreset;
	const int total = (int)X.size();
	vector<pair<double, int>> d;
	for(int i = 0; i < total; i++){
		d.push_back({dist(X[i], C_approx), i});
	}
	sort(d.begin(), d.end());
	dataset Y = X;
	for(int i = 0; i < total; i++) X[i]= Y[d[i].second];

	// Peel points off the far end while budget m remains, then always shrink X
	// to what is left (including to zero points).
	int keep = total;
	while(keep > 0 && !(m < 1e-7)){
		m -= X[keep-1][0];
		Coreset.push_back(X[keep-1]);
		keep--;
	}
	X.resize(keep);

	if(!X.empty()) Union(Coreset, Uniform_Coreset(X, x));
	return Coreset;
}
