/*
Experiment: speedup of Lloyd-style iterations for k-Robust Means when run on a coreset instead of the full dataset.
*/

#include "../lib/head.h"
#include "../lib/tools.h"
#include "../lib/function.h"
#include "../lib/kmeans.h"
#include "../lib/coreset.h"

// Arithmetic mean of the values in `a`.
// Returns 0 for an empty vector instead of dividing by zero (which would
// produce NaN). The vector is taken by const reference to avoid a copy.
double Mean(const std::vector<double>& a){
	if(a.empty()) return 0;
	double sum = 0;
	for(double x : a) sum += x;
	return sum / static_cast<double>(a.size());
}

// Largest value in `a`.
// The running maximum is seeded with the first element rather than 0, so
// inputs consisting only of negative values are handled correctly.
// For an empty vector the historical sentinel 0 is returned.
double Max(const std::vector<double>& a){
	if(a.empty()) return 0;
	double ret = a.front();
	for(double x : a){
		if(x > ret) ret = x;
	}
	return ret;
}

// Smallest value in `a`.
// The running minimum is seeded with the first element rather than 1, so
// inputs whose values are all greater than 1 are handled correctly.
// For an empty vector the historical sentinel 1 is returned.
double Min(const std::vector<double>& a){
	if(a.empty()) return 1;
	double ret = a.front();
	for(double x : a){
		if(x < ret) ret = x;
	}
	return ret;
}

// Population variance of the values in `a` (sum of squared deviations from
// the mean, divided by the number of elements).
// Returns 0 for an empty vector instead of dividing by zero (which would
// produce NaN). The mean is computed inline so the input is not copied.
double Var(const std::vector<double>& a){
	if(a.empty()) return 0;
	const double n = static_cast<double>(a.size());
	double sum = 0;
	for(double x : a) sum += x;
	const double ave = sum / n;
	double ret = 0;
	for(double x : a) ret += (x - ave) * (x - ave);
	return ret / n;
}

/*
Benchmark driver.

Reads N points of dimension D from a text file, then repeats T times:
  1. run kmeansm on the full dataset and record time and robust cost;
  2. build a coreset with Our_Coreset (seeded by Approx_KRMeans), run kmeansm
     on the coreset, and record construction time, clustering time, and the
     cost of the resulting centers evaluated on the full dataset.
Per-round numbers and final averages are written to stderr.

Command line: data_name data_size data_dim num_of_centers num_of_outliers
              coreset_size threshold
Returns 0 on success, 1 on a usage or input error.
*/
int main(int argc, char **argv){
	if(argc < 8){
		fprintf(stderr, "7 arguments are required (data_name, data_size, data_dim, num_of_centers, num_of_outliers, coreset_size, threshold).\n");
		return 1;
	}
	//N : number of datapoints;
	//D : dimension;
	//k : number of clusters;
	//m : number of outliers;
	//mm : size of coreset minus number of outliers, that is, we construct a coreset with size m + mm;
	//thr : threshold forwarded to Our_Coreset, used to determine the rings and groups.
	int N = atoi(argv[2]), D = atoi(argv[3]), k = atoi(argv[4]), m = atoi(argv[5]), mm = atoi(argv[6]);
	double thr = atof(argv[7]);
	if(N <= 0 || D <= 0){
		fprintf(stderr, "data_size and data_dim must be positive integers.\n");
		return 1;
	}

	FILE *FIN = fopen(argv[1], "r");
	if(FIN == NULL){
		fprintf(stderr, "Cannot open data file '%s'.\n", argv[1]);
		return 1;
	}

	dataset X;
	cerr<<"-----------Reading Data-----------"<<endl;
	for(int i = 1; i <= N; i++){
		datapoint x; x.resize(D + 1);
		// Slot 0 is set to 1 for every point; presumably the point's weight -- confirm against lib.
		x[0] = 1;
		for(int j = 1; j <= D; j++){
			// Stop on a short or malformed file instead of silently keeping default coordinates.
			if(fscanf(FIN, "%lf", &x[j]) != 1){
				fprintf(stderr, "Failed to read coordinate %d of point %d from '%s'.\n", j, i, argv[1]);
				fclose(FIN);
				return 1;
			}
		}
		X.push_back(x);
	}
	fclose(FIN);
	cerr<<"-----------Reading Over-----------"<<endl;

	const int T = 10; // number of repetitions averaged over
	double ave_our_construct_ti = 0, ave_our_lloyd_ti = 0;
	double ave_ti = 0;
	vector<double> ores, res; // per-round costs: coreset-based / full dataset
	dataset OurCoreset;
	for(int t = 1; t <= T; t++){
		// Baseline: clustering directly on the full dataset.
		clock_t start = clock();
		dataset C = kmeansm(X, k, m, 100, -1);
		double Xti = (clock() - start) / static_cast<double>(CLOCKS_PER_SEC);
		double xcost = rcost(X, C, m, 2);
		ave_ti += Xti;
		res.push_back(xcost);

		// Coreset construction.
		start = clock();
		OurCoreset = Our_Coreset(X, Approx_KRMeans(X, k, m, 2, -1, 1), m, mm, 2, thr);
		double our_construct_ti = (clock() - start) / static_cast<double>(CLOCKS_PER_SEC);

		// Clustering on the coreset.
		start = clock();
		C = kmeansm(OurCoreset, k, m, 100, -1);
		double our_lloyd_ti = (clock() - start) / static_cast<double>(CLOCKS_PER_SEC);

		// The centers found on the coreset are evaluated on the full dataset.
		double ocost = rcost(X, C, m, 2);

		ave_our_construct_ti += our_construct_ti;
		ave_our_lloyd_ti += our_lloyd_ti;

		ores.push_back(ocost);
		cerr <<"Round " << t << ". running on original dataset : " << Xti <<"; cost : " << xcost<< endl;
		cerr <<"Round " << t << ". ours, construction time : " << our_construct_ti << "; Lloyd time : " << our_lloyd_ti << "; cost : " << ocost << endl;
	}
	cerr << "Testing end." << endl;
	cerr << "Lloyd on original dataset , time : " << ave_ti / T << "; cost : " << Mean(res) << endl;
	cerr << "ours, total time : "<< ave_our_lloyd_ti / T + ave_our_construct_ti / T <<"; construction time : " << ave_our_construct_ti / T << "; Lloyd time : " << ave_our_lloyd_ti / T << "; cost : " << Mean(ores) << endl;
	return 0;
}