/*
experiment: the impact of the number of outliers on the empirical error.
*/

#include "../lib/head.h"
#include "../lib/tools.h"
#include "../lib/function.h"
#include "../lib/kmeans.h"
#include "../lib/coreset.h"


// Arithmetic mean of the entries of a: the running total of all entries
// divided by the number of entries. No guard is applied for an empty vector.
double Mean(vector<double> a){
	const int count = (int)a.size();
	double total = 0;
	for(int idx = 0; idx < count; idx++)
		total += a[idx];
	return total / count;
}

// Returns the largest entry of a.
// The scan is seeded with the first entry rather than with 0, so inputs whose
// entries are all negative yield their true maximum instead of 0.
// An empty vector returns 0, which is what the previous implementation returned.
double Max(vector<double> a){
	if(a.empty()) return 0;
	double ret = a[0];
	for(double x : a) ret = max(ret, x);
	return ret;
}

// Returns the smallest entry of a.
// The scan is seeded with the first entry rather than with 1, so inputs whose
// entries all exceed 1 (e.g. relative errors above 100%) yield their true
// minimum instead of being capped at 1.
// An empty vector returns 1, which is what the previous implementation returned.
double Min(vector<double> a){
	if(a.empty()) return 1;
	double ret = a[0];
	for(double x : a) ret = min(ret, x);
	return ret;
}

// Population variance of the entries of a: the sum of squared deviations from
// Mean(a), divided by the number of entries (not by count - 1).
double Var(vector<double> a){
	const double center = Mean(a);
	double acc = 0;
	for(size_t idx = 0; idx < a.size(); idx++){
		const double dev = a[idx] - center;
		acc += dev * dev;
	}
	return acc / (int)a.size();
}

int main(int argc, char **argv){
	if(argc < 8){
		printf("7 arguments are required (data_name, data_size, data_dim, num_of_centers, num_of_outliers, coreset_size, threshold).");
		return 0;
	}
	FILE *FIN = fopen(argv[1],"r");	
	//N : number of datapoints;
	//D : dimension;
	//k : number of clusters;
	//m : number of outliers;
	//mm : size of coreset minus number of outliers, that is, we construct a coreset with size m + mm;
	//thr : used to determine how the rings and groups
	int N = atoi(argv[2]), D = atoi(argv[3]), k =atoi(argv[4]), m = atoi(argv[5]), mm = atoi(argv[6]);
	double thr = atof(argv[7]);
	dataset X;
	cerr<<"-----------Reading Data-----------"<<endl;
	for(int i = 1; i <= N; i++){
		datapoint x; x.resize(D + 1);
		x[0] = 1;
		for(int j = 1; j <= D; j++)
			fscanf(FIN, "%lf", &x[j]);
		X.push_back(x);
	}
	cerr<<"-----------Reading Over-----------"<<endl;
	cerr<<"-----------Approximating KRMeans-----------" << endl;
	dataset C_approx; //= Approx_KRMeans(X, k, m, 1, -1, 10);
	FILE *APPKM = fopen("approx_kmeans.txt","r");
	int nn; fscanf(APPKM, "%d", &nn);
	for(int i = 0; i < nn; i++){
		datapoint x; x.resize(D + 1);
		for(int i = 1; i < x.size(); i++) fscanf(APPKM, "%lf", &x[i]);
		C_approx.push_back(x);
	}
	cerr << rcost(X, C_approx, m, 1) << endl;
	cerr<<"-----------Approximating End-----------"<<endl;
	cerr<<"-----------Reading Test Data-----------"<<endl;
	int T = 500, TT = 50;
	vector<pair<dataset, double> > testdata;

	FILE *TESTDATA = fopen("testdata.txt", "r");
	fscanf(TESTDATA, "%d", &T);
	for(int i = 0; i < T; i++){
		dataset A; double cst;
		fscanf(TESTDATA, "%lf", &cst);
		for(int j = 1; j <= k; j++){
			datapoint x; x.resize(D + 1);
			for(int l = 1; l <x.size(); l ++)
				fscanf(TESTDATA, "%lf", &x[l]);
			A.push_back(x);
		}
		testdata.push_back({A, rcost(X, A, m, 1)});
	}
	cerr<<"-----------Generating End-----------"<<endl;
	
	FILE *IMP = fopen("importance.txt", "r");
	vector<pair<datapoint, double> > Y;
	for(int i = 1; i <= N; i ++){
		double w; fscanf(IMP, "%lf", &w);
		datapoint x; x.resize(D + 1);
		for(int i = 0; i < x.size(); i++) fscanf(IMP, "%lf", &x[i]);
		Y.push_back({x,w});
	}
	vector<double> ures, ores, fres, pres;
	dataset OurCoreset, PureUniCor, UniCoreset, ImpCoreset;
	FILE *RES = fopen("res.txt", "a");

	for(int tt = 1; tt <= TT; tt ++){
		//cerr <<"Round " << tt <<endl;
		cerr << tt <<" ";
		OurCoreset = Our_Coreset(X, C_approx, m, mm, 1, thr);
		UniCoreset = Outlier_Uniform(X, C_approx, m, mm);
		ImpCoreset = Imp_Coreset(Y, m+mm);
		PureUniCor = Uniform_Coreset(X, m + mm);
		//cerr<<"Coreset size: " << (int)OurCoreset.size() <<endl;
		

		double max_uerr = 0, max_oerr = 0, max_ferr = 0, max_perr = 0; 
		for(int t = 0; t < T; t++){
			dataset C = testdata[t].first;
			double xcost = testdata[t].second;
			double ocost = rcost(OurCoreset, C, m, 1);
			double ucost = rcost(UniCoreset, C, m, 1);
			double fcost = rcost(ImpCoreset, C, m, 1);
			double pcost = rcost(PureUniCor, C, m, 1);
			double oerr = fabs(xcost - ocost) / xcost;
			double ferr = fabs(xcost - fcost) / xcost;
			double uerr = fabs(xcost - ucost) / xcost;
			double perr = fabs(xcost - pcost) / xcost;

			max_uerr = max(max_uerr, uerr);
			max_oerr = max(max_oerr, oerr);
			max_ferr = max(max_ferr, ferr);
			max_perr = max(max_perr, perr);
		}
		/*cerr <<"Coreset size = " << (int)OurCoreset.size() << endl;
		cerr << "On average, ours: " << max_oerr << "; outliers + uniform: " << max_uerr
			 << "; pure uniform: " << max_perr << "; imp: " << max_ferr << endl;*/
		ures.push_back(max_uerr);
		ores.push_back(max_oerr);
		fres.push_back(max_ferr);
		pres.push_back(max_perr);
	}

	cerr <<"Testing end." <<endl;
	fprintf(RES, "coreset size : %d\n", (int)OurCoreset.size());
	fprintf(RES, "Ours, ave: %lf max: %lf min: %lf var: %lf\n", Mean(ores), Max(ores), Min(ores), Var(ores));
	fprintf(RES, "OAUS, ave: %lf max: %lf min: %lf var: %lf\n", Mean(ures), Max(ures), Min(ures), Var(ures));
	fprintf(RES, "US, ave: %lf max: %lf min: %lf var: %lf\n", Mean(pres), Max(pres), Min(pres), Var(pres));
	fprintf(RES, "SS, ave: %lf max: %lf min: %lf var: %lf\n", Mean(fres), Max(fres), Min(fres), Var(fres));

	cerr << "coreset size : " << (int)OurCoreset.size() << " "<< (int)UniCoreset.size() << (int)ImpCoreset.size() << (int)PureUniCor.size() <<endl;
	cerr <<"Ours, ave: " << Mean(ores) << "; max: " << Max(ores) <<"; min: " << Min(ores) << "; var: " << Var(ores) << endl;
	cerr <<"OAUS, ave: " << Mean(ures) << "; max: " << Max(ures) <<"; min: " << Min(ures) << "; var: " << Var(ures) << endl;
	cerr <<"US, ave: " << Mean(pres) << "; max: " << Max(pres) <<"; min: " << Min(pres) << "; var: " << Var(pres) << endl;
	cerr <<"SS, ave: " << Mean(fres) << "; max: " << Max(fres) <<"; min: " << Min(fres) << "; var: " << Var(fres) << endl;
	return 0;
}