
#include "head.h"
#include "tools.h"
#include "function.h"
#include "kmeans.h"
#include "coreset.h"
using namespace std;

// Arithmetic mean of the samples in `a`.
// The samples are summed in order and divided by the element count; an empty
// vector therefore evaluates 0.0 / 0.
double Mean(vector<double> a) {
	double total = 0;
	for (vector<double>::const_iterator it = a.begin(); it != a.end(); ++it) {
		total += *it;
	}
	const int count = (int)a.size();
	return total / count;
}

// Largest sample in `a`.
// The running maximum is seeded from the first element rather than from 0, so
// inputs consisting solely of negative values are handled correctly.
// An empty vector yields 0 (the value the previous implementation returned).
double Max(std::vector<double> a) {
	if (a.empty()) return 0;
	double best = a[0];
	for (double x : a) {
		if (best < x) best = x;
	}
	return best;
}

// Smallest sample in `a`.
// The running minimum is seeded from the first element rather than from 1, so
// inputs whose values all exceed 1 (e.g. relative errors above 100%) are no
// longer clamped to 1.
// An empty vector yields 1 (the value the previous implementation returned).
double Min(std::vector<double> a) {
	if (a.empty()) return 1;
	double best = a[0];
	for (double x : a) {
		if (x < best) best = x;
	}
	return best;
}

// Population variance of the samples in `a`: the sum of squared deviations
// from Mean(a), divided by the element count (not count - 1).
double Var(vector<double> a) {
	const double centre = Mean(a);
	double squares = 0;
	for (vector<double>::const_iterator it = a.begin(); it != a.end(); ++it) {
		squares += (*it - centre) * (*it - centre);
	}
	const int count = (int)a.size();
	return squares / count;
}




int main(int argc, char** argv) {

	if (argc < 7) {
		printf("6 arguments are required (data_name, data_size, data_dim, num_of_centers, num_of_outliers, threshold).");
		return 0;
	}
	FILE* FIN = fopen(argv[1], "r");
	//N : number of datapoints;
	//D : dimension;
	//k : number of clusters;
	//m : number of outliers;
	//thr : determine how the rings and groups are formed (see Implementation Details in Section 5 of our paper)

	int N = atoi(argv[2]), D = atoi(argv[3]), k = atoi(argv[4]), m = atoi(argv[5]);
	double thr = atof(argv[6]);

	dataset X;
	cerr << "-----------Reading Data-----------" << endl;
	for (int i = 1; i <= N; i++) {
		datapoint x; x.resize(D + 1);
		x[0] = 1;
		for (int j = 1; j <= D; j++)
			fscanf(FIN, "%lf", &x[j]);
		X.push_back(x);
	}
	cerr << "-----------Reading Over-----------" << endl;
	cerr << "-----------Approximating KRMeans-----------" << endl;
	dataset C_approx;
	FILE* APPKM = fopen("approx_kmeans.txt", "r");
	int nn; fscanf(APPKM, "%d", &nn);
	for (int i = 0; i < nn; i++) {
		datapoint x; x.resize(D + 1);
		for (int i = 1; i < x.size(); i++) fscanf(APPKM, "%lf", &x[i]);
		C_approx.push_back(x);
	}
	cerr << rcost(X, C_approx, m, 2) << endl;

	fclose(APPKM);
	cerr << "-----------Approximating End-----------" << endl;

	int TT = 10, T;
	vector<pair<dataset, double> > testdata;

	FILE* TESTDATA = fopen("testdata.txt", "r");
	fscanf(TESTDATA, "%d", &T);
	for (int i = 0; i < T; i++) {
		dataset A; double cst;
		fscanf(TESTDATA, "%lf", &cst);
		for (int j = 1; j <= k; j++) {
			datapoint x; x.resize(D + 1);
			for (int l = 1; l < x.size(); l++)
				fscanf(TESTDATA, "%lf", &x[l]);
			A.push_back(x);
		}
		testdata.push_back({ A, cst });
	}
	cerr << "-----------End-----------" << endl;

	
	dataset OurCoreset, HLLW25Coreset, HJLW23Coreset;
	FILE* RES = fopen("res.txt", "a");
	//mm : size of coreset minus number of outliers, that is, we construct a coreset with size m + mm;
	for (int mm = 200; mm <= 4500; mm += 100)
	{
		vector<double> ores, sres, pres;
		for (int tt = 1; tt <= TT; tt++) {
			cerr << tt << " ";
			OurCoreset = Our_Coreset(X, C_approx, m, mm, 2, thr / (m + mm));
			HLLW25Coreset = HLLW25_Coreset(X, C_approx, m, mm, 2, thr / (m + mm));
			HJLW23Coreset = HJLW23_Coreset(X, C_approx, m, mm, 2, thr / (m + mm));

			double max_oerr = 0, max_serr = 0, max_perr = 0;
			for (int t = 0; t < T; t++) {
				dataset C = testdata[t].first;
				double xcost = testdata[t].second;
				double ocost = rcost(OurCoreset, C, m, 2);

				double scost = rcost(HLLW25Coreset, C, m, 2);
				double pcost = rcost(HJLW23Coreset, C, m, 2);

				double oerr = fabs(xcost - ocost) / xcost;

				double serr = fabs(xcost - scost) / xcost;

				double perr = fabs(xcost - pcost) / xcost;



				max_oerr = max(max_oerr, oerr);
				max_serr = max(max_serr, serr);
				max_perr = max(max_perr, perr);
			}

			ores.push_back(max_oerr);
			sres.push_back(max_serr);
			pres.push_back(max_perr);
		}

		cerr << "Testing end." << endl;
		fprintf(RES, "coreset size : %d\n", (int)OurCoreset.size());
		fprintf(RES, "Ours, ave: %lf max: %lf min: %lf var: %lf\n", Mean(ores), Max(ores), Min(ores), Var(ores));

		fprintf(RES, "other size : %d\n", (int)HLLW25Coreset.size());
		fprintf(RES, "HLLW25, ave: %lf max: %lf min: %lf var: %lf\n", Mean(sres), Max(sres), Min(sres), Var(sres));
		fprintf(RES, "HJL23, ave: %lf max: %lf min: %lf var: %lf\n", Mean(pres), Max(pres), Min(pres), Var(pres));

		cerr << "coreset size : " << (int)OurCoreset.size() << endl;
		cerr << "Ours, ave: " << Mean(ores) << "; max: " << Max(ores) << "; min: " << Min(ores) << "; var: " << Var(ores) << endl;
		cerr << "other size : " << (int)HLLW25Coreset.size() << endl;
		cerr << "HLLW25, ave: " << Mean(sres) << "; max: " << Max(sres) << "; min: " << Min(sres) << "; var: " << Var(sres) << endl;
		cerr << "HJLW23, ave: " << Mean(pres) << "; max: " << Max(pres) << "; min: " << Min(pres) << "; var: " << Var(pres) << endl;
	}


	return 0;





}