
#include "head.h"
#include "tools.h"
#include "function.h"
#include "kmeans.h"
#include "coreset.h"
using namespace std;

// Arithmetic mean of the entries of `a`.
// The sum is divided by the element count, so an empty vector evaluates 0/0
// (NaN under IEEE floating point) rather than raising an error.
double Mean(vector<double> a) {
	double total = 0;
	for (size_t idx = 0; idx < a.size(); ++idx) {
		total += a[idx];
	}
	const int count = (int)a.size();
	return total / count;
}

// Largest entry of `a`.
// The running maximum is seeded from the data itself instead of the constant 0,
// so a vector whose entries are all negative reports its true maximum.
// NaN entries are skipped (the previous max()-based loop also never selected them).
// Returns 0 when `a` is empty or contains only NaN, matching the old fallback.
double Max(vector<double> a) {
	bool found = false;
	double best = 0;
	for (double x : a) {
		if (x != x) continue; // NaN compares unequal to itself
		if (!found || best < x) {
			best = x;
			found = true;
		}
	}
	return best;
}

// Smallest entry of `a`.
// The running minimum is seeded from the data itself instead of the constant 1,
// so a vector whose entries all exceed 1 (e.g. error ratios) reports its true minimum.
// NaN entries are skipped (the previous min()-based loop also never selected them).
// Returns 1 when `a` is empty or contains only NaN, matching the old fallback.
double Min(vector<double> a) {
	bool found = false;
	double best = 1;
	for (double x : a) {
		if (x != x) continue; // NaN compares unequal to itself
		if (!found || x < best) {
			best = x;
			found = true;
		}
	}
	return best;
}

// Population variance of `a`: the mean of squared deviations from Mean(a).
// The divisor is the element count (not count - 1); an empty vector therefore
// evaluates 0/0, as Mean does.
double Var(vector<double> a) {
	const double center = Mean(a);
	double sq_sum = 0;
	for (size_t idx = 0; idx < a.size(); ++idx) {
		const double dev = a[idx] - center;
		sq_sum += dev * dev;
	}
	return sq_sum / (int)a.size();
}

// Element-wise quotient a[i] / b[i].
// Only the common prefix of the two vectors is processed, so a shorter `b`
// no longer causes an out-of-bounds read; equal-length inputs behave as before.
// A zero denominator is not special-cased: the quotient follows normal
// floating-point division.
vector<double> Ratio(vector<double> a, vector<double> b)
{
	const size_t count = a.size() < b.size() ? a.size() : b.size();
	vector<double> output;
	output.reserve(count);
	for (size_t i = 0; i < count; i++)
	{
		output.push_back(a[i] / b[i]);
	}
	return output;
}


int main(int argc, char** argv) {

	if (argc < 9) {
		printf("8 arguments are required (data_name, data_size, data_dim, num_of_centers, num_of_outliers, threshold, coreset size, z, k).");
		return 0;
	}
	FILE* FIN = fopen(argv[1], "r");
	//N : number of datapoints;
	//D : dimension;
	//k : number of clusters;
	//m : number of outliers;
	//thr : determine how the rings and groups are formed (see Implementation Details in Section 5 of our paper)
	//mm: examined coreset size
	//z: robust (k,z)-clustering

	int N = atoi(argv[2]), D = atoi(argv[3]), k = atoi(argv[4]), m = atoi(argv[5]);
	double thr = atof(argv[6]); int mm = atof(argv[7]);
	int z= atof(argv[8]);

	dataset X;
	cerr << "-----------Reading Data-----------" << endl;
	for (int i = 1; i <= N; i++) {
		datapoint x; x.resize(D + 1);
		x[0] = 1;
		for (int j = 1; j <= D; j++)
			fscanf(FIN, "%lf", &x[j]);
		X.push_back(x);
	}
	cerr << "-----------Reading Over-----------" << endl;
	cerr << "-----------Approximating KRMeans-----------" << endl;
	dataset C_approx;
	FILE* APPKM = fopen("approx_kmedian.txt", "r");
	int nn; fscanf(APPKM, "%d", &nn);
	for (int i = 0; i < nn; i++) {
		datapoint x; x.resize(D + 1);
		for (int i = 1; i < x.size(); i++) fscanf(APPKM, "%lf", &x[i]);
		C_approx.push_back(x);
	}
	cerr << rcost(X, C_approx, m, z) << endl;

	fclose(APPKM);
	cerr << "-----------Approximating End-----------" << endl;

	int TT = 20, T;
	vector<pair<dataset, double> > testdata;

	FILE* TESTDATA = fopen("testdata.txt", "r");
	fscanf(TESTDATA, "%d", &T);
	for (int i = 0; i < T; i++) {
		dataset A; double cst;
		fscanf(TESTDATA, "%lf", &cst);
		for (int j = 1; j <= k; j++) {
			datapoint x; x.resize(D + 1);
			for (int l = 1; l < x.size(); l++)
				fscanf(TESTDATA, "%lf", &x[l]);
			A.push_back(x);
		}
		testdata.push_back({ A, cst });
	}
	cerr << "-----------End-----------" << endl;


	vector<double> ores, sres, oldres;
	dataset OurCoreset, HLLW25Coreset, HJLW23Coreset;
	FILE* RES = fopen("res.txt", "a");
	for (int tt = 1; tt <= TT; tt++) {
		cerr << tt << " ";

		OurCoreset = Our_Coreset(X, C_approx, m, mm, z, thr / (m + mm));
		HLLW25Coreset = HLLW25_Coreset(X, C_approx, m, mm, z, thr / (m + mm));
		HJLW23Coreset = HJLW23_Coreset(X, C_approx, m, mm, z, thr / (m + mm));

		double max_oerr = 0, max_serr = 0,max_preerr = 0;

		for (int t = 0; t < T; t++) {
			dataset C = testdata[t].first;
			double xcost = testdata[t].second;
			double ocost = rcost(OurCoreset, C, m, z);
			double scost = rcost(HLLW25Coreset, C, m, z);
			double precost = rcost(HJLW23Coreset, C, m, z);

			double oerr = fabs(xcost - ocost) / xcost;
			double serr = fabs(xcost - scost) / xcost;
			double preerr = fabs(xcost - precost) / xcost;

			
			max_oerr = max(max_oerr, oerr);
			max_serr = max(max_serr, serr);
			max_preerr = max(max_preerr, preerr);

		}
		ores.push_back(max_oerr);
		sres.push_back(max_serr);
		oldres.push_back(max_preerr);
	}

	vector<double> rsres = Ratio(sres, ores);
	vector<double> roldres = Ratio(oldres, ores);

	fprintf(RES, "coreset size : %d\n", (int)OurCoreset.size());
	fprintf(RES, "Ours, ave: %lf max: %lf min: %lf var: %lf\n", Mean(ores), Max(ores), Min(ores), Var(ores));
	for (int i = 0; i < ores.size(); i++)
	{
		fprintf(RES, "%lf ", ores[i]);
	}
	fprintf(RES, "\n");


	fprintf(RES, "other size : %d\n", (int)HLLW25Coreset.size());
	fprintf(RES, "HLLW25, ave: %lf max: %lf min: %lf var: %lf\n", Mean(rsres), Max(rsres), Min(rsres), Var(rsres));
	for (int i = 0; i < sres.size(); i++)
	{
		fprintf(RES, "%lf ", sres[i]);
	}
	fprintf(RES, "\n");
	fprintf(RES, "HJLW23, ave: %lf max: %lf min: %lf var: %lf\n", Mean(roldres), Max(roldres), Min(roldres), Var(roldres));
	for (int i = 0; i < oldres.size(); i++)
	{
		fprintf(RES, "%lf ", oldres[i]);
	}
	fprintf(RES, "\n");

	cerr << "coreset size : " << (int)OurCoreset.size() << endl;
	cerr << "Ours, ave: " << Mean(ores) << "; max: " << Max(ores) << "; min: " << Min(ores) << "; var: " << Var(ores) << endl;
	for (int i = 0; i < ores.size(); i++)
	{
		cout << ores[i] << " ";
	}
	cout << endl;

	cerr << "other size : " << (int)HLLW25Coreset.size() << endl;
	cerr << "HLLW25, ave: " << Mean(rsres) << "; max: " << Max(rsres) << "; min: " << Min(rsres) << "; var: " << Var(rsres) << endl;
	for (int i = 0; i < sres.size(); i++)
	{
		cout << sres[i] << " ";
	}
	cout << endl;
	cerr << "HJLW23, ave: " << Mean(roldres) << "; max: " << Max(roldres) << "; min: " << Min(roldres) << "; var: " << Var(roldres) << endl;
	for (int i = 0; i < oldres.size(); i++)
	{
		cout << oldres[i] << " ";
	}
	cout << endl;

	fclose(RES);
	return 0;


}