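/*
Preparation for the experiment: reads the input dataset, writes an approximate
set of centers to approx_kmedian.txt, and writes randomly generated test center
sets together with their costs to testdata.txt.
*/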

#include "head.h"
#include "tools.h"
#include "function.h"
#include "kmeans.h"
#include "coreset.h"

// Arithmetic mean of the values in `a`.
// The sum is accumulated front to back and divided by the element count.
// For an empty vector the division is 0.0 / 0, i.e. the result is NaN.
double Mean(vector<double> a) {
	const int count = (int)a.size();
	double total = 0;
	for (int i = 0; i < count; i++) {
		total += a[i];
	}
	return total / count;
}

// Largest value in `a`.
// The running maximum is seeded with the first element rather than with 0,
// so inputs whose values are all negative yield their true maximum.
// Returns 0 for an empty vector (kept from the previous implementation so
// callers that relied on that fallback are unaffected).
double Max(vector<double> a) {
	if (a.empty()) return 0;
	double ret = a[0];
	for (double x : a) ret = max(ret, x);
	return ret;
}

// Smallest value in `a`.
// The running minimum is seeded with the first element rather than with 1,
// so inputs whose values are all greater than 1 yield their true minimum.
// Returns 1 for an empty vector (kept from the previous implementation so
// callers that relied on that fallback are unaffected).
double Min(vector<double> a) {
	if (a.empty()) return 1;
	double ret = a[0];
	for (double x : a) ret = min(ret, x);
	return ret;
}

// Population variance of the values in `a`: the mean of the squared
// deviations from Mean(a), dividing by the element count (not count - 1).
// For an empty vector the result is NaN, as both Mean(a) and the final
// division are 0.0 / 0.
double Var(vector<double> a) {
	const double center = Mean(a);
	double acc = 0;
	for (size_t i = 0; i < a.size(); ++i) {
		acc += (a[i] - center) * (a[i] - center);
	}
	return acc / (int)a.size();
}

int main(int argc, char** argv) {
	if (argc < 6) {
		printf("5 arguments are required (data_name, data_size, data_dim, num_of_centers, num_of_outliers).");
		return 0;
	}
	FILE* FIN = fopen(argv[1], "r");
	int N = atoi(argv[2]), D = atoi(argv[3]), k = atoi(argv[4]), m = atoi(argv[5]);
	dataset X;
	cerr << "-----------Reading Data-----------" << endl;
	for (int i = 1; i <= N; i++) {
		datapoint x; x.resize(D + 1);
		x[0] = 1;
		for (int j = 1; j <= D; j++)
			fscanf(FIN, "%lf", &x[j]);
		X.push_back(x);
	}
	cerr << "-----------Reading Over-----------" << endl;
	cerr << "-----------Approximating KRMeans-----------" << endl;
	dataset C_approx = Approx_KRMeans(X, k, m, 1, -1, 10);
	cerr << rcost(X, C_approx, m, 1) << endl;
	FILE* APPKM = fopen("approx_kmedian.txt", "w");
	fprintf(APPKM, "%d\n", (int)C_approx.size());
	for (datapoint x : C_approx) {
		for (int i = 1; i < x.size(); i++) fprintf(APPKM, "%lf ", x[i]);
		fprintf(APPKM, "\n");
	}
	cerr << "-----------Approximating End-----------" << endl;
	cerr << "-----------Generating Test Data-----------" << endl;
	int T = 500;
	vector<pair<dataset, double> > testdata;
	for (int t = 1; t <= T; t++) {
		cerr << t << endl;
		dataset C;
		for (int i = 1; i <= k; i++)
			C.push_back(X[(int)(randm() * N)]);
		testdata.push_back(make_pair(C, rcost(X, C, m, 1)));
	}
	FILE* TESTDATA = fopen("testdata.txt", "w");
	fprintf(TESTDATA, "%d\n", T);
	for (int i = 0; i < T; i++) {
		fprintf(TESTDATA, "%lf\n", testdata[i].second);
		for (datapoint x : testdata[i].first) {
			for (int j = 1; j < x.size(); j++)
				fprintf(TESTDATA, "%lf ", x[j]);
			fprintf(TESTDATA, "\n");
		}
	}
	cerr << "-----------Generating End-----------" << endl;
	return 0;
}