//Experiment 3: speeding up spectral clustering

#define SPECTRAL_CLUSTERING
#include <cstdio>
#include <algorithm>
#include <iostream>
#include <vector>
#include <cmath>
#include "parameters.h"
#include "parameters3.h"
#include "tools.h"
#include "kernel.h"
#include "kmeans.h"
#include "coreset.h"
#include "empirical_error.h"

using namespace std;

double ObjectiveVal(const WeightedSet &points, vector<Center> &C, int z){
	double sum = 0, num = 0;
	vector<double> norm;
	for(int i = 0; i < C.size(); i++)
		norm.push_back(Norm(C[i]));
	vector<int> NN;
	for(int i = 0; i < points.size(); i ++){
		NN.push_back(Nearest(points[i].datapoint, C, norm, z));
	}
	for(int i =0 ; i < K; i++){
		vector<DataPoint> cur;
		for(int j = 0; j < points.size(); j++)
			if(NN[j] == i) cur.push_back(points[j].datapoint);
		if(cur.size() == 0) continue;
		num += 1;
		double sum1 = 0, sum2 = 0;
		for(int j = 0; j < cur.size(); j++)
			for(int k = 0; k < points.size(); k ++)
				sum2 += Similarity(cur[j], points[k].datapoint);
		for(int j = 0; j < cur.size(); j++)
			for(int k = 0; k < cur.size(); k++)
				sum1 += Similarity(cur[j], cur[k]);
		sum += (sum2 - sum1) / sum2;
	}
	if(num == K)
		return sum / K;
	else 
		return 1;
}

// Not referenced anywhere else in the visible part of this file.
// NOTE(review): the names suggest per-run error series for two coreset
// variants -- confirm whether any other code uses them, otherwise remove.
vector<double> err_coreset, err_ucoreset;

int main(int argc, char *argv[]){
	double *D = (double*)malloc((N+5) * sizeof(double));
	for(int i = 0; i < N; i ++) D[i] = 0;
	freopen(filename, "r", stdin);
	WeightedSet P;
	for(int i = 0; i < N; i++){
		DataPoint x; x.resize(DIM);
		for(int j = 0; j < DIM; j++){
			scanf("%lf", &x[j]);
		}
		x.push_back(i);
		P.push_back(make_pair(x, 1));
	}
	fclose(stdin);
	WeightedSet Q;
	vector<double> pd;
	double input_time = clock();

	for(int i = 0; i < P.size(); i++) pd.push_back(1);
	sampler S; S.init(pd);
	for(int i = 0; i < M; i++) Q.push_back(P[S.sample()]);
	for(int i = 0; i < N; i++)
		for(int j = 0; j < M; j++){
			double kxy = Similarity(P[i].datapoint, Q[j].datapoint);
			D[i] += kxy;
		}
	for(int i = 0; i < N; i++){
		D[i] = D[i] * N / M;
	}
	for(int i = 0; i < N; i++){
		P[i].weight = D[i];
		D[i] = 1.0 / D[i];
	}
	input_time = (clock() - input_time) / (double)CLOCKS_PER_SEC;
	double pure_kmeans_ave_time = 0;
	double pure_kmeans_ave_cost = 0;
	double coreset_kmeans_ave_time = 0;
	double coreset_kmeans_ave_cost = 0;
	double coreset_ave_time = 0;
	double kmeans_ave_time = 0;
	double best_cost = 1e90;

	vector<Center> bst; 
	printf("Parameters:\n");
	printf("\tDataSet: Name = \"%s\"; N = %d; Dimension = %d; Weight = 1.\n", DS, N, DIM);
	printf("\tAlgorithm: Coreset Size = %d; k = %d; z = %d; Similarity function = RBFKernel with sigma = %lf\n",CORESET_SIZE, K, Z, SIGMA);
	for(int i = 1; i <= T; i++){
		double tk;
		printf("Testing Round #%d:\n", i);
		tk = clock();
		WeightedSet coreset = Coreset(P, CORESET_SIZE, K, Z, 1);
		double coreset_time = ((double)clock() - tk) /(double)CLOCKS_PER_SEC;

		tk = clock();
		vector<Center> coreset_kmeans = Kmeans(coreset, K, Z);
		double kmeans_time = ((double)clock() - tk) / (double)CLOCKS_PER_SEC;
		double coreset_kmeans_cost = ObjectiveVal(P, coreset_kmeans, Z);

		coreset_kmeans_ave_time += coreset_time + kmeans_time + input_time;
		coreset_ave_time += coreset_time;
		kmeans_ave_time += kmeans_time;
		coreset_kmeans_ave_cost += coreset_kmeans_cost;
		if(coreset_kmeans_cost < best_cost){
			best_cost = coreset_kmeans_cost;
			bst = coreset_kmeans;
		}
		printf("Coreset Kmeans Running Time: %lf ; Objective Value: %lf ;\n", coreset_time + kmeans_time + input_time, coreset_kmeans_cost);
		printf("\t\tWhere Coreset Time: %lf ; Kmeans Time : %lf \n", coreset_time, kmeans_time);
	}
	coreset_kmeans_ave_time /= T;
	coreset_kmeans_ave_cost /= T;
	coreset_ave_time /= T;
	kmeans_ave_time /= T;
	puts("");
	printf("Parameters:\n");
	printf("\tDataSet: Name = \"%s\"; N = %d; Dimension = %d; Weight = 1.\n", DS, N, DIM);
	printf("\tAlgorithm: Coreset Size = %d; k = %d; z = %d; Similarity function = RBFKernel with sigma = %lf\n",CORESET_SIZE, K, Z, SIGMA);
	printf("Total Result:\n");
	printf("Coreset Kmeans Average Running Time: %lf ; Best Objective Value: %lf ;\n", coreset_kmeans_ave_time, best_cost);
	printf("\t\t Where Coreset Average Time: %lf ; Kmeans Average Time : %lf \n", coreset_ave_time, kmeans_ave_time);
	freopen("pred_label.txt","w",stdout);	//recond the best result
	vector<double> norm;
	for(int i = 0; i < K; i++)
		norm.push_back(Norm(bst[i]));
	for(int i = 0; i < N; i++){
		int NN = Nearest(P[i].datapoint, bst, norm, Z);
		printf("%d ",NN);
	}
}