﻿/**
File:		MachineLearning/Util/FgKMeans.h

Author:		
Email:		
Site:       

Copyright (c) 2017 . All rights reserved.
*/

#pragma once

#include <NeMachineLearningLib.h>
#include <MachineLearning/CommonUtil.h>

namespace NeuralEngine
{
	namespace MachineLearning
	{
		////////////////////////////////////////////////////////////////////////////////////////////////////
		/// <summary>	KMeans cluster. </summary>
		///
		/// <remarks>	
		/// 	K-means clustering is a method of vector quantization, originally from signal processing, 
		/// 	that is popular for cluster analysis in data mining. K-means clustering aims to partition 
		/// 	n observations into \f$k\f$ clusters in which each observation belongs to the cluster with the
		/// 	nearest mean, serving as a prototype of the cluster. This results in a partitioning of 
		/// 	the data space into Voronoi cells.
		/// 	
		/// 	The most common algorithm uses an iterative refinement technique. Due to its ubiquity it 
		/// 	is often called the k-means algorithm; it is also referred to as Lloyd's algorithm, 
		/// 	particularly in the computer science community.
		///
		///		Given an initial set of \f$k\f$ means \f$m_1^{(1)}, \ldots, m_k^{(1)}\f$ (see below), the algorithm proceeds
		///		by alternating between two steps:
		///
		///			Assignment step: 
		///			----------------
		///			
		///			Assign each observation to the cluster whose mean has the least squared Euclidean 
		///			distance, this is intuitively the "nearest" mean. (Mathematically, this means 
		///			partitioning the observations according to the Voronoi diagram generated by the means).
		///
		///				\f[ S_i^{(t)} = \{ x_p : \| x_p - m_i^{(t)} \|^2 \le \| x_p - m_j^{(t)} \|^2 \ \forall j, 1 \le j \le k \}, \f]
		///			
		///			where each \f$x_p\f$ is assigned to exactly one \f$S^{(t)}\f$, 
		///			even if it could be assigned to two or more of them.
		///
		///			Update step: 
		///			------------
		///			
		///			Calculate the new means to be the centroids of the observations in the new clusters.
		///
		///				\f[ m_i^{(t+1)} = \frac{1}{|S_i^{(t)}|} \sum_{x_j \in S_i^{(t)}} x_j \f]
		///
		///		The algorithm has converged when the assignments no longer change. There is no guarantee 
		///		that the optimum is found using this algorithm.
		///		[http://www.jstor.org/stable/2346830?origin=crossref&seq=1#page_scan_tab_contents]
		///
		///		The algorithm is often presented as assigning objects to the nearest cluster by distance.
		///		Using a different distance function other than (squared) Euclidean distance may stop the 
		///		algorithm from converging. Various modifications of k-means such as 
		///		spherical k-means and k-medoids have been proposed to allow using other distance 
		///		measures.
		/// 	
		/// 	Hmetal T, 29/03/2018. 
		/// </remarks>
		////////////////////////////////////////////////////////////////////////////////////////////////////
		template<typename Scalar>
		class NE_IMPEXP KMeans
		{
			// All members are static: the class exposes one public entry point (Compute) and three
			// private helpers (Clusterize, NewMeans, Distance). No instance state is declared.
			// NOTE(review): Scalar does not appear in any signature below; presumably it selects the
			// element type used by the implementation -- confirm against the definitions.
		public:

			////////////////////////////////////////////////////////////////////////////////////////////////////
			/// <summary>	Runs k-means on the input data, producing the means and the clusters. </summary>
			///
			/// <remarks>	
			/// 	Hmetal T, 29/03/2018. 
			/// 	
			/// 	NOTE(review): the expected layout (dimensions) of the arrays is not visible from this 
			/// 	declaration; confirm against the implementation before relying on a particular shape.
			/// </remarks>
			///
			/// <param name="means">   	[in,out] The means; passed by non-const reference. </param>
			/// <param name="clusters">	[in,out] The clusters; passed by non-const reference. </param>
			/// <param name="in">	   	The data array (read-only). </param>
			/// <param name="k">	   	Number of clusters to process. </param>
			/// <param name="iter">	   	(Optional) defaults to 100. Presumably the maximum number of 
			/// 						iterations rather than an iterator -- confirm against the implementation. </param>
			////////////////////////////////////////////////////////////////////////////////////////////////////
			static void Compute(af::array& means, af::array& clusters, const af::array& in, int k, int iter = 100);

		private:

			////////////////////////////////////////////////////////////////////////////////////////////////////
			/// <summary>	Associates data to computed means. </summary>
			///
			/// <remarks>	Hmetal T, 29/03/2018. </remarks>
			///
			/// <param name="data"> 	The data (read-only). </param>
			/// <param name="means">	The means (read-only). </param>
			///
			/// <returns>	
			/// 	An af::array returned by value; presumably the cluster assignment of the data 
			/// 	-- confirm against the implementation. 
			/// </returns>
			////////////////////////////////////////////////////////////////////////////////////////////////////
			static af::array Clusterize(const af::array& data, const af::array& means);

			////////////////////////////////////////////////////////////////////////////////////////////////////
			/// <summary>	Creates new means from the data and the clusters. </summary>
			///
			/// <remarks>	
			/// 	Hmetal T, 29/03/2018. 
			/// 	
			/// 	NOTE(review): unlike the sibling helpers, the array arguments are taken by value rather 
			/// 	than by const reference. Changing this requires updating the out-of-line definition too.
			/// </remarks>
			///
			/// <param name="data">	   	The data. </param>
			/// <param name="clusters">	The clusters. </param>
			/// <param name="k">	   	Presumably the number of clusters -- confirm against the implementation. </param>
			///
			/// <returns>	An af::array returned by value; by the function's name, the new means. </returns>
			////////////////////////////////////////////////////////////////////////////////////////////////////
			static af::array NewMeans(af::array data, af::array clusters, int k);

			////////////////////////////////////////////////////////////////////////////////////////////////////
			/// <summary>	Euclidean distance. </summary>
			///
			/// <remarks>	
			/// 	Hmetal T, 29/03/2018. 
			/// 	
			/// 	NOTE(review): whether the plain or the squared distance is returned cannot be told from 
			/// 	this declaration -- confirm against the implementation.
			/// </remarks>
			///
			/// <param name="data"> 	The data (read-only). </param>
			/// <param name="means">	The means (read-only). </param>
			///
			/// <returns>	An af::array returned by value. </returns>
			////////////////////////////////////////////////////////////////////////////////////////////////////
			static af::array Distance(const af::array& data, const af::array& means);
		};
	}
}