/**
File:		MachineLearning/Optimization/Constrained/ScaledScaledConjugateGradient<Scalar, LSType>.cpp

Author:		
Email:		
Site:       

Copyright (c) 2017 . All rights reserved.
*/

#include <NeMachineLearningPCH.h>
#include <MachineLearning/FgSCG.h>
#include <MachineLearning/CommonUtil.h>
#include <cmath>
#include <limits>
#include <iomanip>

namespace NeuralEngine
{
	namespace MachineLearning
	{
		// Explicit instantiation definitions: one per (Scalar, line-search type) pair
		// that this translation unit provides. The member functions are defined in this
		// .cpp file rather than in the header, so only the combinations listed here can
		// be emitted from this file.
		//
		// NOTE(review): these instantiations appear before the member definitions below.
		// Per the C++ standard an explicit instantiation definition of a class template
		// only instantiates the members already defined at that point, so strictly
		// conforming compilers may not emit the constructors/Optimize from here.
		// Consider moving this list to the end of the namespace - confirm against the
		// toolchains this project targets.

		// Single-precision variants.
		template class ScaledConjugateGradient<float, ArmijoBacktracking>;
		template class ScaledConjugateGradient<float, ArmijoBracketing>;
		template class ScaledConjugateGradient<float, MoreThuente>;
		template class ScaledConjugateGradient<float, StrongWolfeBacktracking>;
		template class ScaledConjugateGradient<float, StrongWolfeBracketing>;
		template class ScaledConjugateGradient<float, WolfeBacktracking>;
		template class ScaledConjugateGradient<float, WolfeBracketing>;

		// Double-precision variants.
		template class ScaledConjugateGradient<double, ArmijoBacktracking>;
		template class ScaledConjugateGradient<double, ArmijoBracketing>;
		template class ScaledConjugateGradient<double, MoreThuente>;
		template class ScaledConjugateGradient<double, StrongWolfeBacktracking>;
		template class ScaledConjugateGradient<double, StrongWolfeBracketing>;
		template class ScaledConjugateGradient<double, WolfeBacktracking>;
		template class ScaledConjugateGradient<double, WolfeBracketing>;

		/**
		 * Constructs the optimizer from a variable count only.
		 *
		 * The constructor has no logic of its own; the argument is handed straight to
		 * the BaseGradientOptimizationMethod constructor.
		 *
		 * @param numberOfVariables  Forwarded unchanged to the base class.
		 */
		template<typename Scalar, LineSearchType LSType>
		ScaledConjugateGradient<Scalar, LSType>::ScaledConjugateGradient(int numberOfVariables)
			: BaseGradientOptimizationMethod<Scalar, LSType>(numberOfVariables)
		{
		}

		/**
		 * Constructs the optimizer from a variable count and an objective callback.
		 *
		 * The constructor has no logic of its own; both arguments are handed straight
		 * to the BaseGradientOptimizationMethod constructor.
		 *
		 * @param numberOfVariables  Forwarded unchanged to the base class.
		 * @param function           Callable taking a const af::array& and an af::array&
		 *                           and returning a Scalar; forwarded unchanged to the
		 *                           base class.
		 */
		template<typename Scalar, LineSearchType LSType>
		ScaledConjugateGradient<Scalar, LSType>::ScaledConjugateGradient(int numberOfVariables, std::function<Scalar(const af::array&, af::array&)> function)
			: BaseGradientOptimizationMethod<Scalar, LSType>(numberOfVariables, function)
		{
		}

		/**
		 * Constructs the optimizer from an objective-function object.
		 *
		 * The constructor has no logic of its own; the pointer is handed straight to
		 * the BaseGradientOptimizationMethod constructor.
		 *
		 * @param function  Pointer forwarded unchanged to the base class.
		 */
		template<typename Scalar, LineSearchType LSType>
		ScaledConjugateGradient<Scalar, LSType>::ScaledConjugateGradient(NonlinearObjectiveFunction<Scalar> * function)
			: BaseGradientOptimizationMethod<Scalar, LSType>(function)
		{
		}

		/**
		 * Destructor. Performs no work of its own in this class.
		 */
		template<typename Scalar, LineSearchType LSType>
		ScaledConjugateGradient<Scalar, LSType>::~ScaledConjugateGradient() = default;

		/**
		 * Minimizes the objective held in _function with the scaled conjugate gradient
		 * (SCG) scheme, starting from the point returned by GetSolution().
		 *
		 * Each pass of the main loop
		 *   1. estimates the curvature along the search direction d from a finite
		 *      difference of two gradients (only after an accepted step),
		 *   2. regularises that curvature with the scale parameter beta and derives the
		 *      step length alpha,
		 *   3. evaluates the objective at the trial point and accepts or rejects the step
		 *      based on the comparison ratio Delta,
		 *   4. adapts beta and builds the next search direction (Polak-Ribiere update, or
		 *      a restart along the negative gradient after nparams accepted steps).
		 *
		 * The loop stops when both the largest step component and the change of the
		 * objective drop below _tolerance, when the gradient becomes exactly zero, when
		 * the search direction degenerates, or when maxIterations (if positive) passes
		 * have been made. Progress is written to std::cout.
		 *
		 * @param cycle  Optional out-parameter (may be null). When non-null it receives
		 *               the 1-based number of the loop pass currently being processed,
		 *               so on return it holds the number of the last pass.
		 * @return Always true; the best accepted point is stored via SetSolution().
		 */
		template<typename Scalar, LineSearchType LSType>
		bool ScaledConjugateGradient<Scalar, LSType>::Optimize(int* cycle)
		{
			int nparams = BaseGradientOptimizationMethod::GetNumberOfVariables();

			af::array x = GetSolution();
			af::array xnew;
			af::array xplus;
			af::array gplus;

			const Scalar sigma0 = static_cast<Scalar>(1.0e-4);

			// Threshold below which the squared norm of d counts as zero. This is the
			// double-precision machine epsilon (the value the DBL_EPSILON macro names),
			// obtained from <limits> so no additional header is required.
			const Scalar kappaMin = static_cast<Scalar>(std::numeric_limits<double>::epsilon());

			Scalar fold = _function->Value(x);   // Initial function value.
			Scalar fnow = fold;

			af::array gradnew = _function->Gradient(x);			// Initial gradient.
			af::array gradold = gradnew.copy();

			af::array d = -gradnew.copy();              // Initial search direction.

			bool success = true;                        // Force calculation of directional derivs.
			bool finish = false;
			int nsuccess = 0;                           // Accepted steps since the last restart.
			Scalar beta = static_cast<Scalar>(1.0);     // Initial scale parameter.
			const Scalar betamin = static_cast<Scalar>(1.0e-15);   // Lower bound on scale.
			// Upper bound on scale. 1e100 is not representable as float, so clamp it to
			// the largest finite value of Scalar instead of relying on an out-of-range
			// conversion.
			const Scalar betamax = static_cast<Scalar>(
				std::min<double>(1.0e100, static_cast<double>(std::numeric_limits<Scalar>::max())));
			int iterations = 1;                         // 1-based number of the current loop pass.

			if (cycle)
				*cycle = iterations;

			Scalar sigma = 0, delta = 0, kappa = 0, theta = 0, mu = 0, alpha = 0, Delta = 0, gamma = 0;

			Scalar fnew = fold;

			std::cout << "Numerical Optimization via SCG\n==============================\n" << std::endl;
			std::cout << "Starting Value: " << fold << std::endl << std::endl;

			// Main optimization loop.
			while (!finish)
			{
				// Report the pass number through the caller's pointer. This is the only
				// place the counter is published; the pointer itself is never re-seated.
				if (cycle)
					*cycle = iterations;

				// Calculate first and second directional derivatives. After a rejected
				// step x, d and gradnew are unchanged, so the previous values are reused.
				if (success)
				{
					mu = af::matmulTN(d, gradnew).scalar<Scalar>();
					if (mu >= 0)
					{
						// d is not a descent direction: fall back to steepest descent.
						d = -gradnew.copy();
						mu = af::matmulTN(d, gradnew).scalar<Scalar>();
					}
					kappa = af::matmulTN(d, d).scalar<Scalar>();
					if (kappa < kappaMin)
					{
						// Search direction has vanished; store the last accepted point
						// before leaving so this exit matches the regular one.
						SetSolution(x);
						return true;
					}

					sigma = sigma0 / std::sqrt(kappa);
					xplus = x + sigma * d;

					gplus = _function->Gradient(xplus);
					theta = af::matmulTN(d, gplus - gradnew).scalar<Scalar>() / sigma;
				}

				// Increase effective curvature and evaluate step size alpha.
				delta = theta + beta * kappa;
				if (delta <= 0)
				{
					delta = beta * kappa;
					beta = beta - theta / kappa;
				}
				alpha = -mu / delta;

				// Calculate the comparison ratio.
				xnew = x + alpha * d;
				fnew = _function->Value(xnew);

				Delta = 2 * (fnew - fold) / (alpha * mu);
				if (Delta >= 0)
				{
					success = true;
					nsuccess = nsuccess + 1;
					x = xnew.copy();
					fnow = fnew;
				}
				else
				{
					success = false;
					fnow = fold;
				}

				std::cout << "Cycle: " << iterations << "\tf(x): " << fnow << "\t\tStep Size: " << beta << std::endl;

				// The iteration cap applies to every pass, accepted or rejected; a
				// non-positive maxIterations means "no cap".
				if (maxIterations > 0 && iterations + 1 > maxIterations)
					finish = true;

				if (success)
				{
					// Test for termination.
					if (af::max(af::abs(alpha * d)).scalar<Scalar>() < _tolerance && std::abs(fnew - fold) < _tolerance)
						finish = true;

					if (!finish)
					{
						// Update variables for new position.
						fold = fnew;
						gradold = gradnew.copy();

						_x = x.copy();

						gradnew = _function->Gradient(x);
						// If the gradient is zero then we are done.
						if (af::matmulTN(gradnew, gradnew).scalar<Scalar>() == 0.0)
							finish = true;
					}
				}

				if (!finish)
				{
					// Adjust beta according to comparison ratio. The products are formed
					// in Scalar so that the clamp, not a narrowing conversion, bounds them.
					if (Delta < 0.25)
						beta = std::min<Scalar>(static_cast<Scalar>(4) * beta, betamax);

					if (Delta > 0.75)
						beta = std::max<Scalar>(static_cast<Scalar>(0.5) * beta, betamin);

					// Update search direction using Polak-Ribiere formula, or re-start
					// in direction of negative gradient after nparams steps.
					if (nsuccess == nparams)
					{
						d = -gradnew.copy();
						nsuccess = 0;
					}
					else if (success)
					{
						gamma = af::matmulTN(gradold - gradnew, gradnew).scalar<Scalar>() / mu;
						d = gamma * d - gradnew;
					}
					iterations++;
				}
			}
			SetSolution(x);
			return true;
		}
	}
}