#include "powerlawCommon.h"

#include <gsl/gsl_statistics_double.h>
#include <limits>

#include "../random.hpp"

/**
 * @author: W.M. Otte (wim@invivonmr.uu.nl); Image Sciences Institute, UMC Utrecht, NL.
 * @date: 19-11-2009
 *
 * Function implementations of powerlaw scaling parameter estimation.
 *
 * ***************************************************************************
 * Method: "Power-law distributions in empirical data", Clauset et al, 2009
 * http://www.santafe.edu/~aaronc/powerlaws/
 * ***************************************************************************
 */
namespace plfit
{
  /**
   *
   */

  void Powerlaw::SingleFit( const VectorType& V, VectorType& results, bool nosmall, bool finite, float startValue,
                           float incrementValue, float endValue )
  {
    // determine mle-type (discrete or continuous) ...
    bool discrete = IsDiscrete( V );

    if ( discrete )
    {
      //std::cout << "*** INFO ***: Discrete maximum likelihood estimation." << std::endl;
      MleInt( V, nosmall, finite, startValue, incrementValue, endValue, results );

    } else
    {
      //std::cout << "*** INFO ***: Continuous maximum likelihood estimation." << std::endl;
      MleReal( V, nosmall, finite, results );
    }
  }

  /**
   *
   */

  void Powerlaw::BootstrapFit( const VectorType& V, VectorType& results, bool nosmall, bool finiteSize,
                              ValueType startValue, ValueType incrementValue, ValueType endValue, unsigned int bootstrapIterations, bool verbose )
  {
    // determine mle-type (discrete or continuous) ...
    bool discrete = IsDiscrete( V );

    Bootstrap( V, nosmall, finiteSize, startValue, incrementValue, endValue, discrete, bootstrapIterations, results, verbose );
  }

  /**
   * Return true of all vector elements are integers.
   */

  bool Powerlaw::IsDiscrete( const VectorType& V )
  {
    typename VectorType::const_iterator new_end = std::find_if( V.begin(), V.end(), floating_point< ValueType >() );
    return ( V.end() == new_end );
  }

  /**
   * Remove duplicated values from vector and return unique values vector.
   */

  void Powerlaw::Unique( const VectorType& V, VectorType& results )
  {
    VectorType tmp;

    std::copy( V.begin(), V.end(), std::back_inserter( tmp ) );

    // first sort...
    std::sort( tmp.begin(), tmp.end() );

    // determine end point of unique vector elements...
    typename VectorType::iterator new_end = std::unique( tmp.begin(), tmp.end() );

    // iterator...
    typename VectorType::iterator it = tmp.begin();

    // copy all unique elements in return vector...
    while ( it != new_end )
    {
      results.push_back( *it );
      it++;
    }
  }

  /**
   * Remove last element of given vector.
   */

  void Powerlaw::RemoveLastElement( VectorType& V )
  {
    V.erase( V.end() - 1, V.end() );
  }

  /**
   * Return sorted inport vector.
   */

  void Powerlaw::Sort( const VectorType& V, VectorType& W )
  {
    std::copy( V.begin(), V.end(), std::back_inserter( W ) );
    std::sort( W.begin(), W.end() );
  }

  /**
   *
   */

  void Powerlaw::KeepHigherOrEqual( VectorType& V, ValueType x )
  {
    typename VectorType::iterator new_end = std::remove_if( V.begin(), V.end(), std::bind2nd( std::less< ValueType >(), x ) );

    V.erase( new_end, V.end() );
  }

  /**
   *
   */

  void Powerlaw::KeepLowerOrEqual( VectorType& V, ValueType x )
  {
    typename VectorType::iterator new_end = std::remove_if( V.begin(), V.end(), std::bind2nd( std::greater_equal< ValueType >(), x ) );

    V.erase( new_end, V.end() );
  }

  /**
   * Return incremental vector.
   */

  void Powerlaw::GetIncrementVector( ValueType start, ValueType increment, ValueType end, VectorType& V )
  {
    int n = ( end - start ) / increment;

    if ( n <= 0 )
    {
      std::cerr << "*** WARNING ***: Increment vector is set to size: 0!" << std::endl;
    }

    for ( ; start <= end; start += increment )
    {
      V.push_back( start );
    }
  }

  /**
   * Return standard deviation (sqrt variance).
   */

  ValueType Powerlaw::GetSD( const std::vector< ValueType >& V )
  {
    return gsl_stats_sd(V.data(), 1, V.size());
  }

  /**
   * Return cumulative sum of input vector.
   */

  void Powerlaw::CumulativeSum( const VectorType& V, VectorType& W )
  {
    std::copy( V.begin(), V.end(), std::back_inserter( W ) );

    for ( unsigned int i = 1; i < V.size(); i++ )
    {
      W[i] += W[i - 1];
    }
  }

  /**
   * Return uniform random values vector from inputs, with similar size.
   */

  void Powerlaw::GetRandomValue( const VectorType& inputs, VectorType& results )
  {
    for ( unsigned int i = 0; i < inputs.size(); i++ )
    {
      results.push_back(inputs[randint((int) inputs.size())]);
    }
  }

  /**
   * Discrete Maximum likelihood estimation.
   */

  void Powerlaw::MleInt( const VectorType& x, bool nosmall, bool finiteSize, ValueType startValue, ValueType increment,
                        ValueType endValue, VectorType& results )
  {
    VectorType vec;
    GetIncrementVector( startValue, increment, endValue, vec );
    bool finiteSizeMessagePrinted = false;

    VectorType zvec( vec.size() );
    std::transform( vec.begin(), vec.end(), zvec.begin(), zeta() );

    VectorType xmins;
    Unique( x, xmins );

    RemoveLastElement( xmins );

    // first and second column of data matrix...
    VectorType dat1( xmins.size() );
    VectorType dat2( xmins.size() );

    VectorType sorted_x;

    Sort( x, sorted_x );

    ValueType Y = 0;
    ValueType xmax = *( std::max_element( sorted_x.begin(), sorted_x.end() ) );

    for ( unsigned int xm = 0; xm < xmins.size(); xm++ )
    {
      ValueType xmin = xmins[xm];

      VectorType z( sorted_x );

      KeepHigherOrEqual( z, xmin );

      ValueType n = (ValueType) z.size();

      // fill L with -Inf
      VectorType L( vec.size(), -std::numeric_limits< ValueType >::infinity() );

      // use copy of z, because z is used again later...
      VectorType tmp( z );
      std::transform( z.begin(), z.end(), tmp.begin(), log< ValueType > () );
      ValueType slogz = std::accumulate( tmp.begin(), tmp.end(), static_cast< ValueType > ( 0 ) );

      // xminvec = (1:xmin-1) ...
      VectorType xminvec_root( xmin - 1 );
      for (int i=0; i < xminvec_root.size(); ++i) xminvec_root[i] = 1+i;

      for ( unsigned int k = 0; k < vec.size(); k++ )
      {
        VectorType xminvec( xminvec_root );

        ValueType exp = vec[k];

        // xminvec.^-vec(k)...
        std::transform( xminvec.begin(), xminvec.end(), xminvec.begin(), std::bind2nd( power< ValueType > (), -exp ) );

        // sum( xminvec.^-vec(k))...
        ValueType sum = std::accumulate( xminvec.begin(), xminvec.end(), static_cast< ValueType > ( 0 ) );

        // log-likelihood
        L[k] = -exp * slogz - n * std::log( zvec[k] - sum );
      }

      typename VectorType::iterator max_it = std::max_element( L.begin(), L.end() );

      Y = *( max_it );
      unsigned int I = (unsigned int) std::distance( L.begin(), max_it );

      //  compute KS statistic

      ValueType exp = vec[I];

      // xmin:xmax ...
      VectorType xmin_xmax( xmax - ( xmin - 1 ) );
      for (int i = 0; i < xmin_xmax.size(); ++i) xmin_xmax[i] = xmin + i;

      // first_part = ( ( ( xmin:xmax ).^-exp ) ) ...
      VectorType first_part( xmin_xmax.size() );

      std::transform( xmin_xmax.begin(), xmin_xmax.end(), first_part.begin(), std::bind2nd( power< ValueType > (), -exp ) );

      // second_part = (zvec(I) - sum( ( 1:xmin-1 ).^-exp ) ) ...

      // 1:xmin - 1 ...
      VectorType pp( xmin );
      for (int i = 0; i < pp.size(); ++i) pp[i] = 1+i;
      RemoveLastElement( pp );

      // ( 1:xmin - 1 ) .^ - exp ...
      std::transform( pp.begin(), pp.end(), pp.begin(), std::bind2nd( power< ValueType > (), -exp ) );

      // sum( ( 1:xmin -1 ) .^ - exp ) ...
      ValueType sum_sp = std::accumulate( pp.begin(), pp.end(), static_cast< ValueType > ( 0 ) );

      // zvec[I] - sum( ( 1:xmin -1 ) .^ - exp ) ...
      ValueType second_part = zvec[I] - sum_sp;

      // first_part / second_part ...
      std::transform( first_part.begin(), first_part.end(), first_part.begin(), std::bind2nd( std::divides< ValueType >(),
                                                                                             second_part ) );

      // fit = cumsum( first_part /. second_part ); ==
      // fit = cumsum((((xmin:xmax).^-vec(I)))./ (zvec(I) - sum((1:xmin-1).^-vec(I)))) ...
      VectorType fit;
      CumulativeSum( first_part, fit );

      // normalized histogram ...
      Histogram hist( z, xmin, xmax, ( xmax - xmin ), true );
      VectorType histogram = hist.getHistogram();

      //  cdi = cumsum(hist(z, xmin:xmax)./n) ...
      VectorType cdi;
      CumulativeSum( histogram, cdi );

      std::transform( fit.begin(), fit.end(), cdi.begin(), fit.begin(), std::minus< ValueType >() );
      std::transform( fit.begin(), fit.end(), fit.begin(), abs< ValueType > () );

      // dat(xm,:) = [max(abs( fit - cdi )) vec(I)] ...
      dat1[xm] = *( std::max_element( fit.begin(), fit.end() ) );
      dat2[xm] = vec[I];
    }

    // select the index for the minimum value of D
    // [ D, I ] = min( dat( :, 1 ) ) ...
    typename VectorType::iterator min_it = std::min_element( dat1.begin(), dat1.end() );

    unsigned int I = (unsigned int) std::distance( dat1.begin(), min_it );
    ValueType xmin = xmins[I];

    // z = x(x>=xmin);
    // n = length(z);
    VectorType z( x );
    KeepHigherOrEqual( z, xmin );
    ValueType n = (ValueType) z.size();

    // alpha = dat( I, 2 ) ...
    ValueType alpha = dat2[I];

    // finite-size correction
    if ( finiteSize )
    {
      alpha = alpha * ( static_cast< ValueType > ( n ) - 1 ) / static_cast< ValueType > ( n ) + 1 / static_cast< ValueType > ( n );
    }

    if ( !finiteSize && ( n < 50 ) && ( !finiteSizeMessagePrinted ) )
    {
      std::cout << "*** WARNING ***: finite-size bias may be present!" << std::endl;
      finiteSizeMessagePrinted = true;
    }

    // L = -alpha * sum( log( z ) ) - n * log( zvec( find( vec <= alpha, 1 , 'last' ) ) - sum((1:xmin-1).^-alpha));

    // 1:xmin - 1 ...
    VectorType pp( xmin );
    for (int i = 0; i < pp.size(); ++i) pp[i] = 1+i;
    RemoveLastElement( pp );
    // ( 1:xmin - 1 ) .^ - alpha ...
    std::transform( pp.begin(), pp.end(), pp.begin(), std::bind2nd( power< ValueType >(), -alpha ) );
    // sum( ( 1:xmin -1 ) .^ - alpha ) ...
    ValueType sum_third_part = std::accumulate( pp.begin(), pp.end(), static_cast< ValueType > ( 0 ) );

    std::reverse( vec.begin(), vec.end() );
    typename VectorType::iterator it_last = std::find_if( vec.begin(), vec.end(), std::bind2nd( std::less_equal< ValueType >(), alpha ) );
    unsigned int index = (unsigned int) std::distance( vec.begin(), it_last ) + 1;
    // n * log( zvec( find( vec <= alpha, 1 , 'last' ) ) - sum((1:xmin-1).^-alpha)) ...
    ValueType sum_second_part = n * std::log( zvec[ vec.size() - index ] - sum_third_part );

    // -alpha * sum( log( z ) ) ...
    std::transform( z.begin(), z.end(), z.begin(), log< ValueType >() );
    ValueType sum_first_part = -alpha * std::accumulate( z.begin(), z.end(), static_cast< ValueType > ( 0 ) );

    results.push_back( alpha );
    results.push_back( xmin );
    results.push_back( sum_first_part - sum_second_part );
  }

  /**
   * Continues Maximum likelihood estimation.
   */

  void Powerlaw::MleReal( const VectorType& x, bool nosmall, bool finiteSize, VectorType& results )
  {
    VectorType xmins;
    Unique( x, xmins );
    bool finiteSizeMessagePrinted = false;

    RemoveLastElement( xmins );

    VectorType dat( xmins.size(), 0 );

    VectorType sorted_x;

    Sort( x, sorted_x );

    for ( unsigned int xm = 0; xm < xmins.size(); xm++ )
    {
      VectorType z( sorted_x );

      ValueType xmin = xmins[xm];

      KeepHigherOrEqual( z, xmin );
      VectorType tmp( z ); // backup for later in fuction...

      ValueType n = (ValueType) z.size();

      // estimate alpha using direct MLE
      std::transform( z.begin(), z.end(), z.begin(), std::bind2nd( log_div< ValueType > (), xmin ) );

      ValueType a = static_cast< ValueType > ( n ) / std::accumulate( z.begin(), z.end(), static_cast< ValueType > ( 0 ) );

      if ( nosmall )
      {
        if ( ( static_cast< ValueType > ( a ) - 1 ) / sqrt( static_cast< ValueType > ( n ) ) > 0.1 )
        {
          dat.erase( dat.begin() + xm, dat.end() );
          xm = (unsigned int) xmins.size() + 1;
          break;
        }
      }

      // compute KS statistic
      VectorType cx( n );
      for(int i = 0; i < cx.size(); ++i) cx[i] = i;
      std::transform( cx.begin(), cx.end(), cx.begin(), std::bind2nd( std::divides< ValueType >(), n ) );

      // cf = xmin / z ...
      VectorType cf( tmp.size(), xmin );
      std::transform( cf.begin(), cf.end(), tmp.begin(), cf.begin(), std::divides< ValueType >() );

      // cf = 1 - ( xmin / z ) ^ a ...
      std::transform( cf.begin(), cf.end(), cf.begin(), std::bind2nd( power_minus_one< ValueType > (), a ) );

      // max( abs( cf - cx ) ) ...
      std::transform( cf.begin(), cf.end(), cx.begin(), cf.begin(), std::minus< ValueType >() );
      dat[xm] = *( std::max_element( cf.begin(), cf.end() ) );
    }

    // D = min(dat) ...
    ValueType D = *( std::min_element( dat.begin(), dat.end() ) );

    // xmin  = xmins( find( dat <= D, 1, 'first' ) ) ...
    typename VectorType::iterator new_end = std::find_if( dat.begin(), dat.end(), std::bind2nd( std::less_equal< ValueType >(), D ) );

    ValueType xmin = xmins[std::distance( dat.begin(), new_end )];

    // z = x( x >= xmin ) ...
    VectorType z( x );
    KeepHigherOrEqual( z, xmin );

    unsigned int n = (unsigned int) z.size();

    // alpha = 1 + n ./ sum( log(z./xmin) ) ...
    VectorType tmp( z.size(), xmin );
    std::transform( z.begin(), z.end(), tmp.begin(), z.begin(), log_div< ValueType > () );
    ValueType sum = std::accumulate( z.begin(), z.end(), static_cast< ValueType > ( 0 ) );
    ValueType alpha = 1.0 + z.size() / sum;

    // finite-size correction
    if ( finiteSize )
    {
      alpha = alpha * ( static_cast< ValueType > ( n ) - 1 ) / static_cast< ValueType > ( n ) + 1 / static_cast< ValueType > ( n );
    }

    if ( !finiteSize && ( n < 50 ) && ( !finiteSizeMessagePrinted ) )
    {
      std::cout << "*** WARNING ***: finite-size bias may be present!" << std::endl;
      finiteSizeMessagePrinted = true;
    }

    // log-likelihood: L = n*log((alpha-1)/xmin) - alpha.*sum(log(z./xmin));
    ValueType L = n * std::log( ( alpha - 1 ) / xmin ) - alpha * sum;

    results.push_back( alpha );
    results.push_back( xmin );
    results.push_back( L );
  }

  /**
   * Run bootstrapping of mle n-times and return Vector with indices:
   *
   * 0: average alpha
   * 1: average xmin
   * 2: average L
   *
   * 3: sd alpha
   * 4: sd xmin
   * 5: sd L
   *
   * Empty VectorType is returned when data is corrupt.
   */

  void Powerlaw::Bootstrap( const VectorType& inputs, bool nosmall, bool finiteSize, ValueType startValue,
                           ValueType increment, ValueType endValue, bool discrete, unsigned int n, VectorType& results, bool verbose )
  {
    VectorType all_alpha;
    VectorType all_xmin;
    VectorType all_L;

    if ( verbose )
      std::cout << "*** INFO ***: boostrapping done (%): " << std::endl;

    unsigned int successfulBootstraps = 0;

    for ( unsigned int i = 0; i < n; i++ )
    {
      if ( verbose )
      {
        std::cout << ( static_cast< ValueType > ( i ) / static_cast< ValueType > ( n ) ) * 100;
        std::cout.flush();
        std::cout << '\r';
      }

      VectorType random_inputs;
      GetRandomValue( inputs, random_inputs );

      VectorType run;

      Mle( random_inputs, nosmall, finiteSize, startValue, increment, endValue, discrete, run );

      if ( !run.empty() )
      {
        all_alpha.push_back( run[0] );
        all_xmin.push_back( run[1] );
        all_L.push_back( run[2] );
        successfulBootstraps++;
      }
    }

    if ( successfulBootstraps != n )
    {
      ValueType p = ( static_cast< ValueType > ( successfulBootstraps ) / static_cast< ValueType > ( n ) ) * 100;
      std::cerr << "*** WARNING ***: bootstrapping only ran partially "
      "-> (" << p << " %)." << std::endl;
    }

    if ( !all_alpha.empty() )
    {
      ValueType average_alpha = std::accumulate( all_alpha.begin(), all_alpha.end(), static_cast< ValueType > ( 0 ) )
      / all_alpha.size();
      ValueType average_xmin = std::accumulate( all_xmin.begin(), all_xmin.end(), static_cast< ValueType > ( 0 ) ) / all_xmin.size();
      ;
      ValueType average_L = std::accumulate( all_L.begin(), all_L.end(), static_cast< ValueType > ( 0 ) ) / all_L.size();
      ;
      
      ValueType sd_alpha = GetSD( all_alpha );
      ValueType sd_xmin = GetSD( all_xmin );
      ValueType sd_L = GetSD( all_L );
      
      results.push_back( average_alpha );
      results.push_back( average_xmin );
      results.push_back( average_L );
      
      results.push_back( sd_alpha );
      results.push_back( sd_xmin );
      results.push_back( sd_L );
    }
  }
  
  /**
   * Maximum likelihood estimation.
   *
   * If input is not correct an empty VectorType will be returned!
   */
  
  void Powerlaw::Mle( const VectorType& inputs, bool nosmall, bool finiteSize, ValueType startValue, ValueType increment,
                     ValueType endValue, bool discrete, VectorType& results )
  {
    // check if at all inputs are not identical ...
    if ( std::max_element( inputs.begin(), inputs.end() ) == std::min_element( inputs.begin(), inputs.end() ) )
    {
      return;
    }
    
    if ( discrete )
    {
      if ( startValue > 1.0 )
        MleInt( inputs, nosmall, finiteSize, startValue, increment, endValue, results );
      
      else
        std::cerr << "*** ERROR ***: start-value should be higher than 1.0!" << std::endl;
      
    } else
      MleReal( inputs, nosmall, finiteSize, results );
  }
} // end namespace graph

