// Copyright (c) 2016, the SDSL Project Authors.  All rights reserved.
// Please see the AUTHORS file for details.  Use of this source code is governed
// by a BSD license that can be found in the LICENSE file.
/*!\file coder_comma.hpp
 * \brief coder_comma.hpp contains the class sdsl::coder::comma
 * \author Uwe Baier
 */
#ifndef SDSL_CODER_COMMA_INCLUDED
#define SDSL_CODER_COMMA_INCLUDED

#include <algorithm>
#include <array>
#include <stddef.h>
#include <stdint.h>

#include <sdsl/bits.hpp>
#include <sdsl/config.hpp>

namespace sdsl
{

namespace coder
{

//! A class to encode and decode between comma code and binary code.
/*!\author Uwe Baier
 *
 *    Comma coding works as the following:
 *    First of all, comma coding needs a parameter t_width which indicates
 *    how big a encoded digit will be (in bits), let's say t_width = 2.
 *    By use of t_width one can calculate a base for encoding, in detail
 *    this means
 *    base = 2^t_width - 1
 *    now, given any number it is encoded as the follows: The number gets displayed
 *    in the calculated base, and each digit of the number is saved with t_width bits.
 *    To indicate the end of the number, a termination digit is used (namely this is
 *    the value base).
 *    Example:
 *    t_width = 2 => base = 2^2 - 1 = 3
 *    Value to be encoded: 15
 *    15 (base 10) = 120 (base 3)
 *    Encoded value: 120 (plus termination digit) = 01 10 00 11 in binary
 *    (last digit is termination digit)
 *
 *  \tparam t_width Width of one digit used in comma code
 */
template <uint8_t t_width = 2>
class comma
{
private:
    static_assert(t_width > 1 && t_width <= 32, "comma coder: Width must be in interval [2,32]");

    static constexpr size_t base_fits_in_64(uint32_t base, uint64_t product, size_t res)
    {
        return product == 0 ? res : base_fits_in_64(base, product / base, res + 1);
    }

    // base in which numbers are coded
    static const uint32_t base = (1 << t_width) - 1;

    // table needed for computation of encoding lengths.
    // table contains entries of the kind (index, base^index)
    // to know how much digits a number needs to be encoded.
    static constexpr size_t codelentbllen = base_fits_in_64(base, 0xFFFFFFFFFFFFFFFFULL, 0);

    static struct impl
    {
        std::array<uint64_t, codelentbllen> codelentbl;
        impl()
        {
            // intialize codelentbl
            uint64_t n = 1;
            for (size_t i = 0; i < codelentbllen; i++)
            {
                codelentbl[i] = n;
                n = (n << t_width) - n; // n = n * base
            }
        }
    } data;

    // helper function to encode a single number without
    // termination digit
    static void encode_in_base(uint64_t x, uint64_t *& z, uint8_t & offset);

public:
    typedef uint64_t size_type;
    static const uint8_t min_codeword_length = t_width; // 0 needs t_width bits as termination

    //// ENCODING /////////////////////////////////////////////////

    //! Get the number of bits that are necessary to encode
    //  the value w in comma code.
    /*!\param w 64bit int to get the length of its comma encoding.
     */
    static uint8_t encoding_length(uint64_t w);

    //! Encode one positive integer x to an int_vector
    //  at bit position start_idx.
    /* \param x Positive integer to encode.
     * \param z Raw data of vector to write the encoded form of x.
     * \param start_idx Beginning bit index to write the encoded form ox x in z.
     */
    static void encode(uint64_t x, uint64_t *& z, uint8_t & offset);

    //! Encode integers contained in vector v into vector z
    /* \param v vector containing positive integer values
     * \param z vector to put the encoded values
     */
    template <class int_vector>
    static bool encode(int_vector const & v, int_vector & z);

    //// DECODING /////////////////////////////////////////////////

    //! Decode n comma encoded values beginning at start_idx
    //  in the bitstring "data"
    /* \param data Bitstring
     * \param start_idx Starting index of the decoding.
     * \param n Number of values to decode from the bitstring.
     * \param it Iterator to store the values.
     */
    template <bool t_sumup, bool t_inc, class t_iter>
    static uint64_t decode(uint64_t const * data, const size_type start_idx, size_type n, t_iter it = (t_iter) nullptr);

    //! Decode n comma gamma encoded integers
    //  beginning at start_idx in the bitstring "data"
    //  and return the sum of these values.
    /*!\param data Pointer to the beginning
     * of the comma encoded bitstring.
     * \param start_idx Index of the first bit
     * to encode the values from.
     * \param n Number of values to decode from the bitstring.
     * Attention: There have to be at least n encoded
     * values in the bitstring.
     */
    static uint64_t decode_prefix_sum(uint64_t const * data, const size_type start_idx, size_type n);

    //! Decode n comma gamma encoded integers
    //  beginning at start_idx ending at end_idx (exclusive)
    //  in the bitstring "data"
    //  and return the sum of these values.
    /*!\param data Pointer to the beginning
     * of the comma encoded bitstring.
     * \param start_idx Index of the first bit
     * to encode the values from.
     * \param end_idx Index of the last bit
     * to encode the values from.
     * \param n Number of values to decode from the bitstring.
     * Attention: There have to be at least n encoded
     * values in the bitstring.
     */
    static uint64_t
    decode_prefix_sum(uint64_t const * data, const size_type start_idx, const size_type end_idx, size_type n);

    //! Decode vector z containing comma encoded integers
    //  and store them in vector v.
    /*!\param z vector that contains encoded integers.
     * \param v vector to store the decoded integers
     */
    template <class int_vector>
    static bool decode(int_vector const & z, int_vector & v);

    // interface needs this function for whatever :>
    template <class int_vector>
    static uint64_t * raw_data(int_vector & v)
    {
        return v.m_data;
    }
};

//// IMPLEMENTATION ///////////////////////////////////////////////////////////

//// CODELENGTH TABLE SETUP ///////////////////////////////

//// Encoding /////////////////////////////////////////////

template <uint8_t t_width>
typename comma<t_width>::impl comma<t_width>::data;

template <uint8_t t_width>
inline uint8_t comma<t_width>::encoding_length(uint64_t w)
{
    // use function table and binary search to determine the number of digits
    // needed to encode w in given base.
    uint8_t numdigits = std::upper_bound(data.codelentbl.begin(), data.codelentbl.end(), w) - data.codelentbl.begin();
    // finally calculate length.
    // Don't forget termination character on calculations ;)
    return (numdigits + 1) * t_width;
}

template <uint8_t t_width>
void comma<t_width>::encode_in_base(uint64_t x, uint64_t *& z, uint8_t & offset)
{
    if (x)
    {
        uint32_t digit = x % base; // get next digit
        // encode digits with higher order
        encode_in_base(x / base, z, offset);
        // and write own digit
        bits::write_int_and_move(z, digit, offset, t_width);
    }
}

template <uint8_t t_width>
inline void comma<t_width>::encode(uint64_t x, uint64_t *& z, uint8_t & offset)
{
    // encode x itself
    encode_in_base(x, z, offset);
    // and append the termination digit
    bits::write_int_and_move(z, base, offset, t_width);
}

template <uint8_t t_width>
template <class int_vector>
bool comma<t_width>::encode(int_vector const & v, int_vector & z)
{
    // first, find out how much bits vector z needs to save values
    typedef typename int_vector::size_type size_type;
    size_type z_bit_size = 0;
    for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
    {
        z_bit_size += encoding_length(*it);
    }

    // trim vector z to correct size
    z.width(v.width());
    z.bit_resize(z_bit_size); // for future may check if resizing works
    z.shrink_to_fit();

    // iterate again and save values in z
    uint64_t * z_data = z.m_data;
    uint8_t offset = 0;
    for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it)
    {
        encode(*it, z_data, offset);
    }
    return true;
}

//// DECODING /////////////////////////////////////////////

template <uint8_t t_width>
template <bool t_sumup, bool t_inc, class t_iter>
inline uint64_t comma<t_width>::decode(uint64_t const * data, const size_type start_idx, size_type n, t_iter it)
{
    data += (start_idx >> 6);          // jump to byte offset
    uint8_t offset = start_idx & 0x3F; // and calculate bit offset
    uint64_t value = 0;
    for (size_type i = 0; i < n; i++)
    {
        // read next value
        uint64_t v = 0;
        for (uint32_t digit = (uint32_t)bits::read_int_and_move(data, offset, t_width); // read first digit
             digit != base;                  // while digit is not the terminating digit
             v = (v << t_width) - v + digit, // v = v * base + digit
             digit = (uint32_t)bits::read_int_and_move(data, offset, t_width))
            ; // and read next digit
        // now decide how to handle value
        value = (t_sumup) ? value + v : v;
        if (t_inc)
            *(it++) = value;
    }
    return value;
}

template <uint8_t t_width>
uint64_t comma<t_width>::decode_prefix_sum(uint64_t const * data, const size_type start_idx, size_type n)
{
    // easiest seems to be to use already build function decode...
    return decode<true, false, int *>(data, start_idx, n);
    // Note for above: 3rd template parameter ca be any pntr except void *
}

template <uint8_t t_width>
uint64_t comma<t_width>::decode_prefix_sum(uint64_t const * data,
                                           const size_type start_idx,
                                           SDSL_UNUSED const size_type end_idx,
                                           size_type n)
{
    // end index does not change anything here...
    return decode_prefix_sum(data, start_idx, n);
}

template <uint8_t t_width>
template <class int_vector>
bool comma<t_width>::decode(int_vector const & z, int_vector & v)
{
    // check if bit size is dividable through t_width.
    if (z.bit_size() % t_width != 0)
        return false;

    // calculate num of overall digits in z (including terminating digit)
    uint64_t numOfDigits = z.bit_size() / t_width;
    // iteration vars for z vector
    uint64_t const * z_data = z.data();
    uint8_t z_offset = 0;
    // utility to count number of entries in z, and last read digit
    uint32_t digit = base;
    typename int_vector::size_type n = 0;

    // iterate over all digits. each time a termination digit is
    // detected, a encoded number in vector ends.
    while (numOfDigits--)
    {
        digit = (uint32_t)bits::read_int_and_move(z_data, z_offset, t_width);
        if (digit == base)
            n++; // termination digit detected
    }

    // also, ensure last read digit was a termination digit
    if (digit != base)
        return false;

    // resize vector v
    v.width(z.width());
    v.resize(n);
    v.shrink_to_fit();

    // and finally decode and save result in v
    decode<false, true>(z.data(), 0, n, v.begin());
    return true;
}

} // end of namespace coder
} // end of namespace sdsl

#endif
