#include "utilities.h"
#include "header.h"

#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include <cstdio>   // fopen, fscanf, fclose
#include <fstream>  // ofstream
#include <iostream>
#include <sstream>


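/**
 * Sampling without replacement (WOR): draw K random indexes from vectorIndex
 * using a Fisher-Yates style shuffle.
 * NOTE: the implementation below is currently commented out.
 *
 * @param vectorIndex candidate indexes to sample from
 * @param K number of indexes to draw
 * @return K randomly chosen indexes (all of vectorIndex when K >= its size)
 */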
//vector<int> samplingWOR(vector<int> vectorIndex, int K)
//{
//    if ( K >= (int)vectorIndex.size() )
//        return vectorIndex;
//
//    vector<int>::iterator iterFirst, iterRandom;
//    iterFirst = vectorIndex.begin();
//    int left = vectorIndex.size() - 1;
//
//    /**
//    int i, j;
//    for (i = K - 1; i > 0; i--)
//    {
//        // Pick a random index from 0 to i
//        j = rand() % (i + 1);
//
//        // Swap arr[i] with the element
//        // at random index
//        iter_swap(vectorIndex.begin() + i, vectorIndex.begin() + j);
//    }
//
//    return vector<int>(vectorIndex.begin(), vectorIndex.begin() + K);
//    **/
//
//    while (K--)
//    {
//        //cout << *iterFirst << endl;
//        iterRandom = iterFirst;
//
//        // increment iterRandom by a random position
////        advance(iterRandom, intUnifRand(0, left - 1));
//        advance(iterRandom, rand() % left);
//
//        //cout << *iterRandom << endl;
//        // Swap value
//        swap(*iterFirst, *iterRandom);
//        //cout << *iterFirst << endl;
//        //cout << *iterRandom << endl;
//
//        // Increase the iterFirst
//        ++iterFirst;
//
//        // Decrease the size of vector
//        --left;
//    }
//
//    return vector<int>(vectorIndex.begin(), iterFirst);
//
//}

// Sort `distance` and reorder `indice` accordingly, in ascending order
void sort_by_distance(Ref<VectorXf> distance, Eigen::Ref<VectorXi> indice)
{
    int n = distance.size();
    std::vector<std::pair<float, int>> paired(n);

    for (int i = 0; i < n; ++i)
        paired[i] = {distance[i], indice[i]};

    std::sort(paired.begin(), paired.end());

    for (int i = 0; i < n; ++i) {
        distance[i] = paired[i].first;
        indice[i]  = paired[i].second;
    }
}

/**
 * Generate random bit for FHT
 *
 * @param p_iNumBit
 * @param bitHD
 * @param random_seed
 * return bitHD that contains fhtDim * n_rotate (default of n_rotate = 3)
 */
void bitHD3Generator(int p_iNumBit, int random_seed, boost::dynamic_bitset<> & bitHD)
{
    unsigned seed = std::random_device{}(); // chrono::system_clock::now().time_since_epoch().count();
    if (random_seed >= 0)
        seed = random_seed;

    // std::random_device rd;  // Seed source
    std::mt19937 generator(seed); // Mersenne Twister engine seeded with rd()
    // default_random_engine generator(seed);

    uniform_int_distribution<uint32_t> unifDist(0, 1);

    bitHD = boost::dynamic_bitset<> (p_iNumBit);

    // Loop col first since we use col-wise
    for (int d = 0; d < p_iNumBit; ++d)
    {
        bitHD[d] = unifDist(generator) & 1;
    }

}

/**
 * Generate 2 vectors of random sign, each for one layer.
 * We use boost::bitset for saving space
 * @param p_iNumBit = L * 3 * Length (3 rotation, 2 layers, each with L tables)
 */
void bitHD3Generator2(int p_iNumBit, int random_seed, boost::dynamic_bitset<> & bitHD1, boost::dynamic_bitset<> & bitHD2)
{
    // std::random_device rd;  // Seed source
    unsigned seed = std::random_device{}(); // chrono::system_clock::now().time_since_epoch().count();
    if (random_seed > -1) // then use the assigned seed
        seed = random_seed;

    std::mt19937 generator(seed); // Mersenne Twister engine seeded with rd()
    // default_random_engine generator(seed);

    uniform_int_distribution<uint32_t> unifDist(0, 1);

    bitHD1 = boost::dynamic_bitset<> (p_iNumBit);
    bitHD2 = boost::dynamic_bitset<> (p_iNumBit);

    for (int d = 0; d < p_iNumBit; ++d)
    {
        bitHD1[d] = unifDist(generator) & 1;
        bitHD2[d] = unifDist(generator) & 1;

    }

    // for (int i = 0; i < 20; i++)
    // {
    //     cout << bitHD1[i] << endl;
    //     cout << bitHD2[i] << endl;
    // }
}


/**
 * Generate Gaussian distribution N(mean, stddev)
 *
 * @param p_iNumRows
 * @param p_iNumCols
 * @param mean
 * @param stddev
 * @param random_seed
 * @return a matrix of size numRow x numCol
 */
MatrixXf gaussGenerator(int p_iNumRows, int p_iNumCols, float mean, float stddev, int random_seed)
{
    MatrixXf MATRIX_G = MatrixXf::Zero(p_iNumRows, p_iNumCols);

    unsigned seed = std::random_device{}(); // chrono::system_clock::now().time_since_epoch().count();
    if (random_seed >= 0)
        seed = random_seed;

    // std::random_device rd;  // Seed source
    std::mt19937 generator(seed); // Mersenne Twister engine seeded with rd()
    // default_random_engine generator(seed);

    normal_distribution<float> normDist(mean, stddev);

    // Always iterate col first, then row later due to the col-wise storage
    for (int c = 0; c < p_iNumCols; ++c)
        for (int r = 0; r < p_iNumRows; ++r)
            MATRIX_G(r, c) = normDist(generator);

    return MATRIX_G;
}

/**
 * Generate Cauchy distribution C(x0, gamma)
 *
 * @param p_iNumRows
 * @param p_iNumCols
 * @param x0
 * @param gamma
 * @param random_seed
 * @return a matrix of size numRow x numCol
 */
MatrixXf cauchyGenerator(int p_iNumRows, int p_iNumCols, float x0, float gamma, int random_seed)
{
    MatrixXf MATRIX_C = MatrixXf::Zero(p_iNumRows, p_iNumCols);

    unsigned seed = std::random_device{}(); // chrono::system_clock::now().time_since_epoch().count();
    if (random_seed >= 0)
        seed = random_seed;

    // std::random_device rd;  // Seed source
    std::mt19937 generator(seed); // Mersenne Twister engine seeded with rd()
    // default_random_engine generator(seed);

    cauchy_distribution<float> cauchyDist(x0, gamma); // {x0 /* a */, 𝛾 /* b */}

//    MATRIX_C = MatrixXf::Zero(p_iNumRows, p_iNumCols);

    // Always iterate col first, then row later due to the col-wise storage
    for (int c = 0; c < p_iNumCols; ++c)
        for (int r = 0; r < p_iNumRows; ++r)
            MATRIX_C(r, c) = cauchyDist(generator);

    return MATRIX_C;
}

/**
 *
 * @param p_Labels
 * @param p_sOutputFile
 */
void outputLabels(const IVector & p_Labels, const string& p_sOutputFile)
{
//	cout << "Outputing File..." << endl;
    ofstream myfile(p_sOutputFile);

    //cout << p_matKNN << endl;

    for (auto const& i : p_Labels)
    {
        myfile << i << '\n';
    }

    myfile.close();
//	cout << "Done" << endl;
}

void outputIndices(const vector<IVector> & p_vecKNN, const string& p_sOutputFile)
{
    ofstream myfile(p_sOutputFile);

    for (int n = 0; n < (int)p_vecKNN.size(); ++n)
    {
        for (int d = 0; d < (int)p_vecKNN[n].size(); ++d)
        {
            myfile << p_vecKNN[n][d] << " ";
        }
        myfile << '\n';
    }

    myfile.close();
    //	cout << "Done" << endl;
}

void outputNeighbors(const vector<FVector> & p_vecKNN, const string& p_sOutputFile)
{
    ofstream myfile(p_sOutputFile);

    for (int n = 0; n < (int)p_vecKNN.size(); ++n)
    {
        for (int d = 0; d < (int)p_vecKNN[n].size(); ++d)
        {
            myfile << p_vecKNN[n][d] << " ";
        }
        myfile << '\n';
    }

    myfile.close();
    //	cout << "Done" << endl;
}

/**
 *
 * @param p_vecOrder
 * @param p_vecDist
 * @param p_sOutputFile
 */
void outputOptics(const IVector & p_vecOrder, const FVector & p_vecDist, const string& p_sOutputFile)
{
//	cout << "Outputing File..." << endl;

    ofstream myfile(p_sOutputFile);


    for (int n = 0; n < (int)p_vecOrder.size(); ++n)
    {
        myfile << p_vecOrder[n] << " " << p_vecDist[n] << '\n';
    }

    myfile.close();
//	cout << "Done" << endl;
}

/**
 *
 * @param p_vecPoint
 * @param p_vecEmbed
 * @param kerEmbed
 * @param numDim
 * @param kerIntervalSamp
 */
void embedChi2(const Ref<VectorXf>& p_vecPoint, Ref<VectorXf> p_vecEmbed,
                    int kerEmbed, int numDim, float kerIntervalSamp)
{
    int iComponent = (kerEmbed / numDim) - 1; // kappa_1, kappa_2, ...
    iComponent /= 2; // since we take cos and sin

//    cout << "Number of components: " << iComponent << endl;

    // adding sqrt(x L kappa(0)
    for (int d = 0; d < numDim; ++d)
    {
        // Only deal with non zero
        if (p_vecPoint[d] > 0)
            p_vecEmbed[d] = sqrt(p_vecPoint[d] * kerIntervalSamp);
    }

    // adding other component
    for (int i = 1; i <= iComponent; ++i)
    {
        // We need the first D for kappa_0, 2D for kappa_1, 2D for kappa_2, ...
        int iBaseIndex = numDim + (i - 1) * 2 * numDim;

        for (int d = 0; d < numDim; ++d)
        {
            if (p_vecPoint[d] > 0)
            {
                float fFactor = sqrt(2.0 * p_vecPoint[d] * kerIntervalSamp / cosh(PI * i * kerIntervalSamp));

                p_vecEmbed[iBaseIndex + d] = fFactor * cos(i * kerIntervalSamp * log(p_vecPoint[d]));
                p_vecEmbed[iBaseIndex + numDim + d] = fFactor * sin(i * kerIntervalSamp * log(p_vecPoint[d]));
            }
        }
    }
}

/**
 *
 * @param p_vecPoint
 * @param p_vecEmbed
 * @param kerEmbed
 * @param numDim
 * @param kerIntervalSamp
 */
void embedJS(const Ref<VectorXf>& p_vecPoint, Ref<VectorXf> p_vecEmbed,
             int kerEmbed, int numDim, float kerIntervalSamp)
{
    int iComponent = (kerEmbed / numDim) - 1; // kappa_1, kappa_2, ...
    iComponent /= 2; // since we take cos and sin

    // adding sqrt(x L kappa(0)
    for (int d = 0; d < numDim; ++d)
    {
        // Only deal with non zero
        if (p_vecPoint[d] > 0)
            p_vecEmbed[d] = sqrt(p_vecPoint[d] * kerIntervalSamp * 2.0 / log(4));
    }

    // adding other component
    for (int i = 1; i <= iComponent; ++i)
    {
        // We need the first D for kappa_0, 2D for kappa_1, 2D for kappa_2, ...
        int iBaseIndex = numDim + (i - 1) * 2 * numDim;

        for (int d = 0; d < numDim; ++d)
        {
            if (p_vecPoint[d] > 0)
            {
                // this is kappa(jL)
                float fFactor = 2.0 / (log(4) * (1 + 4 * (i * kerIntervalSamp) * (i * kerIntervalSamp)) * cosh(PI * i * kerIntervalSamp));

                // This is sqrt(2X hkappa)
                fFactor = sqrt(2.0 * p_vecPoint[d] * kerIntervalSamp * fFactor);

                p_vecEmbed[iBaseIndex + d] = fFactor * cos(i * kerIntervalSamp * log(p_vecPoint[d]));
                p_vecEmbed[iBaseIndex + numDim + d] = fFactor * sin(i * kerIntervalSamp * log(p_vecPoint[d]));
            }
        }
    }
}

float pointDistance(float a, float b){
    return std::abs(a - b);
}

/** Useful for dense vector
**/
float computeDist(const Ref<VectorXf> & p_vecX, const Ref<VectorXf> & p_vecY, const string& dist)
{
    if (dist == "Cosine")
        return 1 - p_vecX.dot(p_vecY);
    if (dist == "L1")
        return (p_vecX - p_vecY).cwiseAbs().sum();
    else if (dist == "L2")
        return (p_vecX - p_vecY).norm();
    else if (dist == "Chi2") // ChiSquare
    {
        // hack for vectorize to ensure no zero element
        VectorXf vecX = p_vecX;
        VectorXf vecY = p_vecY;

        vecX.array() += EPSILON;
        vecY.array() += EPSILON;

        VectorXf temp = vecX.cwiseProduct(vecY); // x * y
        temp = temp.cwiseQuotient(vecX + vecY); // (x * y) / (x + y)
        temp.array() *= 2.0; // 2(x * y) / (x + y)

        return 1.0 - temp.sum();
    }

    else if (dist == "JS") // Jensen Shannon
    {
        // hack for vectorize
        VectorXf vecX = p_vecX;
        VectorXf vecY = p_vecY;

        vecX.array() += EPSILON;
        vecY.array() += EPSILON;

        VectorXf vecTemp1 = (vecX + vecY).cwiseQuotient(vecX); // (x + y) / x
        vecTemp1 = vecTemp1.array().log() / log(2.0); // log2( (x+y) / x))
        vecTemp1 = vecTemp1.cwiseProduct(vecX); // x * log2( (x+y) / x))

//        cout << vecTemp1.sum() / 2 << endl;

        VectorXf vecTemp2 = (vecX + vecY).cwiseQuotient(vecY);
        vecTemp2 = vecTemp2.array().log() / log(2.0);
        vecTemp2 = vecTemp2.cwiseProduct(vecY);

//        cout << vecTemp2.sum() / 2 << endl;

        return 1.0 - (vecTemp1 + vecTemp2).sum() / 2.0;
    }
    else if (dist == "DTW")     // Dynamic Time Warping
    {
        int n1 = p_vecX.size();
        int n2 = p_vecY.size();
        vector<vector<float>> dp(n1 + 1, vector<float>(n2 + 1, numeric_limits<float>::infinity()));
        dp[0][0] = 0;
        for (int i = 1; i <= n1; i++)
        {
            for (int j = 1; j <= n2; j++)
            {
                float cost = pointDistance(p_vecX[i-1], p_vecY[j-1]);
                dp[i][j] = cost + std::min(std::min(dp[i-1][j], dp[i][j-1]), dp[i-1][j-1]);
            }
        }
        // std::cout << "This distance is: " << dp[n1][n2] << endl;
        return dp[n1][n2];
        // return 1 - p_vecX.dot(p_vecY) + 0.1;
    }
    else if (dist == "KL")
    {
        VectorXf vecX = p_vecX;
        VectorXf vecY = p_vecY;

        vecX.array() += EPSILON;
        vecY.array() += EPSILON;

        VectorXf vecX_norm = vecX / vecX.sum();
        VectorXf vecY_norm = vecY / vecY.sum();

        float kl_dist = 0.0;
        for (int i = 0; i < vecX_norm.size(); i++)
        {
            kl_dist += vecX_norm(i) * (log(vecX_norm(i) / vecY_norm(i)));
        }
        return kl_dist;
    }
    else if (dist == "Wass")
    {
        VectorXf vecX_sorted = p_vecX;
        VectorXf vecY_sorted = p_vecY;
        std::sort(vecX_sorted.data(), vecX_sorted.data() + vecX_sorted.size());
        std::sort(vecY_sorted.data(), vecY_sorted.data() + vecY_sorted.size());

        float wass_dist = 0.0;
        for (int i = 0; i < vecX_sorted.size(); i++)
        {
            wass_dist += abs(vecX_sorted[i] - vecY_sorted[i]);
        }
        wass_dist /= vecX_sorted.size();
        return wass_dist;
    }
    else
    {
        cout << "Error: The distance is not support" << endl;
        return 0;
    }
}

/** Faster with sparse representation
**/
float computeChi2(const Ref<VectorXf>& vecX, const Ref<VectorXf>& vecY)
{
    float dist = 0.0;
    for (int d = 0; d < vecX.size(); ++d)
    {
        if ((vecX(d) > 0) && (vecY(d) > 0))
            dist += 2 * vecX(d) * vecY(d) / (vecX(d) + vecY(d));
    }

    return 1.0 - dist;

}

/**
 * Load data (each line is a point) into MatrixXf of size D x N format
 * Check the supporting distance and apply normalization (cosine, chi2, JS)
 *
 * @param dataset
 * @param distance
 * @param numPoints
 * @param numDim
 * @param MATRIX_X
 */
void loadtxtData(const string& dataset, const string& distance, int numPoints, int numDim, RowMajorMatrixXf & MATRIX_X) {

    FILE *f = fopen(dataset.c_str(), "r");
    if (!f) {
        cerr << "Error: Data file does not exist !" << endl;
        exit(1);
    }

    // Important: If use a temporary vector to store data, it doubles the memory
    // MATRIX_X = MatrixXf::Zero(numDim, numPoints); // default is col-major
    MATRIX_X = RowMajorMatrixXf::Zero(numPoints, numDim); // row-wise

    // Each line is a vector of d dimensions
    for (int n = 0; n < numPoints; ++n) {
        for (int d = 0; d < numDim; ++d) {
            // fscanf(f, "%f", &MATRIX_X(d, n)); // col-major
            fscanf(f, "%f", &MATRIX_X(n, d)); // row-major
        }
    }

    cout << "Finish reading data" << endl;

    //        MATRIX_X.transpose();
    //        cout << "X has " << MATRIX_X.rows() << " rows and " << MATRIX_X.cols() << " cols " << endl;

    /**
    Print the first col (1 x N)
    Print some of the first elements of the MATRIX_X to see that these elements are on consecutive memory cell.
    **/
    //        cout << MATRIX_X.row(0) << endl << endl;
    //        cout << "In memory (col-major):" << endl;
    //        for (n = 0; n < 10; n++)
    //            cout << *(MATRIX_X.data() + n) << "  ";
    //        cout << endl << endl;

    cout << "Now checking the condition of data given the distance." << endl;
    transformData(MATRIX_X, distance);
}

void loadbinData(const string& dataset, const string& distance, int numPoints, int numDim, RowMajorMatrixXf & MATRIX_X) {

    // Open file
    int fd = open(dataset.c_str(), O_RDONLY);
    if (fd < 0) {
        perror("open");
        exit(1);
    }

    // Get file size
    struct stat sb;
    if (fstat(fd, &sb) == -1) {
        perror("fstat");
        exit(1);
    }

    size_t filesize = sb.st_size;
    size_t total_rows = filesize / (numDim * sizeof(float));

    std::cout << "Total rows = " << total_rows << std::endl;

    // Map the file into memory
    void* mapped = mmap(NULL, filesize, PROT_READ, MAP_PRIVATE, fd, 0);
    if (mapped == MAP_FAILED) {
        perror("mmap");
        exit(1);
    }

    close(fd); // fd no longer needed

    if ((size_t)numPoints > total_rows) {
        std::cerr << "Error: numPoints exceeds the number of rows in the file." << std::endl;
        munmap(mapped, filesize);
        exit(1);
    }

    // Important: If use a temporary vector to store data, it doubles the memory
    // MATRIX_X = MatrixXf::Zero(numDim, numPoints); // default is col-major
    MATRIX_X = RowMajorMatrixXf::Zero(numPoints, numDim);

    // Interpret data as float array
    float* data = reinterpret_cast<float*>(mapped);

    // Each line is a vector of d dimensions
    for (int n = 0; n < numPoints; ++n) {
        for (int d = 0; d < numDim; ++d) {
            // MATRIX_X(d, n) = data[n * numDim + d]; // col-major
            MATRIX_X(n, d) = data[n * numDim + d]; // row-major
        }
    }


    // Unmap when done
    munmap(mapped, filesize);

    cout << "Finish reading data" << endl;

    //        MATRIX_X.transpose();
    //        cout << "X has " << MATRIX_X.rows() << " rows and " << MATRIX_X.cols() << " cols " << endl;

    /**
    Print the first col (1 x N)
    Print some of the first elements of the MATRIX_X to see that these elements are on consecutive memory cell.
    **/
    //        cout << MATRIX_X.row(0) << endl << endl;
    //        cout << "In memory (col-major):" << endl;
    //        for (n = 0; n < 10; n++)
    //            cout << *(MATRIX_X.data() + n) << "  ";
    //        cout << endl << endl;

    cout << "Now checking the condition of data given the distance." << endl;
    transformData(MATRIX_X, distance);
}

/**
 * Normalize data to support the distance (only needed for Cosine, Chi2, JS)
 * @param MATRIX_X
 * @param distance
 */
void transformData(RowMajorMatrixXf & MATRIX_X, const string& distance)
{
    // Check support distance
    // Doing cross-check for normalize points with cosine, and non-negative values for Chi2 and JS
    int numPoints = MATRIX_X.rows();

    if (distance == "Cosine")
    {
#pragma omp parallel for
        for (int n = 0; n < numPoints; ++n)
            MATRIX_X.row(n) /= MATRIX_X.row(n).norm(); // or MATRIX_X.rowwise().normalize() inplace but not multi-threading

//        cout << MATRIX_X.row(0).norm() << endl;
//        cout << MATRIX_X.row(10).norm() << endl;
//        cout << MATRIX_X.row(100).norm() << endl;
    }
    else if ((distance == "Chi2") || (distance == "JS"))
    {
        // Ensure non-negative
        if (MATRIX_X.minCoeff() < 0)
        {
            cerr << "Error: X is not non-negative !" << endl;
            exit(1);
        }
        else // normalize to have sum = 1
        {
            // Get colwise.sum is a row array, need to transpose() to make it col array
#pragma omp parallel for
            for (int n = 0; n < numPoints; ++n)
            {
                float fSum = MATRIX_X.row(n).sum();
                if (fSum <= 0)
                {
                    cerr << "Error: There is an zero point !" << endl;
                    exit(1);
                }
                MATRIX_X.row(n) /= fSum;
            }

            // Test
//            cout << MATRIX_X.row(0).sum() << endl;
//            cout << MATRIX_X.row(10).sum() << endl;
//            cout << MATRIX_X.row(100).sum() << endl;
        }
    }

}

/*
 * @param nargs:
 * @param args:
 * @return: Parsing parameter for FalconnPP++
 */
void readParam(int nargs, char** args, sVDCParam& sParam) {

    if (nargs < 6)
    {
        cerr << "Error: Not enough parameters !" << endl;
        exit(1);
    }

    // NumPoints n
    bool bSuccess = false;
    for (int i = 1; i < nargs; i++) {
        if (strcmp(args[i], "--n_points") == 0) {
            sParam.n_points = atoi(args[i + 1]);
            cout << "Number of rows/points of X: " << sParam.n_points << endl;
            bSuccess = true;
            break;
        }
    }

    if (!bSuccess) {
        cerr << "Error: Number of rows/points is missing !" << endl;
        exit(1);
    }

    // Dimension
    bSuccess = false;
    for (int i = 1; i < nargs; i++) {
        if (strcmp(args[i], "--n_features") == 0) {
            sParam.n_features = atoi(args[i + 1]);
            cout << "Number of columns/dimensions: " << sParam.n_features << endl;
            bSuccess = true;
            break;
        }
    }
    if (!bSuccess) {
        cerr << "Error: Number of columns/dimensions is missing !" << endl;
        exit(1);
    }


    // MinPTS
    bSuccess = false;
    for (int i = 1; i < nargs; i++) {
        if (strcmp(args[i], "--minPts") == 0) {
            sParam.minPts = atoi(args[i + 1]);
            cout << "minPts: " << sParam.minPts << endl;
            bSuccess = true;
            break;
        }
    }
    if (!bSuccess) {
        cerr << "Error: minPts is missing !" << endl;
        exit(1);
    }

    // Eps
    bSuccess = false;
    for (int i = 1; i < nargs; i++) {
        if (strcmp(args[i], "--alpha") == 0) {
            sParam.alpha = atof(args[i + 1]);
            cout << "Radius eps: " << sParam.alpha << endl;
            bSuccess = true;
            break;
        }
    }

    if (!bSuccess) {
        cerr << "Error: Eps is missing !" << endl;
        exit(1);
    }

    // Verbose
    sParam.verbose = false;
    for (int i = 1; i < nargs; i++) {
        if (strcmp(args[i], "--verbose") == 0) {
            sParam.verbose = true;
            cout << "verbose = true." << endl;
            break;
        }
    }

    // Distance measurement
    bSuccess = false;
    for (int i = 1; i < nargs; i++)
    {
        if (strcmp(args[i], "--distance") == 0)
        {
            if (strcmp(args[i + 1], "Cosine") == 0)
            {
                sParam.distance = "Cosine";
                cout << "Cosine distance - no kernel embedding" << endl;
            }
            else if (strcmp(args[i + 1], "L1") == 0)
            {
                sParam.distance = "L1";
                cout << "L1 distance" << endl;
            }
            else if (strcmp(args[i + 1], "L2") == 0)
            {
                sParam.distance = "L2";
                cout << "L2 distance" << endl;
            }
            else if (strcmp(args[i + 1], "Chi2") == 0)
            {
                sParam.distance = "Chi2";
                cout << "Chi2 distance" << endl;
            }
            else if (strcmp(args[i + 1], "JS") == 0)
            {
                sParam.distance = "JS";
                cout << "Jensen-Shannon distance" << endl;
            }
            else if (strcmp(args[i + 1], "DTW") == 0)
            {
                sParam.distance = "DTW";
                cout << "Dynamic Time Warping distance" << endl;
            }
            else if (strcmp(args[i + 1], "KL") == 0)
            {
                sParam.distance = "KL";
                cout << "KL divergence distance" << endl;
            }
            else if (strcmp(args[i + 1], "Wass") == 0)
            {
                sParam.distance = "Wass";
                cout << "Wasserstein distance" << endl;
            }
            else
            {
                cout << "Use default cosine distance" << endl;
                sParam.distance = "Cosine";
            }

            bSuccess = true;
            break;
        }
    }

    if (!bSuccess) {
        cout << "Distance is missing so we use default cosine distance" << endl;
        sParam.distance = "Cosine";
    }

    // Top-K close/far random vectors
    bSuccess = false;
    for (int i = 1; i < nargs; i++) {
        if (strcmp(args[i], "--topK") == 0) {
            sParam.topK = atoi(args[i + 1]);
            cout << "TopK closest/furthest vectors: " << sParam.topK << endl;
            bSuccess = true;
            break;
        }
    }

    if (!bSuccess) {
        sParam.topK = 5;
        cout << "TopK is missing. Use default topK: " << sParam.topK << endl;
    }

    // m >= MinPts
    bSuccess = false;
    for (int i = 1; i < nargs; i++) {
        if (strcmp(args[i], "--topM") == 0) {
            sParam.topM = atoi(args[i + 1]);
            cout << "TopM: " << sParam.topM << endl;
            bSuccess = true;
            break;
        }
    }

    if (!bSuccess) {
        sParam.topM = sParam.minPts;
        cout << "TopM is missing. Use default TopM = minPts = " << sParam.topM << endl;
    }

    // topP = topK
    bSuccess = false;
    for (int i = 1; i < nargs; i++) {
        if (strcmp(args[i], "--topP") == 0) {
            sParam.topP = atoi(args[i + 1]);
            cout << "TopP: " << sParam.topP << endl;
            bSuccess = true;
            break;
        }
    }

    if (!bSuccess) {
        sParam.topP = sParam.topK;
        cout << "TopP is missing. Use default TopP = TopK = " << sParam.topK << endl;
    }

    // Kernel embedding - it should be known before n_proj
    bSuccess = false;
    sParam.ker_n_features = sParam.n_features; // Must set default as n_features for Cosine
    for (int i = 1; i < nargs; i++) {
        if (strcmp(args[i], "--ker_n_features") == 0) {
            sParam.ker_n_features = atoi(args[i + 1]);
            cout << "Number of kernel embedded dimensions: " << sParam.ker_n_features << endl;
            cout << "If using L1 and L2, it must be an even number. " << endl;
            bSuccess = true;
            break;
        }
    }

    if (!bSuccess) {

        if ( (sParam.distance == "L1") || (sParam.distance == "L2") )
            sParam.ker_n_features = 2 * sParam.n_features; // must be an even number
        else
            sParam.ker_n_features = sParam.n_features; // default for other distance measures

        cout << "Kernel features is missing. Use default number of kernel features: " << sParam.ker_n_features << endl;
    }

    // numProjections
    bSuccess = false;
    for (int i = 1; i < nargs; i++) {
        if (strcmp(args[i], "--n_proj") == 0) {
            sParam.n_proj = atoi(args[i + 1]);
            cout << "Number of projections: " << sParam.n_proj << endl;
            bSuccess = true;
            break;

        }
    }

    // Depending on the distance, we set the relevant # projections
    if (!bSuccess) {
        if (sParam.distance == "Cosine")
        {
            int iTemp = ceil(log2(1.0 * sParam.n_features));
            sParam.n_proj = max(256, 1 << iTemp);
            cout << "Number of projections is missing. Use number of projections: " << sParam.n_proj << endl;
        }
        else
        {
            int iTemp = ceil(log2(1.0 * sParam.ker_n_features));
            sParam.n_proj = max(256, 1 << iTemp);
            cout << "Number of projections is missing. Use number of projections: " << sParam.n_proj << endl;
        }
    }

    // Will be set internally in the sDbscan class
    // Identify PARAM_INTERNAL_FWHT_PROJECTION to use FWHT in case the setting is not power of 2
//    if (sParam.distance == "Cosine")   {
//        if (sParam.n_proj <= sParam.n_features)
//            sParam.fhtDim = 1 << int(ceil(log2(sParam.n_features)));
//        else
//            sParam.fhtDim = 1 << int(ceil(log2(sParam.n_proj)));
//    }
//    else // the rest uses kernel embedding
//    {
//        if (sParam.n_proj <= sParam.ker_n_features)
//            sParam.fhtDim = 1 << int(ceil(log2(sParam.ker_n_features)));
//        else
//            sParam.fhtDim = 1 << int(ceil(log2(sParam.n_proj)));
//    }

    // Scale sigma of kernel L2 and L1
    bSuccess = false;
    for (int i = 1; i < nargs; i++)
    {
        if (strcmp(args[i], "--ker_sigma") == 0)
        {
            sParam.ker_sigma = atof(args[i + 1]);
            cout << "Sigma: " << sParam.ker_sigma << endl;
            bSuccess = true;
            break;
        }
    }

    if (!bSuccess)
    {
        if (sParam.distance == "L1")
        {
            // TODO: compute a sample kNN distance here
            sParam.ker_sigma = 1;
            cout << "Sigma is missing. Use default sigma = 1 for L1" << endl;
            cout << "Recommend sigma = avg minPts-NN distances of 100 points" << endl;
        }
        else if (sParam.distance == "L2")
        {
            sParam.ker_sigma = 2;
            cout << "Sigma is missing. Use default sigma = 2" << endl;
            cout << "Recommend sigma = avg minPts-NN distances of 100 points" << endl;
        }
    }

    // Sampling ratio used on Chi2 and JS - TPAMI 12 (interval_sampling in scikit-learn)
    bSuccess = false;
    for (int i = 1; i < nargs; i++)
    {
        if (strcmp(args[i], "--ker_intervalSampling") == 0)
        {
            sParam.ker_intervalSampling = atof(args[i + 1]);
            cout << "Sampling ratio for divergence distance: " << sParam.ker_intervalSampling << endl;
            bSuccess = true;
            break;
        }
    }

    if (!bSuccess && ((sParam.distance == "Chi2") || (sParam.distance == "JS")))
    {
        sParam.ker_intervalSampling = 0.4;
        cout << "Interval sampling ratio is missing. Use default sampling ratio for Chi2 and JS distances: " << sParam.ker_intervalSampling << endl;
    }

    // Output
    for (int i = 1; i < nargs; i++)
    {
        if (strcmp(args[i], "--output") == 0)
        {
            sParam.output = args[i + 1];
            break;
        }
    }

    if (sParam.output.empty())
    {
        cout << "No output file" << endl;
    }

    // number of threads
    bSuccess = false;
    for (int i = 1; i < nargs; i++) {
        if (strcmp(args[i], "--n_threads") == 0) {
            sParam.n_threads = atoi(args[i + 1]);
            cout << "Number of threads: " << sParam.n_threads << endl;
            bSuccess = true;
            break;
        }
    }
    if (!bSuccess) {
        sParam.n_threads = -1;
        cout << "Number of threads is missing. Use all threads: " << sParam.n_threads << endl;
    }


    // Sampling ratio used on sngDbscan and sDbscan-1NN
    bSuccess = false;
    for (int i = 1; i < nargs; i++)
    {
        if (strcmp(args[i], "--random_seed") == 0)
        {
            sParam.seed = atoi(args[i + 1]);
            cout << "Seed: " << sParam.seed << endl;
            bSuccess = true;
            break;
        }
    }

    if (!bSuccess)
    {
        sParam.seed = -1;
        cout << "Use a random seed: " << sParam.seed << endl;
    }

}

