/*
  fastcluster: Fast hierarchical clustering routines for R and Python

  Copyright © 2011 Daniel Müllner
  <http://danifold.net>

  This library implements various fast algorithms for hierarchical,
  agglomerative clustering methods:

  (1) Algorithms for the "stored matrix approach": the input is the array of
      pairwise dissimilarities.

      MST_linkage_core: single linkage clustering with the "minimum spanning
      tree algorithm (Rohlfs)

      NN_chain_core: nearest-neighbor-chain algorithm, suitable for single,
      complete, average, weighted and Ward linkage (Murtagh)

      generic_linkage: generic algorithm, suitable for all distance update
      formulas (Müllner)

  (2) Algorithms for the "stored data approach": the input are points in a
      vector space.

      MST_linkage_core_vector: single linkage clustering for vector data

      generic_linkage_vector: generic algorithm for vector data, suitable for
      the Ward, centroid and median methods.

      generic_linkage_vector_alternative: alternative scheme for updating the
      nearest neighbors. This method seems faster than "generic_linkage_vector"
      for the centroid and median methods but slower for the Ward method.

  All these implementation treat infinity values correctly. They throw an
  exception if a NaN distance value occurs.
*/

// Older versions of Microsoft Visual Studio do not have the fenv header.
#ifdef _MSC_VER
#if (_MSC_VER == 1500 || _MSC_VER == 1600)
#define NO_INCLUDE_FENV
#endif
#endif
// NaN detection via fenv might not work on systems with software
// floating-point emulation (bug report for Debian armel).
#ifdef __SOFTFP__
#define NO_INCLUDE_FENV
#endif
#ifdef NO_INCLUDE_FENV
#else
#pragma STDC FENV_ACCESS on
#include <fenv.h>
#endif

#include <algorithm>  // for std::fill_n
#include <cfloat>     // also for DBL_MAX, DBL_MIN
#include <cmath>      // for std::pow, std::sqrt
#include <cstddef>    // for std::ptrdiff_t
#include <limits>     // for std::numeric_limits<...>::infinity()
#include <stdexcept>  // for std::runtime_error
#include <string>     // for std::string
#ifndef DBL_MANT_DIG
#error The constant DBL_MANT_DIG could not be defined.
#endif
#define T_FLOAT_MANT_DIG DBL_MANT_DIG

#ifndef LONG_MAX
#include <climits>
#endif
#ifndef LONG_MAX
#error The constant LONG_MAX could not be defined.
#endif
#ifndef INT_MAX
#error The constant INT_MAX could not be defined.
#endif

#ifndef INT32_MAX
#ifdef _MSC_VER
#if _MSC_VER >= 1600
#define __STDC_LIMIT_MACROS
#include <stdint.h>
#else
typedef __int32 int_fast32_t;
typedef __int64 int64_t;
#endif
#else
#define __STDC_LIMIT_MACROS
#include <stdint.h>
#endif
#endif

#define FILL_N std::fill_n
#ifdef _MSC_VER
#if _MSC_VER < 1600
#undef FILL_N
#define FILL_N stdext::unchecked_fill_n
#endif
#endif

// Suppress warnings about (potentially) uninitialized variables.
#ifdef _MSC_VER
#pragma warning(disable : 4700)
#endif

#ifndef HAVE_DIAGNOSTIC
#if __GNUC__ > 4 || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 6))
#define HAVE_DIAGNOSTIC 1
#endif
#endif

#ifndef HAVE_VISIBILITY
#if __GNUC__ >= 4
#define HAVE_VISIBILITY 1
#endif
#endif

/* Since the public interface is given by the Python respectively R interface,
 * we do not want other symbols than the interface initalization routines to be
 * visible in the shared object file. The "visibility" switch is a GCC concept.
 * Hiding symbols keeps the relocation table small and decreases startup time.
 * See http://gcc.gnu.org/wiki/Visibility
 */
#if HAVE_VISIBILITY
#pragma GCC visibility push(hidden)
#endif

typedef int_fast32_t t_index;
#ifndef INT32_MAX
#define MAX_INDEX 0x7fffffffL
#else
#define MAX_INDEX INT32_MAX
#endif
#if (LONG_MAX < MAX_INDEX)
#error The integer format "t_index" must not have a greater range than "long int".
#endif
#if (INT_MAX > MAX_INDEX)
#error The integer format "int" must not have a greater range than "t_index".
#endif
typedef double t_float;

/* Method codes.

   These codes must agree with the METHODS array in fastcluster.R and the
   dictionary mthidx in fastcluster.py.
*/
enum method_codes {
    // non-Euclidean methods
    METHOD_METR_SINGLE = 0,
    METHOD_METR_COMPLETE = 1,
    METHOD_METR_AVERAGE = 2,
    METHOD_METR_WEIGHTED = 3,
    METHOD_METR_WARD = 4,
    METHOD_METR_WARD_D = METHOD_METR_WARD,
    METHOD_METR_CENTROID = 5,
    METHOD_METR_MEDIAN = 6,
    METHOD_METR_WARD_D2 = 7,

    MIN_METHOD_CODE = 0,
    MAX_METHOD_CODE = 7
};

enum method_codes_vector {
    // Euclidean methods
    METHOD_VECTOR_SINGLE = 0,
    METHOD_VECTOR_WARD = 1,
    METHOD_VECTOR_CENTROID = 2,
    METHOD_VECTOR_MEDIAN = 3,

    MIN_METHOD_VECTOR_CODE = 0,
    MAX_METHOD_VECTOR_CODE = 3
};

// self-destructing array pointer
template <typename type>
class auto_array_ptr {
private:
    type *ptr;
    auto_array_ptr(auto_array_ptr const &);             // non construction-copyable
    auto_array_ptr &operator=(auto_array_ptr const &);  // non copyable
public:
    auto_array_ptr()
        : ptr(NULL) {}
    template <typename index>
    auto_array_ptr(index const size)
        : ptr(new type[size]) {}
    template <typename index, typename value>
    auto_array_ptr(index const size, value const val)
        : ptr(new type[size]) {
        FILL_N(ptr, size, val);
    }
    ~auto_array_ptr() {
        delete[] ptr;
    }
    void free() {
        delete[] ptr;
        ptr = NULL;
    }
    template <typename index>
    void init(index const size) {
        ptr = new type[size];
    }
    template <typename index, typename value>
    void init(index const size, value const val) {
        init(size);
        FILL_N(ptr, size, val);
    }
    inline operator type *() const { return ptr; }
};

struct node {
    t_index node1, node2;
    t_float dist;
};

inline bool operator<(const node a, const node b) {
    return (a.dist < b.dist);
}

class cluster_result {
private:
    auto_array_ptr<node> Z;
    t_index pos;

public:
    cluster_result(const t_index size)
        : Z(size), pos(0) {}

    void append(const t_index node1, const t_index node2, const t_float dist) {
        Z[pos].node1 = node1;
        Z[pos].node2 = node2;
        Z[pos].dist = dist;
        ++pos;
    }

    node *operator[](const t_index idx) const { return Z + idx; }

    /* Define several methods to postprocess the distances. All these functions
       are monotone, so they do not change the sorted order of distances. */

    void sqrt() const {
        for (node *ZZ = Z; ZZ != Z + pos; ++ZZ) {
            ZZ->dist = std::sqrt(ZZ->dist);
        }
    }

    void sqrt(const t_float) const {  // ignore the argument
        sqrt();
    }

    void sqrtdouble(const t_float) const {  // ignore the argument
        for (node *ZZ = Z; ZZ != Z + pos; ++ZZ) {
            ZZ->dist = std::sqrt(2 * ZZ->dist);
        }
    }

#ifdef R_pow
#define my_pow R_pow
#else
#define my_pow std::pow
#endif

    void power(const t_float p) const {
        t_float const q = 1 / p;
        for (node *ZZ = Z; ZZ != Z + pos; ++ZZ) {
            ZZ->dist = my_pow(ZZ->dist, q);
        }
    }

    void plusone(const t_float) const {  // ignore the argument
        for (node *ZZ = Z; ZZ != Z + pos; ++ZZ) {
            ZZ->dist += 1;
        }
    }

    void divide(const t_float denom) const {
        for (node *ZZ = Z; ZZ != Z + pos; ++ZZ) {
            ZZ->dist /= denom;
        }
    }
};

class doubly_linked_list {
    /*
      Class for a doubly linked list. Initially, the list is the integer range
      [0, size]. We provide a forward iterator and a method to delete an index
      from the list.

      Typical use: for (i=L.start; L<size; i=L.succ[I])
      or
      for (i=somevalue; L<size; i=L.succ[I])
    */
public:
    t_index start;
    auto_array_ptr<t_index> succ;

private:
    auto_array_ptr<t_index> pred;
    // Not necessarily private, we just do not need it in this instance.

public:
    doubly_linked_list(const t_index size)
        // Initialize to the given size.
        : start(0), succ(size + 1), pred(size + 1) {
        for (t_index i = 0; i < size; ++i) {
            pred[i + 1] = i;
            succ[i] = i + 1;
        }
        // pred[0] is never accessed!
        // succ[size] is never accessed!
    }

    ~doubly_linked_list() {}

    void remove(const t_index idx) {
        // Remove an index from the list.
        if (idx == start) {
            start = succ[idx];
        } else {
            succ[pred[idx]] = succ[idx];
            pred[succ[idx]] = pred[idx];
        }
        succ[idx] = 0;  // Mark as inactive
    }

    bool is_inactive(t_index idx) const {
        return (succ[idx] == 0);
    }
};

// Indexing functions
// D is the upper triangular part of a symmetric (NxN)-matrix
// We require r_ < c_ !
#define D_(r_, c_) (D[(static_cast<std::ptrdiff_t>(2 * N - 3 - (r_)) * (r_) >> 1) + (c_) - 1])
// Z is an ((N-1)x4)-array
#define Z_(_r, _c) (Z[(_r) * 4 + (_c)])

/*
  Lookup function for a union-find data structure.

  The function finds the root of idx by going iteratively through all
  parent elements until a root is found. An element i is a root if
  nodes[i] is zero. To make subsequent searches faster, the entry for
  idx and all its parents is updated with the root element.
 */
class union_find {
private:
    auto_array_ptr<t_index> parent;
    t_index nextparent;

public:
    union_find(const t_index size)
        : parent(size > 0 ? 2 * size - 1 : 0, 0), nextparent(size) {}

    t_index Find(t_index idx) const {
        if (parent[idx] != 0) {  // a → b
            t_index p = idx;
            idx = parent[idx];
            if (parent[idx] != 0) {  // a → b → c
                do {
                    idx = parent[idx];
                } while (parent[idx] != 0);
                do {
                    t_index tmp = parent[p];
                    parent[p] = idx;
                    p = tmp;
                } while (parent[p] != idx);
            }
        }
        return idx;
    }

    void Union(const t_index node1, const t_index node2) {
        parent[node1] = parent[node2] = nextparent++;
    }
};

class nan_error {};
#ifdef FE_INVALID
class fenv_error {};
#endif

static void MST_linkage_core(const t_index N, const t_float *const D,
                             cluster_result &Z2) {
    /*
        N: integer, number of data points
        D: condensed distance matrix N*(N-1)/2
        Z2: output data structure

        The basis of this algorithm is an algorithm by Rohlf:

        F. James Rohlf, Hierarchical clustering using the minimum spanning tree,
        The Computer Journal, vol. 16, 1973, p. 93–95.
    */
    t_index i;
    t_index idx2;
    doubly_linked_list active_nodes(N);
    auto_array_ptr<t_float> d(N);

    t_index prev_node;
    t_float min;

    // first iteration
    idx2 = 1;
    min = std::numeric_limits<t_float>::infinity();
    for (i = 1; i < N; ++i) {
        d[i] = D[i - 1];
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfloat-equal"
#endif
        if (d[i] < min) {
            min = d[i];
            idx2 = i;
        } else if (fc_isnan(d[i]))
            throw(nan_error());
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic pop
#endif
    }
    Z2.append(0, idx2, min);

    for (t_index j = 1; j < N - 1; ++j) {
        prev_node = idx2;
        active_nodes.remove(prev_node);

        idx2 = active_nodes.succ[0];
        min = d[idx2];
        for (i = idx2; i < prev_node; i = active_nodes.succ[i]) {
            t_float tmp = D_(i, prev_node);
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfloat-equal"
#endif
            if (tmp < d[i])
                d[i] = tmp;
            else if (fc_isnan(tmp))
                throw(nan_error());
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic pop
#endif
            if (d[i] < min) {
                min = d[i];
                idx2 = i;
            }
        }
        for (; i < N; i = active_nodes.succ[i]) {
            t_float tmp = D_(prev_node, i);
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfloat-equal"
#endif
            if (d[i] > tmp)
                d[i] = tmp;
            else if (fc_isnan(tmp))
                throw(nan_error());
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic pop
#endif
            if (d[i] < min) {
                min = d[i];
                idx2 = i;
            }
        }
        Z2.append(prev_node, idx2, min);
    }
}

/* Functions for the update of the dissimilarity array */

inline static void f_single(t_float *const b, const t_float a) {
    if (*b > a) *b = a;
}
inline static void f_complete(t_float *const b, const t_float a) {
    if (*b < a) *b = a;
}
inline static void f_average(t_float *const b, const t_float a, const t_float s, const t_float t) {
    *b = s * a + t * (*b);
#ifndef FE_INVALID
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfloat-equal"
#endif
    if (fc_isnan(*b)) {
        throw(nan_error());
    }
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic pop
#endif
#endif
}
inline static void f_weighted(t_float *const b, const t_float a) {
    *b = (a + *b) * .5;
#ifndef FE_INVALID
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfloat-equal"
#endif
    if (fc_isnan(*b)) {
        throw(nan_error());
    }
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic pop
#endif
#endif
}
inline static void f_ward(t_float *const b, const t_float a, const t_float c, const t_float s, const t_float t, const t_float v) {
    *b = ((v + s) * a - v * c + (v + t) * (*b)) / (s + t + v);
//*b = a+(*b)-(t*a+s*(*b)+v*c)/(s+t+v);
#ifndef FE_INVALID
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfloat-equal"
#endif
    if (fc_isnan(*b)) {
        throw(nan_error());
    }
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic pop
#endif
#endif
}
inline static void f_centroid(t_float *const b, const t_float a, const t_float stc, const t_float s, const t_float t) {
    *b = s * a - stc + t * (*b);
#ifndef FE_INVALID
    if (fc_isnan(*b)) {
        throw(nan_error());
    }
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic pop
#endif
#endif
}
inline static void f_median(t_float *const b, const t_float a, const t_float c_4) {
    *b = (a + (*b)) * .5 - c_4;
#ifndef FE_INVALID
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfloat-equal"
#endif
    if (fc_isnan(*b)) {
        throw(nan_error());
    }
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic pop
#endif
#endif
}

template <method_codes method, typename t_members>
static void NN_chain_core(const t_index N, t_float *const D, t_members *const members, cluster_result &Z2) {
    /*
        N: integer
        D: condensed distance matrix N*(N-1)/2
        Z2: output data structure

        This is the NN-chain algorithm, described on page 86 in the following book:

        Fionn Murtagh, Multidimensional Clustering Algorithms,
        Vienna, Würzburg: Physica-Verlag, 1985.
    */
    t_index i;

    auto_array_ptr<t_index> NN_chain(N);
    t_index NN_chain_tip = 0;

    t_index idx1, idx2;

    t_float size1, size2;
    doubly_linked_list active_nodes(N);

    t_float min;

    for (t_float const *DD = D; DD != D + (static_cast<std::ptrdiff_t>(N) * (N - 1) >> 1);
         ++DD) {
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfloat-equal"
#endif
        if (fc_isnan(*DD)) {
            throw(nan_error());
        }
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic pop
#endif
    }

#ifdef FE_INVALID
    if (feclearexcept(FE_INVALID)) throw fenv_error();
#endif

    for (t_index j = 0; j < N - 1; ++j) {
        if (NN_chain_tip <= 3) {
            NN_chain[0] = idx1 = active_nodes.start;
            NN_chain_tip = 1;

            idx2 = active_nodes.succ[idx1];
            min = D_(idx1, idx2);
            for (i = active_nodes.succ[idx2]; i < N; i = active_nodes.succ[i]) {
                if (D_(idx1, i) < min) {
                    min = D_(idx1, i);
                    idx2 = i;
                }
            }
        }  // a: idx1   b: idx2
        else {
            NN_chain_tip -= 3;
            idx1 = NN_chain[NN_chain_tip - 1];
            idx2 = NN_chain[NN_chain_tip];
            min = idx1 < idx2 ? D_(idx1, idx2) : D_(idx2, idx1);
        }  // a: idx1   b: idx2

        do {
            NN_chain[NN_chain_tip] = idx2;

            for (i = active_nodes.start; i < idx2; i = active_nodes.succ[i]) {
                if (D_(i, idx2) < min) {
                    min = D_(i, idx2);
                    idx1 = i;
                }
            }
            for (i = active_nodes.succ[idx2]; i < N; i = active_nodes.succ[i]) {
                if (D_(idx2, i) < min) {
                    min = D_(idx2, i);
                    idx1 = i;
                }
            }

            idx2 = idx1;
            idx1 = NN_chain[NN_chain_tip++];

        } while (idx2 != NN_chain[NN_chain_tip - 2]);

        Z2.append(idx1, idx2, min);

        if (idx1 > idx2) {
            t_index tmp = idx1;
            idx1 = idx2;
            idx2 = tmp;
        }

        if (method == METHOD_METR_AVERAGE ||
            method == METHOD_METR_WARD) {
            size1 = static_cast<t_float>(members[idx1]);
            size2 = static_cast<t_float>(members[idx2]);
            members[idx2] += members[idx1];
        }

        // Remove the smaller index from the valid indices (active_nodes).
        active_nodes.remove(idx1);

        switch (method) {
            case METHOD_METR_SINGLE:
                /*
                Single linkage.

                Characteristic: new distances are never longer than the old distances.
                */
                // Update the distance matrix in the range [start, idx1).
                for (i = active_nodes.start; i < idx1; i = active_nodes.succ[i])
                    f_single(&D_(i, idx2), D_(i, idx1));
                // Update the distance matrix in the range (idx1, idx2).
                for (; i < idx2; i = active_nodes.succ[i])
                    f_single(&D_(i, idx2), D_(idx1, i));
                // Update the distance matrix in the range (idx2, N).
                for (i = active_nodes.succ[idx2]; i < N; i = active_nodes.succ[i])
                    f_single(&D_(idx2, i), D_(idx1, i));
                break;

            case METHOD_METR_COMPLETE:
                /*
                Complete linkage.

                Characteristic: new distances are never shorter than the old distances.
                */
                // Update the distance matrix in the range [start, idx1).
                for (i = active_nodes.start; i < idx1; i = active_nodes.succ[i])
                    f_complete(&D_(i, idx2), D_(i, idx1));
                // Update the distance matrix in the range (idx1, idx2).
                for (; i < idx2; i = active_nodes.succ[i])
                    f_complete(&D_(i, idx2), D_(idx1, i));
                // Update the distance matrix in the range (idx2, N).
                for (i = active_nodes.succ[idx2]; i < N; i = active_nodes.succ[i])
                    f_complete(&D_(idx2, i), D_(idx1, i));
                break;

            case METHOD_METR_AVERAGE: {
                /*
                Average linkage.

                Shorter and longer distances can occur.
                */
                // Update the distance matrix in the range [start, idx1).
                t_float s = size1 / (size1 + size2);
                t_float t = size2 / (size1 + size2);
                for (i = active_nodes.start; i < idx1; i = active_nodes.succ[i])
                    f_average(&D_(i, idx2), D_(i, idx1), s, t);
                // Update the distance matrix in the range (idx1, idx2).
                for (; i < idx2; i = active_nodes.succ[i])
                    f_average(&D_(i, idx2), D_(idx1, i), s, t);
                // Update the distance matrix in the range (idx2, N).
                for (i = active_nodes.succ[idx2]; i < N; i = active_nodes.succ[i])
                    f_average(&D_(idx2, i), D_(idx1, i), s, t);
                break;
            }

            case METHOD_METR_WEIGHTED:
                /*
                Weighted linkage.

                Shorter and longer distances can occur.
                */
                // Update the distance matrix in the range [start, idx1).
                for (i = active_nodes.start; i < idx1; i = active_nodes.succ[i])
                    f_weighted(&D_(i, idx2), D_(i, idx1));
                // Update the distance matrix in the range (idx1, idx2).
                for (; i < idx2; i = active_nodes.succ[i])
                    f_weighted(&D_(i, idx2), D_(idx1, i));
                // Update the distance matrix in the range (idx2, N).
                for (i = active_nodes.succ[idx2]; i < N; i = active_nodes.succ[i])
                    f_weighted(&D_(idx2, i), D_(idx1, i));
                break;

            case METHOD_METR_WARD:
                /*
                Ward linkage.

                Shorter and longer distances can occur, not smaller than min(d1,d2)
                but maybe bigger than max(d1,d2).
                */
                // Update the distance matrix in the range [start, idx1).
                // t_float v = static_cast<t_float>(members[i]);
                for (i = active_nodes.start; i < idx1; i = active_nodes.succ[i])
                    f_ward(&D_(i, idx2), D_(i, idx1), min,
                           size1, size2, static_cast<t_float>(members[i]));
                // Update the distance matrix in the range (idx1, idx2).
                for (; i < idx2; i = active_nodes.succ[i])
                    f_ward(&D_(i, idx2), D_(idx1, i), min,
                           size1, size2, static_cast<t_float>(members[i]));
                // Update the distance matrix in the range (idx2, N).
                for (i = active_nodes.succ[idx2]; i < N; i = active_nodes.succ[i])
                    f_ward(&D_(idx2, i), D_(idx1, i), min,
                           size1, size2, static_cast<t_float>(members[i]));
                break;

            default:
                throw std::runtime_error(std::string("Invalid method."));
        }
    }
#ifdef FE_INVALID
    if (fetestexcept(FE_INVALID)) throw fenv_error();
#endif
}

class binary_min_heap {
    /*
    Class for a binary min-heap. The data resides in an array A. The elements of
    A are not changed but two lists I and R of indices are generated which point
    to elements of A and backwards.

    The heap tree structure is

       H[2*i+1]     H[2*i+2]
           \            /
            \          /
             ≤        ≤
              \      /
               \    /
                H[i]

    where the children must be less or equal than their parent. Thus, H[0]
    contains the minimum. The lists I and R are made such that H[i] = A[I[i]]
    and R[I[i]] = i.

    This implementation is not designed to handle NaN values.
    */
private:
    t_float *const A;
    t_index size;
    auto_array_ptr<t_index> I;
    auto_array_ptr<t_index> R;

    // no default constructor
    binary_min_heap();
    // noncopyable
    binary_min_heap(binary_min_heap const &);
    binary_min_heap &operator=(binary_min_heap const &);

public:
    binary_min_heap(t_float *const A_, const t_index size_)
        : A(A_), size(size_), I(size), R(size) {  // Allocate memory and initialize the lists I and R to the identity. This
        // does not make it a heap. Call heapify afterwards!
        for (t_index i = 0; i < size; ++i)
            R[i] = I[i] = i;
    }

    binary_min_heap(t_float *const A_, const t_index size1, const t_index size2,
                    const t_index start)
        : A(A_), size(size1), I(size1), R(size2) {  // Allocate memory and initialize the lists I and R to the identity. This
        // does not make it a heap. Call heapify afterwards!
        for (t_index i = 0; i < size; ++i) {
            R[i + start] = i;
            I[i] = i + start;
        }
    }

    ~binary_min_heap() {}

    void heapify() {
        // Arrange the indices I and R so that H[i] := A[I[i]] satisfies the heap
        // condition H[i] < H[2*i+1] and H[i] < H[2*i+2] for each i.
        //
        // Complexity: Θ(size)
        // Reference: Cormen, Leiserson, Rivest, Stein, Introduction to Algorithms,
        // 3rd ed., 2009, Section 6.3 “Building a heap”
        t_index idx;
        for (idx = (size >> 1); idx > 0;) {
            --idx;
            update_geq_(idx);
        }
    }

    inline t_index argmin() const {
        // Return the minimal element.
        return I[0];
    }

    void heap_pop() {
        // Remove the minimal element from the heap.
        --size;
        I[0] = I[size];
        R[I[0]] = 0;
        update_geq_(0);
    }

    void remove(t_index idx) {
        // Remove an element from the heap.
        --size;
        R[I[size]] = R[idx];
        I[R[idx]] = I[size];
        if (H(size) <= A[idx]) {
            update_leq_(R[idx]);
        } else {
            update_geq_(R[idx]);
        }
    }

    void replace(const t_index idxold, const t_index idxnew,
                 const t_float val) {
        R[idxnew] = R[idxold];
        I[R[idxnew]] = idxnew;
        if (val <= A[idxold])
            update_leq(idxnew, val);
        else
            update_geq(idxnew, val);
    }

    void update(const t_index idx, const t_float val) const {
        // Update the element A[i] with val and re-arrange the indices to preserve
        // the heap condition.
        if (val <= A[idx])
            update_leq(idx, val);
        else
            update_geq(idx, val);
    }

    void update_leq(const t_index idx, const t_float val) const {
        // Use this when the new value is not more than the old value.
        A[idx] = val;
        update_leq_(R[idx]);
    }

    void update_geq(const t_index idx, const t_float val) const {
        // Use this when the new value is not less than the old value.
        A[idx] = val;
        update_geq_(R[idx]);
    }

private:
    void update_leq_(t_index i) const {
        t_index j;
        for (; (i > 0) && (H(i) < H(j = (i - 1) >> 1)); i = j)
            heap_swap(i, j);
    }

    void update_geq_(t_index i) const {
        t_index j;
        for (; (j = 2 * i + 1) < size; i = j) {
            if (H(j) >= H(i)) {
                ++j;
                if (j >= size || H(j) >= H(i)) break;
            } else if (j + 1 < size && H(j + 1) < H(j))
                ++j;
            heap_swap(i, j);
        }
    }

    void heap_swap(const t_index i, const t_index j) const {
        // Swap two indices.
        t_index tmp = I[i];
        I[i] = I[j];
        I[j] = tmp;
        R[I[i]] = i;
        R[I[j]] = j;
    }

    inline t_float H(const t_index i) const {
        return A[I[i]];
    }
};

template <method_codes method, typename t_members>
static void generic_linkage(const t_index N, t_float *const D, t_members *const members, cluster_result &Z2) {
    /*
      N: integer, number of data points
      D: condensed distance matrix N*(N-1)/2
      Z2: output data structure
    */

    const t_index N_1 = N - 1;
    t_index i, j;        // loop variables
    t_index idx1, idx2;  // row and column indices

    auto_array_ptr<t_index> n_nghbr(N_1);  // array of nearest neighbors
    auto_array_ptr<t_float> mindist(N_1);  // distances to the nearest neighbors
    auto_array_ptr<t_index> row_repr(N);   // row_repr[i]: node number that the
                                           // i-th row represents
    doubly_linked_list active_nodes(N);
    binary_min_heap nn_distances(&*mindist, N_1);  // minimum heap structure for
                                                   // the distance to the nearest neighbor of each point
    t_index node1, node2;                          // node numbers in the output
    t_float size1, size2;                          // and their cardinalities

    t_float min;  // minimum and row index for nearest-neighbor search
    t_index idx;

    for (i = 0; i < N; ++i)
        // Build a list of row ↔ node label assignments.
        // Initially i ↦ i
        row_repr[i] = i;

    // Initialize the minimal distances:
    // Find the nearest neighbor of each point.
    // n_nghbr[i] = argmin_{j>i} D(i,j) for i in range(N-1)
    t_float const *DD = D;
    for (i = 0; i < N_1; ++i) {
        min = std::numeric_limits<t_float>::infinity();
        for (idx = j = i + 1; j < N; ++j, ++DD) {
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfloat-equal"
#endif
            if (*DD < min) {
                min = *DD;
                idx = j;
            } else if (fc_isnan(*DD))
                throw(nan_error());
        }
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic pop
#endif
        mindist[i] = min;
        n_nghbr[i] = idx;
    }

    // Put the minimal distances into a heap structure to make the repeated
    // global minimum searches fast.
    nn_distances.heapify();

#ifdef FE_INVALID
    if (feclearexcept(FE_INVALID)) throw fenv_error();
#endif

    // Main loop: We have N-1 merging steps.
    for (i = 0; i < N_1; ++i) {
        /*
          Here is a special feature that allows fast bookkeeping and updates of the
          minimal distances.

          mindist[i] stores a lower bound on the minimum distance of the point i to
          all points of higher index:

              mindist[i] ≥ min_{j>i} D(i,j)

          Normally, we have equality. However, this minimum may become invalid due
          to the updates in the distance matrix. The rules are:

          1) If mindist[i] is equal to D(i, n_nghbr[i]), this is the correct
             minimum and n_nghbr[i] is a nearest neighbor.

          2) If mindist[i] is smaller than D(i, n_nghbr[i]), this might not be the
             correct minimum. The minimum needs to be recomputed.

          3) mindist[i] is never bigger than the true minimum. Hence, we never
             miss the true minimum if we take the smallest mindist entry,
             re-compute the value if necessary (thus maybe increasing it) and
             looking for the now smallest mindist entry until a valid minimal
             entry is found. This step is done in the lines below.

          The update process for D below takes care that these rules are
          fulfilled. This makes sure that the minima in the rows D(i,i+1:)of D are
          re-calculated when necessary but re-calculation is avoided whenever
          possible.

          The re-calculation of the minima makes the worst-case runtime of this
          algorithm cubic in N. We avoid this whenever possible, and in most cases
          the runtime appears to be quadratic.
        */
        idx1 = nn_distances.argmin();
        if (method != METHOD_METR_SINGLE) {
            while (mindist[idx1] < D_(idx1, n_nghbr[idx1])) {
                // Recompute the minimum mindist[idx1] and n_nghbr[idx1].
                n_nghbr[idx1] = j = active_nodes.succ[idx1];  // exists, maximally N-1
                min = D_(idx1, j);
                for (j = active_nodes.succ[j]; j < N; j = active_nodes.succ[j]) {
                    if (D_(idx1, j) < min) {
                        min = D_(idx1, j);
                        n_nghbr[idx1] = j;
                    }
                }
                /* Update the heap with the new true minimum and search for the
                   (possibly different) minimal entry. */
                nn_distances.update_geq(idx1, min);
                idx1 = nn_distances.argmin();
            }
        }

        nn_distances.heap_pop();  // Remove the current minimum from the heap.
        idx2 = n_nghbr[idx1];

        // Write the newly found minimal pair of nodes to the output array.
        node1 = row_repr[idx1];
        node2 = row_repr[idx2];

        if (method == METHOD_METR_AVERAGE ||
            method == METHOD_METR_WARD ||
            method == METHOD_METR_CENTROID) {
            size1 = static_cast<t_float>(members[idx1]);
            size2 = static_cast<t_float>(members[idx2]);
            members[idx2] += members[idx1];
        }
        Z2.append(node1, node2, mindist[idx1]);

        // Remove idx1 from the list of active indices (active_nodes).
        active_nodes.remove(idx1);
        // Index idx2 now represents the new (merged) node with label N+i.
        row_repr[idx2] = N + i;

        // Update the distance matrix
        switch (method) {
            case METHOD_METR_SINGLE:
                /*
                  Single linkage.

                  Characteristic: new distances are never longer than the old distances.
                */
                // Update the distance matrix in the range [start, idx1).
                for (j = active_nodes.start; j < idx1; j = active_nodes.succ[j]) {
                    f_single(&D_(j, idx2), D_(j, idx1));
                    if (n_nghbr[j] == idx1)
                        n_nghbr[j] = idx2;
                }
                // Update the distance matrix in the range (idx1, idx2).
                for (; j < idx2; j = active_nodes.succ[j]) {
                    f_single(&D_(j, idx2), D_(idx1, j));
                    // If the new value is below the old minimum in a row, update
                    // the mindist and n_nghbr arrays.
                    if (D_(j, idx2) < mindist[j]) {
                        nn_distances.update_leq(j, D_(j, idx2));
                        n_nghbr[j] = idx2;
                    }
                }
                // Update the distance matrix in the range (idx2, N).
                // Recompute the minimum mindist[idx2] and n_nghbr[idx2].
                if (idx2 < N_1) {
                    min = mindist[idx2];
                    for (j = active_nodes.succ[idx2]; j < N; j = active_nodes.succ[j]) {
                        f_single(&D_(idx2, j), D_(idx1, j));
                        if (D_(idx2, j) < min) {
                            n_nghbr[idx2] = j;
                            min = D_(idx2, j);
                        }
                    }
                    nn_distances.update_leq(idx2, min);
                }
                break;

            case METHOD_METR_COMPLETE:
                /*
                  Complete linkage.

                  Characteristic: new distances are never shorter than the old distances.
                */
                // Update the distance matrix in the range [start, idx1).
                for (j = active_nodes.start; j < idx1; j = active_nodes.succ[j]) {
                    f_complete(&D_(j, idx2), D_(j, idx1));
                    if (n_nghbr[j] == idx1)
                        n_nghbr[j] = idx2;
                }
                // Update the distance matrix in the range (idx1, idx2).
                for (; j < idx2; j = active_nodes.succ[j])
                    f_complete(&D_(j, idx2), D_(idx1, j));
                // Update the distance matrix in the range (idx2, N).
                for (j = active_nodes.succ[idx2]; j < N; j = active_nodes.succ[j])
                    f_complete(&D_(idx2, j), D_(idx1, j));
                break;

            case METHOD_METR_AVERAGE: {
                /*
                  Average linkage.

                  Shorter and longer distances can occur.
                */
                // Update the distance matrix in the range [start, idx1).
                t_float s = size1 / (size1 + size2);
                t_float t = size2 / (size1 + size2);
                for (j = active_nodes.start; j < idx1; j = active_nodes.succ[j]) {
                    f_average(&D_(j, idx2), D_(j, idx1), s, t);
                    if (n_nghbr[j] == idx1)
                        n_nghbr[j] = idx2;
                }
                // Update the distance matrix in the range (idx1, idx2).
                for (; j < idx2; j = active_nodes.succ[j]) {
                    f_average(&D_(j, idx2), D_(idx1, j), s, t);
                    if (D_(j, idx2) < mindist[j]) {
                        nn_distances.update_leq(j, D_(j, idx2));
                        n_nghbr[j] = idx2;
                    }
                }
                // Update the distance matrix in the range (idx2, N).
                if (idx2 < N_1) {
                    n_nghbr[idx2] = j = active_nodes.succ[idx2];  // exists, maximally N-1
                    f_average(&D_(idx2, j), D_(idx1, j), s, t);
                    min = D_(idx2, j);
                    for (j = active_nodes.succ[j]; j < N; j = active_nodes.succ[j]) {
                        f_average(&D_(idx2, j), D_(idx1, j), s, t);
                        if (D_(idx2, j) < min) {
                            min = D_(idx2, j);
                            n_nghbr[idx2] = j;
                        }
                    }
                    nn_distances.update(idx2, min);
                }
                break;
            }

            case METHOD_METR_WEIGHTED:
                /*
                  Weighted linkage.

                  Shorter and longer distances can occur.
                */
                // Update the distance matrix in the range [start, idx1).
                for (j = active_nodes.start; j < idx1; j = active_nodes.succ[j]) {
                    f_weighted(&D_(j, idx2), D_(j, idx1));
                    if (n_nghbr[j] == idx1)
                        n_nghbr[j] = idx2;
                }
                // Update the distance matrix in the range (idx1, idx2).
                for (; j < idx2; j = active_nodes.succ[j]) {
                    f_weighted(&D_(j, idx2), D_(idx1, j));
                    if (D_(j, idx2) < mindist[j]) {
                        nn_distances.update_leq(j, D_(j, idx2));
                        n_nghbr[j] = idx2;
                    }
                }
                // Update the distance matrix in the range (idx2, N).
                if (idx2 < N_1) {
                    n_nghbr[idx2] = j = active_nodes.succ[idx2];  // exists, maximally N-1
                    f_weighted(&D_(idx2, j), D_(idx1, j));
                    min = D_(idx2, j);
                    for (j = active_nodes.succ[j]; j < N; j = active_nodes.succ[j]) {
                        f_weighted(&D_(idx2, j), D_(idx1, j));
                        if (D_(idx2, j) < min) {
                            min = D_(idx2, j);
                            n_nghbr[idx2] = j;
                        }
                    }
                    nn_distances.update(idx2, min);
                }
                break;

            case METHOD_METR_WARD:
                /*
                  Ward linkage.

                  Shorter and longer distances can occur, not smaller than min(d1,d2)
                  but maybe bigger than max(d1,d2).
                */
                // Update the distance matrix in the range [start, idx1).
                for (j = active_nodes.start; j < idx1; j = active_nodes.succ[j]) {
                    f_ward(&D_(j, idx2), D_(j, idx1), mindist[idx1],
                           size1, size2, static_cast<t_float>(members[j]));
                    if (n_nghbr[j] == idx1)
                        n_nghbr[j] = idx2;
                }
                // Update the distance matrix in the range (idx1, idx2).
                for (; j < idx2; j = active_nodes.succ[j]) {
                    f_ward(&D_(j, idx2), D_(idx1, j), mindist[idx1], size1, size2,
                           static_cast<t_float>(members[j]));
                    if (D_(j, idx2) < mindist[j]) {
                        nn_distances.update_leq(j, D_(j, idx2));
                        n_nghbr[j] = idx2;
                    }
                }
                // Update the distance matrix in the range (idx2, N).
                if (idx2 < N_1) {
                    n_nghbr[idx2] = j = active_nodes.succ[idx2];  // exists, maximally N-1
                    f_ward(&D_(idx2, j), D_(idx1, j), mindist[idx1],
                           size1, size2, static_cast<t_float>(members[j]));
                    min = D_(idx2, j);
                    for (j = active_nodes.succ[j]; j < N; j = active_nodes.succ[j]) {
                        f_ward(&D_(idx2, j), D_(idx1, j), mindist[idx1],
                               size1, size2, static_cast<t_float>(members[j]));
                        if (D_(idx2, j) < min) {
                            min = D_(idx2, j);
                            n_nghbr[idx2] = j;
                        }
                    }
                    nn_distances.update(idx2, min);
                }
                break;

            case METHOD_METR_CENTROID: {
                /*
                  Centroid linkage.

                  Shorter and longer distances can occur, not bigger than max(d1,d2)
                  but maybe smaller than min(d1,d2).
                */
                // Update the distance matrix in the range [start, idx1).
                t_float s = size1 / (size1 + size2);
                t_float t = size2 / (size1 + size2);
                t_float stc = s * t * mindist[idx1];
                for (j = active_nodes.start; j < idx1; j = active_nodes.succ[j]) {
                    f_centroid(&D_(j, idx2), D_(j, idx1), stc, s, t);
                    if (D_(j, idx2) < mindist[j]) {
                        nn_distances.update_leq(j, D_(j, idx2));
                        n_nghbr[j] = idx2;
                    } else if (n_nghbr[j] == idx1)
                        n_nghbr[j] = idx2;
                }
                // Update the distance matrix in the range (idx1, idx2).
                for (; j < idx2; j = active_nodes.succ[j]) {
                    f_centroid(&D_(j, idx2), D_(idx1, j), stc, s, t);
                    if (D_(j, idx2) < mindist[j]) {
                        nn_distances.update_leq(j, D_(j, idx2));
                        n_nghbr[j] = idx2;
                    }
                }
                // Update the distance matrix in the range (idx2, N).
                if (idx2 < N_1) {
                    n_nghbr[idx2] = j = active_nodes.succ[idx2];  // exists, maximally N-1
                    f_centroid(&D_(idx2, j), D_(idx1, j), stc, s, t);
                    min = D_(idx2, j);
                    for (j = active_nodes.succ[j]; j < N; j = active_nodes.succ[j]) {
                        f_centroid(&D_(idx2, j), D_(idx1, j), stc, s, t);
                        if (D_(idx2, j) < min) {
                            min = D_(idx2, j);
                            n_nghbr[idx2] = j;
                        }
                    }
                    nn_distances.update(idx2, min);
                }
                break;
            }

            case METHOD_METR_MEDIAN: {
                /*
                  Median linkage.

                  Shorter and longer distances can occur, not bigger than max(d1,d2)
                  but maybe smaller than min(d1,d2).
                */
                // Update the distance matrix in the range [start, idx1).
                t_float c_4 = mindist[idx1] * .25;
                for (j = active_nodes.start; j < idx1; j = active_nodes.succ[j]) {
                    f_median(&D_(j, idx2), D_(j, idx1), c_4);
                    if (D_(j, idx2) < mindist[j]) {
                        nn_distances.update_leq(j, D_(j, idx2));
                        n_nghbr[j] = idx2;
                    } else if (n_nghbr[j] == idx1)
                        n_nghbr[j] = idx2;
                }
                // Update the distance matrix in the range (idx1, idx2).
                for (; j < idx2; j = active_nodes.succ[j]) {
                    f_median(&D_(j, idx2), D_(idx1, j), c_4);
                    if (D_(j, idx2) < mindist[j]) {
                        nn_distances.update_leq(j, D_(j, idx2));
                        n_nghbr[j] = idx2;
                    }
                }
                // Update the distance matrix in the range (idx2, N).
                if (idx2 < N_1) {
                    n_nghbr[idx2] = j = active_nodes.succ[idx2];  // exists, maximally N-1
                    f_median(&D_(idx2, j), D_(idx1, j), c_4);
                    min = D_(idx2, j);
                    for (j = active_nodes.succ[j]; j < N; j = active_nodes.succ[j]) {
                        f_median(&D_(idx2, j), D_(idx1, j), c_4);
                        if (D_(idx2, j) < min) {
                            min = D_(idx2, j);
                            n_nghbr[idx2] = j;
                        }
                    }
                    nn_distances.update(idx2, min);
                }
                break;
            }

            default:
                throw std::runtime_error(std::string("Invalid method."));
        }
    }
#ifdef FE_INVALID
    if (fetestexcept(FE_INVALID)) throw fenv_error();
#endif
}

/*
  Clustering methods for vector data
*/

template <typename t_dissimilarity>
static void MST_linkage_core_vector(const t_index N,
                                    t_dissimilarity &dist,
                                    cluster_result &Z2) {
    /*
        N: integer, number of data points
        dist: function pointer to the metric
        Z2: output data structure

        The basis of this algorithm is an algorithm by Rohlf:

        F. James Rohlf, Hierarchical clustering using the minimum spanning tree,
        The Computer Journal, vol. 16, 1973, p. 93–95.
    */
    t_index i;
    t_index idx2;
    doubly_linked_list active_nodes(N);
    auto_array_ptr<t_float> d(N);

    t_index prev_node;
    t_float min;

    // first iteration
    idx2 = 1;
    min = std::numeric_limits<t_float>::infinity();
    for (i = 1; i < N; ++i) {
        d[i] = dist(0, i);
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfloat-equal"
#endif
        if (d[i] < min) {
            min = d[i];
            idx2 = i;
        } else if (fc_isnan(d[i]))
            throw(nan_error());
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic pop
#endif
    }

    Z2.append(0, idx2, min);

    for (t_index j = 1; j < N - 1; ++j) {
        prev_node = idx2;
        active_nodes.remove(prev_node);

        idx2 = active_nodes.succ[0];
        min = d[idx2];

        for (i = idx2; i < N; i = active_nodes.succ[i]) {
            t_float tmp = dist(i, prev_node);
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wfloat-equal"
#endif
            if (d[i] > tmp)
                d[i] = tmp;
            else if (fc_isnan(tmp))
                throw(nan_error());
#if HAVE_DIAGNOSTIC
#pragma GCC diagnostic pop
#endif
            if (d[i] < min) {
                min = d[i];
                idx2 = i;
            }
        }
        Z2.append(prev_node, idx2, min);
    }
}

template <method_codes_vector method, typename t_dissimilarity>
static void generic_linkage_vector(const t_index N,
                                   t_dissimilarity &dist,
                                   cluster_result &Z2) {
    /*
      N: integer, number of data points
      dist: function pointer to the metric
      Z2: output data structure

      This algorithm is valid for the distance update methods
      "Ward", "centroid" and "median" only!
    */
    const t_index N_1 = N - 1;
    t_index i, j;        // loop variables
    t_index idx1, idx2;  // row and column indices

    auto_array_ptr<t_index> n_nghbr(N_1);  // array of nearest neighbors
    auto_array_ptr<t_float> mindist(N_1);  // distances to the nearest neighbors
    auto_array_ptr<t_index> row_repr(N);   // row_repr[i]: node number that the
                                           // i-th row represents
    doubly_linked_list active_nodes(N);
    binary_min_heap nn_distances(&*mindist, N_1);  // minimum heap structure for
                                                   // the distance to the nearest neighbor of each point
    t_index node1, node2;                          // node numbers in the output
    t_float min;                                   // minimum and row index for nearest-neighbor search

    for (i = 0; i < N; ++i)
        // Build a list of row ↔ node label assignments.
        // Initially i ↦ i
        row_repr[i] = i;

    // Initialize the minimal distances:
    // Find the nearest neighbor of each point.
    // n_nghbr[i] = argmin_{j>i} D(i,j) for i in range(N-1)
    for (i = 0; i < N_1; ++i) {
        min = std::numeric_limits<t_float>::infinity();
        t_index idx;
        for (idx = j = i + 1; j < N; ++j) {
            t_float tmp;
            switch (method) {
                case METHOD_VECTOR_WARD:
                    tmp = dist.ward_initial(i, j);
                    break;
                default:
                    tmp = dist.template sqeuclidean<true>(i, j);
            }
            if (tmp < min) {
                min = tmp;
                idx = j;
            }
        }
        switch (method) {
            case METHOD_VECTOR_WARD:
                mindist[i] = t_dissimilarity::ward_initial_conversion(min);
                break;
            default:
                mindist[i] = min;
        }
        n_nghbr[i] = idx;
    }

    // Put the minimal distances into a heap structure to make the repeated
    // global minimum searches fast.
    nn_distances.heapify();

    // Main loop: We have N-1 merging steps.
    for (i = 0; i < N_1; ++i) {
        idx1 = nn_distances.argmin();

        while (active_nodes.is_inactive(n_nghbr[idx1])) {
            // Recompute the minimum mindist[idx1] and n_nghbr[idx1].
            n_nghbr[idx1] = j = active_nodes.succ[idx1];  // exists, maximally N-1
            switch (method) {
                case METHOD_VECTOR_WARD:
                    min = dist.ward(idx1, j);
                    for (j = active_nodes.succ[j]; j < N; j = active_nodes.succ[j]) {
                        t_float const tmp = dist.ward(idx1, j);
                        if (tmp < min) {
                            min = tmp;
                            n_nghbr[idx1] = j;
                        }
                    }
                    break;
                default:
                    min = dist.template sqeuclidean<true>(idx1, j);
                    for (j = active_nodes.succ[j]; j < N; j = active_nodes.succ[j]) {
                        t_float const tmp = dist.template sqeuclidean<true>(idx1, j);
                        if (tmp < min) {
                            min = tmp;
                            n_nghbr[idx1] = j;
                        }
                    }
            }
            /* Update the heap with the new true minimum and search for the (possibly
               different) minimal entry. */
            nn_distances.update_geq(idx1, min);
            idx1 = nn_distances.argmin();
        }

        nn_distances.heap_pop();  // Remove the current minimum from the heap.
        idx2 = n_nghbr[idx1];

        // Write the newly found minimal pair of nodes to the output array.
        node1 = row_repr[idx1];
        node2 = row_repr[idx2];

        Z2.append(node1, node2, mindist[idx1]);

        switch (method) {
            case METHOD_VECTOR_WARD:
            case METHOD_VECTOR_CENTROID:
                dist.merge_inplace(idx1, idx2);
                break;
            case METHOD_VECTOR_MEDIAN:
                dist.merge_inplace_weighted(idx1, idx2);
                break;
            default:
                throw std::runtime_error(std::string("Invalid method."));
        }

        // Index idx2 now represents the new (merged) node with label N+i.
        row_repr[idx2] = N + i;
        // Remove idx1 from the list of active indices (active_nodes).
        active_nodes.remove(idx1);  // TBD later!!!

        // Update the distance matrix
        switch (method) {
            case METHOD_VECTOR_WARD:
                /*
                  Ward linkage.

                  Shorter and longer distances can occur, not smaller than min(d1,d2)
                  but maybe bigger than max(d1,d2).
                */
                // Update the distance matrix in the range [start, idx1).
                for (j = active_nodes.start; j < idx1; j = active_nodes.succ[j]) {
                    if (n_nghbr[j] == idx2) {
                        n_nghbr[j] = idx1;  // invalidate
                    }
                }
                // Update the distance matrix in the range (idx1, idx2).
                for (; j < idx2; j = active_nodes.succ[j]) {
                    t_float const tmp = dist.ward(j, idx2);
                    if (tmp < mindist[j]) {
                        nn_distances.update_leq(j, tmp);
                        n_nghbr[j] = idx2;
                    } else if (n_nghbr[j] == idx2) {
                        n_nghbr[j] = idx1;  // invalidate
                    }
                }
                // Find the nearest neighbor for idx2.
                if (idx2 < N_1) {
                    n_nghbr[idx2] = j = active_nodes.succ[idx2];  // exists, maximally N-1
                    min = dist.ward(idx2, j);
                    for (j = active_nodes.succ[j]; j < N; j = active_nodes.succ[j]) {
                        t_float const tmp = dist.ward(idx2, j);
                        if (tmp < min) {
                            min = tmp;
                            n_nghbr[idx2] = j;
                        }
                    }
                    nn_distances.update(idx2, min);
                }
                break;

            default:
                /*
                  Centroid and median linkage.

                  Shorter and longer distances can occur, not bigger than max(d1,d2)
                  but maybe smaller than min(d1,d2).
                */
                for (j = active_nodes.start; j < idx2; j = active_nodes.succ[j]) {
                    t_float const tmp = dist.template sqeuclidean<true>(j, idx2);
                    if (tmp < mindist[j]) {
                        nn_distances.update_leq(j, tmp);
                        n_nghbr[j] = idx2;
                    } else if (n_nghbr[j] == idx2)
                        n_nghbr[j] = idx1;  // invalidate
                }
                // Find the nearest neighbor for idx2.
                if (idx2 < N_1) {
                    n_nghbr[idx2] = j = active_nodes.succ[idx2];  // exists, maximally N-1
                    min = dist.template sqeuclidean<true>(idx2, j);
                    for (j = active_nodes.succ[j]; j < N; j = active_nodes.succ[j]) {
                        t_float const tmp = dist.template sqeuclidean<true>(idx2, j);
                        if (tmp < min) {
                            min = tmp;
                            n_nghbr[idx2] = j;
                        }
                    }
                    nn_distances.update(idx2, min);
                }
        }
    }
}

template <method_codes_vector method, typename t_dissimilarity>
static void generic_linkage_vector_alternative(const t_index N,
                                               t_dissimilarity &dist,
                                               cluster_result &Z2) {
    /*
      N: integer, number of data points
      dist: function pointer to the metric
      Z2: output data structure

      This algorithm is valid for the distance update methods
      "Ward", "centroid" and "median" only!
    */
    const t_index N_1 = N - 1;
    t_index i, j = 0;    // loop variables
    t_index idx1, idx2;  // row and column indices

    auto_array_ptr<t_index> n_nghbr(2 * N - 2);  // array of nearest neighbors
    auto_array_ptr<t_float> mindist(2 * N - 2);  // distances to the nearest neighbors

    doubly_linked_list active_nodes(N + N_1);
    binary_min_heap nn_distances(&*mindist, N_1, 2 * N - 2, 1);  // minimum heap
                                                                 // structure for the distance to the nearest neighbor of each point

    t_float min;  // minimum for nearest-neighbor searches

    // Initialize the minimal distances:
    // Find the nearest neighbor of each point.
    // n_nghbr[i] = argmin_{j>i} D(i,j) for i in range(N-1)
    for (i = 1; i < N; ++i) {
        min = std::numeric_limits<t_float>::infinity();
        t_index idx;
        for (idx = j = 0; j < i; ++j) {
            t_float tmp;
            switch (method) {
                case METHOD_VECTOR_WARD:
                    tmp = dist.ward_initial(i, j);
                    break;
                default:
                    tmp = dist.template sqeuclidean<true>(i, j);
            }
            if (tmp < min) {
                min = tmp;
                idx = j;
            }
        }
        switch (method) {
            case METHOD_VECTOR_WARD:
                mindist[i] = t_dissimilarity::ward_initial_conversion(min);
                break;
            default:
                mindist[i] = min;
        }
        n_nghbr[i] = idx;
    }

    // Put the minimal distances into a heap structure to make the repeated
    // global minimum searches fast.
    nn_distances.heapify();

    // Main loop: We have N-1 merging steps.
    for (i = N; i < N + N_1; ++i) {
        /*
          The bookkeeping is different from the "stored matrix approach" algorithm
          generic_linkage.

          mindist[i] stores a lower bound on the minimum distance of the point i to
          all points of *lower* index:

              mindist[i] ≥ min_{j<i} D(i,j)

          Moreover, new nodes do not re-use one of the old indices, but they are
          given a new, unique index (SciPy convention: initial nodes are 0,…,N−1,
          new nodes are N,…,2N−2).

          Invalid nearest neighbors are not recognized by the fact that the stored
          distance is smaller than the actual distance, but the list active_nodes
          maintains a flag whether a node is inactive. If n_nghbr[i] points to an
          active node, the entries nn_distances[i] and n_nghbr[i] are valid,
          otherwise they must be recomputed.
        */
        idx1 = nn_distances.argmin();
        while (active_nodes.is_inactive(n_nghbr[idx1])) {
            // Recompute the minimum mindist[idx1] and n_nghbr[idx1].
            n_nghbr[idx1] = j = active_nodes.start;
            switch (method) {
                case METHOD_VECTOR_WARD:
                    min = dist.ward_extended(idx1, j);
                    for (j = active_nodes.succ[j]; j < idx1; j = active_nodes.succ[j]) {
                        t_float tmp = dist.ward_extended(idx1, j);
                        if (tmp < min) {
                            min = tmp;
                            n_nghbr[idx1] = j;
                        }
                    }
                    break;
                default:
                    min = dist.sqeuclidean_extended(idx1, j);
                    for (j = active_nodes.succ[j]; j < idx1; j = active_nodes.succ[j]) {
                        t_float const tmp = dist.sqeuclidean_extended(idx1, j);
                        if (tmp < min) {
                            min = tmp;
                            n_nghbr[idx1] = j;
                        }
                    }
            }
            /* Update the heap with the new true minimum and search for the (possibly
               different) minimal entry. */
            nn_distances.update_geq(idx1, min);
            idx1 = nn_distances.argmin();
        }

        idx2 = n_nghbr[idx1];
        active_nodes.remove(idx1);
        active_nodes.remove(idx2);

        Z2.append(idx1, idx2, mindist[idx1]);

        if (i < 2 * N_1) {
            switch (method) {
                case METHOD_VECTOR_WARD:
                case METHOD_VECTOR_CENTROID:
                    dist.merge(idx1, idx2, i);
                    break;

                case METHOD_VECTOR_MEDIAN:
                    dist.merge_weighted(idx1, idx2, i);
                    break;

                default:
                    throw std::runtime_error(std::string("Invalid method."));
            }

            n_nghbr[i] = active_nodes.start;
            if (method == METHOD_VECTOR_WARD) {
                /*
                  Ward linkage.

                  Shorter and longer distances can occur, not smaller than min(d1,d2)
                  but maybe bigger than max(d1,d2).
                */
                min = dist.ward_extended(active_nodes.start, i);
                for (j = active_nodes.succ[active_nodes.start]; j < i;
                     j = active_nodes.succ[j]) {
                    t_float tmp = dist.ward_extended(j, i);
                    if (tmp < min) {
                        min = tmp;
                        n_nghbr[i] = j;
                    }
                }
            } else {
                /*
                  Centroid and median linkage.

                  Shorter and longer distances can occur, not bigger than max(d1,d2)
                  but maybe smaller than min(d1,d2).
                */
                min = dist.sqeuclidean_extended(active_nodes.start, i);
                for (j = active_nodes.succ[active_nodes.start]; j < i;
                     j = active_nodes.succ[j]) {
                    t_float tmp = dist.sqeuclidean_extended(j, i);
                    if (tmp < min) {
                        min = tmp;
                        n_nghbr[i] = j;
                    }
                }
            }
            if (idx2 < active_nodes.start) {
                nn_distances.remove(active_nodes.start);
            } else {
                nn_distances.remove(idx2);
            }
            nn_distances.replace(idx1, i, min);
        }
    }
}

#if HAVE_VISIBILITY
#pragma GCC visibility pop
#endif