#include <stdlib.h>
#include <string.h>

// Lookup table shared by the routines below. bldbasis() allocates it and
// fills it with a copy of `grades` (the popcount of each index), cmp() and
// prodsign() read it, and gmt() frees it once it has finished.
int* cnt;

// canonical_reordering_sign_euclidean(a, b)
//
// Shifts `a` right one bit at a time and, for every non-zero shifted value,
// adds cnt[shifted & b] to a running total. Returns -1 when that total is
// odd and 1 when it is even.
//
// The global `cnt` must already be populated and must be indexable by every
// (shifted & b) value produced here.
static int prodsign(unsigned a, unsigned b) {
    int total = 0;
    for (unsigned shifted = a >> 1; shifted != 0; shifted >>= 1) {
        total += cnt[shifted & b];
    }
    if (total & 1) {
        return -1;
    }
    return 1;
}

// canonical_reordering_sign(a, b, metric)
//
// Starts from prodsign(a, b) and multiplies in metric[pos] for every bit
// position `pos` that is set in both `a` and `b`. `metric` must therefore
// have an entry for the highest bit position shared by the two arguments.
float prodval(unsigned a, unsigned b, float* metric) {
    float result = (float) prodsign(a, b);
    unsigned shared = a & b;
    int pos = 0;
    while (shared != 0) {
        // Bits that are not shared leave the running product untouched.
        if (shared & 1) {
            result *= metric[pos];
        }
        shared >>= 1;
        pos++;
    }
    return result;
}

// qsort() comparator for arrays of int-sized elements.
//
// Orders elements primarily by cnt[value], ascending, and breaks ties by the
// value itself, ascending. Returns -1, 0 or 1.
//
// The tie-break is written with comparisons instead of `va - vb`: the
// subtraction overflows (undefined behaviour for signed int) when the two
// operands are far apart, e.g. a large positive and a negative value.
int cmp(const void* a, const void* b) {
    // Read through const-qualified pointers; the comparator never writes.
    int va = *(const int*)a;
    int vb = *(const int*)b;
    int na = cnt[(unsigned)va];
    int nb = cnt[(unsigned)vb];
    if (na != nb) {
        return (na < nb) ? -1 : 1;
    }
    return (va > vb) - (va < vb);
}

// Builds the grade table and the grade-ordered basis.
//
//   arange  - input, `length` values copied into `i2b` before sorting
//   i2b     - output, `length` values: the contents of `arange` sorted with
//             cmp(), i.e. by cnt[value] and then by value
//   grades  - output, `length` values: grades[i] is the number of set bits
//             in i
//   length  - number of elements in each of the three arrays
//
// Side effect: allocates the global `cnt` table as a copy of `grades`; gmt()
// frees it. Whatever `cnt` pointed to before the call is overwritten without
// being released. If the allocation fails the process is aborted, because
// both the sort below and later callers need `cnt` and the function has no
// way to report an error.
void bldbasis(const unsigned* arange,
              unsigned* i2b,
              int* grades,
              unsigned length) {
    if (length == 0) {
        // Nothing to fill or sort; avoid malloc(0)/memcpy on empty buffers.
        cnt = NULL;
        return;
    }
    for (unsigned i = 0; i < length; i++) {
        grades[i] = __builtin_popcount(i);
    }
    cnt = malloc(sizeof *cnt * length);
    if (cnt == NULL) {
        abort();
    }
    memcpy(cnt, grades, sizeof *cnt * length);
    memcpy(i2b, arange, sizeof *i2b * length);
    qsort(i2b, length, sizeof *i2b, cmp);
}

// Fills in the product table.
//
//   i2b     - `length` values read as bi = i2b[i] and bj = i2b[j]
//   b2i     - indexed by bk = bi ^ bj to obtain the output column
//   metric  - passed through to prodval()
//   table   - flat array of length * length * length floats
//   length  - extent of each table dimension
//
// For every pair (i, j) the value prodval(bi, bj, metric) is stored at
// table[i][j][b2i[bk]]. No other element of `table` is written here.
// NOTE(review): presumably the caller zero-initialises `table` - confirm.
//
// The global `cnt` table is released before returning and reset to NULL so
// that no dangling pointer is left behind; it has to be rebuilt (see
// bldbasis) before this function is called again.
void gmt(unsigned* i2b,
         unsigned* b2i,
         float* metric,
         float* table, /* length * length * length */
         unsigned length) {
    // Index arithmetic is done in size_t: i * length * length wraps around
    // in 32-bit unsigned arithmetic once length is large enough.
    const size_t n = length;
    for (size_t i = 0; i < n; i++) {
        const unsigned bi = i2b[i];
        float* slice = table + i * n * n;
        for (size_t j = 0; j < n; j++) {
            const unsigned bj = i2b[j];
            const unsigned bk = bi ^ bj;
            slice[j * n + b2i[bk]] = prodval(bi, bj, metric);
        }
    }
    free(cnt);
    cnt = NULL;
}
