#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Run-length threshold. The histogram builder below credits runs of every
 * length identically, so this value does not influence its output. */
#define RLE_MIN_RUN 4

/*
 * Build a byte histogram of in[0..n) by consuming the input one run of equal
 * bytes at a time.
 *
 * in   - input bytes; only indices below n are read.
 * n    - number of input bytes; zero or a negative value leaves all counters 0.
 * freq - output array of 256 counters; every entry is overwritten.
 *
 * A single step consumes at most 255 equal bytes; a longer run is simply
 * handled as several consecutive steps, so freq[b] always ends up equal to
 * the number of occurrences of byte b.
 */
static void rle_build_freq(const uint8_t *in, int n, uint32_t freq[256]) {
  for (int sym = 0; sym < 256; sym++) {
    freq[sym] = 0;
  }

  for (int pos = 0; pos < n;) {
    const uint8_t cur = in[pos];

    /* Extend the run while the next byte exists and matches, up to 255. */
    int len = 1;
    for (; len < 255; len++) {
      const int next = pos + len;
      if (next >= n || in[next] != cur) {
        break;
      }
    }

    freq[cur] += (uint32_t)len;
    pos += len;
  }
}
/*
 * Compute the total number of bits needed to encode a 256-symbol histogram
 * with a Huffman code derived from that histogram.
 *
 * freq - occurrence count for each of the 256 symbols.
 *
 * All 256 symbols take part in tree construction: a symbol with a zero count
 * is given weight 1 while building the tree, but contributes nothing to the
 * returned total.
 *
 * Returns the sum over all symbols of freq[s] * code_length[s].
 */
static uint64_t huffman_cost_bits(const uint32_t freq[256]) {
  enum { SYMBOLS = 256, NODE_CAP = 512, STACK_CAP = 1024 };

  uint32_t weight[NODE_CAP];
  int left[NODE_CAP];
  int right[NODE_CAP];
  char live[NODE_CAP];

  /* Leaves occupy indices 0..255; -1 marks "no child". */
  for (int s = 0; s < SYMBOLS; s++) {
    weight[s] = (freq[s] != 0) ? freq[s] : 1u;
    left[s] = -1;
    right[s] = -1;
    live[s] = 1;
  }

  int used = SYMBOLS;
  int root = 0;
  while (1) {
    /* Find the two lightest live nodes. The scan runs in index order with
     * strict comparisons, so on equal weights the node seen first is kept. */
    int min1 = -1;
    int min2 = -1;
    for (int k = 0; k < used; k++) {
      if (!live[k]) {
        continue;
      }
      if (min1 < 0 || weight[k] < weight[min1]) {
        min2 = min1;
        min1 = k;
      } else if (min2 < 0 || weight[k] < weight[min2]) {
        min2 = k;
      }
    }

    /* Only one live node left: it is the root of the finished tree. */
    if (min2 < 0) {
      root = min1;
      break;
    }

    /* Replace the pair with a new internal node appended at the end. */
    const int parent = used++;
    weight[parent] = weight[min1] + weight[min2];
    left[parent] = min1;
    right[parent] = min2;
    live[min1] = 0;
    live[min2] = 0;
    live[parent] = 1;
  }

  int code_len[SYMBOLS];
  for (int s = 0; s < SYMBOLS; s++) {
    code_len[s] = 0;
  }

  /* Iterative depth-first walk; a leaf's depth is its code length. */
  int pending_node[STACK_CAP];
  int pending_depth[STACK_CAP];
  int top = 0;
  pending_node[top] = root;
  pending_depth[top] = 0;
  top++;

  while (top > 0) {
    top--;
    const int node = pending_node[top];
    const int depth = pending_depth[top];
    const int l = left[node];
    const int r = right[node];

    if (l >= 0 || r >= 0) {
      if (l >= 0) {
        pending_node[top] = l;
        pending_depth[top] = depth + 1;
        top++;
      }
      if (r >= 0) {
        pending_node[top] = r;
        pending_depth[top] = depth + 1;
        top++;
      }
      continue;
    }

    /* Leaf: a leaf sitting at the root would still need a 1-bit code. */
    if (node < SYMBOLS) {
      code_len[node] = (depth == 0) ? 1 : depth;
    }
  }

  uint64_t total = 0;
  for (int s = 0; s < SYMBOLS; s++) {
    if (freq[s] == 0) {
      continue;
    }
    total += (uint64_t)freq[s] * (uint64_t)code_len[s];
  }
  return total;
}
/*
 * Run the full pipeline on inbuf[0..n): build the byte histogram, compute its
 * Huffman cost in bits, and return that cost as a double.
 *
 * n     - number of bytes in inbuf.
 * inbuf - input bytes (not modified).
 *
 * The bit count is masked to its low 48 bits before the conversion to double.
 */
static double pipeline_run(int n, const uint8_t *inbuf) {
  const uint64_t low48_mask = 0xFFFFFFFFFFFFull;

  uint32_t histogram[256];
  rle_build_freq(inbuf, n, histogram);

  const uint64_t total_bits = huffman_cost_bits(histogram);
  return (double)(total_bits & low48_mask);
}
/*
 * Benchmark entry point, generated by the BENCH_MAIN_SCALAR3 macro. The macro
 * is defined outside this file (presumably in bench_harness.h -- confirm there
 * for the exact meaning and expansion of each argument).
 *
 * As written here, the arguments are, in order: a benchmark identifier, a
 * tag, three numeric values (4096, 16384, 65536 -- presumably the input sizes
 * bound to n; TODO confirm), the declarations of the input buffer and the
 * result variable, a block that fills inbuf with the low byte of successive
 * bench_rng_next() outputs from a generator initialised with `seed`, the call
 * to pipeline_run(), the result variable, and the statement that frees inbuf.
 *
 * NOTE(review): the malloc() result is used without a NULL check.
 */
BENCH_MAIN_SCALAR3(
    T004_Module_039, DEFLATEH, 4096, 16384, 65536,
    uint8_t *inbuf = (uint8_t *)malloc((size_t)n);
    double ans_scalar = 0.0;
    ,
    {
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < n; i++)
        inbuf[i] = (uint8_t)(bench_rng_next(&rng) & 0xFFu);
    },
    ans_scalar = pipeline_run(n, inbuf), ans_scalar, free(inbuf);)
