#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Initial value of the 32-bit encoder state (see rans_encode). */
#define RANS_L 65536u
/* Factor the state quotient is multiplied by in rans_put_symbol's update
 * step. Numerically 16 * SYM_FREQ, matching the 16 symbol values (0..15)
 * that rans_encode feeds to the coder. */
#define TOTFREQ 4096u
/* Frequency that rans_put_symbol assigns to every symbol. */
#define SYM_FREQ 256u
/*
 * Append a single byte to the output buffer.
 *
 * out    destination buffer
 * outsz  in/out: index of the next free slot in `out`; incremented by one
 * b      byte to store
 *
 * No bounds check is performed; the caller must guarantee that `out` has
 * room for at least *outsz + 1 bytes.
 */
static inline void rans_emit_byte(uint8_t *out, size_t *outsz, uint8_t b) {
  const size_t pos = *outsz;

  out[pos] = b;
  *outsz = pos + 1;
}
/*
 * Fold one symbol into the encoder state.
 *
 * state  in/out: current 32-bit encoder state
 * sym    symbol to encode; its interval start is taken as sym << 8 and its
 *        frequency is always SYM_FREQ
 * out    output buffer receiving renormalization bytes
 * outsz  in/out: number of bytes already written to `out`
 *
 * While the state is at or above SYM_FREQ << 16 its low byte is emitted and
 * the state is shifted right by 8. The state is then replaced by
 * (state / freq) * TOTFREQ + start + (state % freq).
 */
static inline void rans_put_symbol(uint32_t *state, uint8_t sym, uint8_t *out,
                                   size_t *outsz) {
  const uint32_t freq = SYM_FREQ;
  const uint32_t start = (uint32_t)sym << 8;
  const uint32_t limit = freq << 16;
  uint32_t cur = *state;

  /* Renormalize: shed low bytes until the state drops below the limit. */
  for (; cur >= limit; cur >>= 8) {
    rans_emit_byte(out, outsz, (uint8_t)(cur & 0xFFu));
  }

  *state = (cur / freq) * TOTFREQ + start + (cur % freq);
}
/*
 * Encode `n` input bytes into `out` and return the number of bytes written.
 *
 * in   input bytes; only the low 4 bits of each byte are used as the symbol
 * n    number of input bytes (a non-positive value encodes nothing)
 * out  output buffer; no bounds checking is done, so the caller must size it
 *      for everything the encoder emits
 *
 * The state starts at RANS_L. After all symbols are consumed the 32-bit
 * state is appended as four bytes, least-significant byte first.
 */
static size_t rans_encode(const uint8_t *in, int n, uint8_t *out) {
  uint32_t state = RANS_L;
  size_t written = 0;

  for (int idx = 0; idx < n; idx++) {
    rans_put_symbol(&state, (uint8_t)(in[idx] & 0x0Fu), out, &written);
  }

  /* Flush the final state, low byte first. */
  for (unsigned shift = 0; shift < 32u; shift += 8u) {
    rans_emit_byte(out, &written, (uint8_t)((state >> shift) & 0xFFu));
  }

  return written;
}
/*
 * Run the encoder over `inbuf` and reduce its output to one checksum value.
 *
 * n       number of input bytes
 * inbuf   input bytes handed to rans_encode
 * outbuf  scratch buffer that receives the encoded stream
 *
 * Returns the sum of all encoded bytes, reduced to 48 bits after every
 * addition, converted to double.
 */
static double pipeline_run(int n, const uint8_t *inbuf, uint8_t *outbuf) {
  const uint64_t mask48 = 0xFFFFFFFFFFFFull;
  const size_t produced = rans_encode(inbuf, n, outbuf);
  const uint8_t *const end = outbuf + produced;
  uint64_t sum = 0;

  for (const uint8_t *p = outbuf; p != end; ++p) {
    sum = (sum + (uint64_t)*p) & mask48;
  }

  return (double)sum;
}
/*
 * Benchmark entry point, generated by the BENCH_MAIN_SCALAR3 macro. The
 * macro is not defined in this file (presumably it comes from
 * bench_harness.h -- confirm), so the meaning of each argument below is
 * inferred from its contents only.
 *
 * `n` and `seed` are not declared in this file; presumably the macro
 * provides them -- verify against its definition.
 *
 * NOTE(review): neither malloc result is checked before use.
 */
BENCH_MAIN_SCALAR3(
    T004_Module_036, RANSENC, 4096, 16384, 65536,
    /* Declarations: n-byte input buffer, output buffer of n * 8 + 16 bytes
     * (rans_encode does no bounds checking, so this capacity must cover
     * everything it writes), and the result variable. */
    uint8_t *inbuf = (uint8_t *)malloc((size_t)n);
    uint8_t *outbuf = (uint8_t *)malloc((size_t)(n * 8 + 16));
    double ans_scalar = 0.0;
    ,
    /* Fill the input with values in 0..15 drawn from a generator seeded
     * with `seed`. */
    {
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < n; i++)
        inbuf[i] = (uint8_t)(bench_rng_next(&rng) & 0x0Fu);
    },
    /* Kernel expression, the value it produces, then buffer release. */
    ans_scalar = pipeline_run(n, inbuf, outbuf), ans_scalar, free(inbuf);
    free(outbuf);)
