#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/* Number of values stored in one bit-packed literal segment. */
#define SEG_LEN 8
/* Bits consumed per value when packing a literal segment. */
#define BITW 12
/*
 * Append one run record to the output stream.
 *
 * A run record is two 32-bit words: a header carrying `runlen` with the top
 * bit (0x80000000) set, followed by the repeated `value`.
 *
 * value   - the repeated value, stored verbatim in the second word.
 * runlen  - repetition count; OR-ed into the header word as-is.
 * out     - output word buffer; no bounds check is done here, so the caller
 *           must guarantee room for two more words at index *outlen.
 * outlen  - in/out count of words already written; advanced by 2.
 */
static inline void encode_rle(uint32_t value, int runlen, uint32_t *out,
                              size_t *outlen) {
  uint32_t *slot = out + *outlen;

  slot[0] = (uint32_t)runlen | 0x80000000u;
  slot[1] = value;
  *outlen += 2;
}
/*
 * Append one bit-packed literal segment to the output stream.
 *
 * The low 12 bits of each of the SEG_LEN input values are concatenated
 * least-significant-bit first (value 0 occupies bits 0..11 of the first data
 * word, value 1 bits 12..23, and so on). With SEG_LEN = 8 and BITW = 12 the
 * 96 payload bits fill exactly three 32-bit words.
 *
 * The record is four words: a header (BITW << 24) | SEG_LEN followed by the
 * three payload words.
 *
 * vals    - SEG_LEN values to pack; bits above bit 11 are discarded.
 * out     - output word buffer; no bounds check is done here, so the caller
 *           must guarantee room for four more words at index *outlen.
 * outlen  - in/out count of words already written; advanced by 4.
 */
static inline void encode_pack8x12(const uint32_t *vals, uint32_t *out,
                                   size_t *outlen) {
  uint32_t packed[3] = {0, 0, 0};
  uint64_t acc = 0; /* pending bits not yet flushed to packed[] */
  int pending = 0;  /* number of valid bits in acc, always < 32 between values */
  int next = 0;     /* next payload word to fill */

  for (int k = 0; k < SEG_LEN; k++) {
    acc |= (uint64_t)(vals[k] & 0xFFFu) << pending;
    pending += BITW;
    /* Emit a payload word as soon as 32 bits are available. */
    while (pending >= 32) {
      packed[next++] = (uint32_t)acc;
      acc >>= 32;
      pending -= 32;
    }
  }

  uint32_t *dst = out + *outlen;
  dst[0] = ((uint32_t)BITW << 24) | (uint32_t)SEG_LEN;
  dst[1] = packed[0];
  dst[2] = packed[1];
  dst[3] = packed[2];
  *outlen += 4;
}
/*
 * Encode the n-element column `col` as a stream of run records and bit-packed
 * literal segments, then return a hash of the encoded words.
 *
 * Encoding rules:
 *   - Two or more equal consecutive values become one run record
 *     (encode_rle), capped at MAX_RUN elements per record.
 *   - Otherwise up to SEG_LEN values are gathered into a literal segment
 *     (encode_pack8x12). Gathering stops early when the next two input values
 *     are equal, so that the following run is emitted as a run record. A short
 *     segment is padded with zeros; the segment header does not record how
 *     many values were real.
 *
 * n    - number of elements in col; when n <= 0 nothing is encoded and the
 *        hash of an empty buffer is returned.
 * col  - input values. Literal segments keep only the low 12 bits of each
 *        value (see encode_pack8x12).
 *
 * Returns the 64-bit result of bench_hash_bytes over the encoded words,
 * converted to double, or 0.0 if the scratch buffer cannot be allocated.
 */
static double rlebp_run(int n, const uint32_t *col) {
  /* Longest run a single run record may describe. */
  enum { MAX_RUN = 0x00FFFFFF };

  /*
   * Worst case is four output words per input element: a literal segment
   * always consumes at least one element and emits four words, while a run
   * record consumes at least two elements and emits two words.
   */
  size_t outcap = (size_t)n * 4 + 16;
  uint32_t *outbuf = (uint32_t *)malloc(outcap * sizeof *outbuf);
  if (!outbuf)
    return 0.0;

  size_t outlen = 0;
  int i = 0;
  while (i < n) {
    uint32_t v = col[i];
    int runlen = 1;
    while (i + runlen < n && col[i + runlen] == v && runlen < MAX_RUN)
      runlen++;

    if (runlen >= 2) {
      encode_rle(v, runlen, outbuf, &outlen);
      i += runlen;
      continue;
    }

    /* Literal segment; unused trailing slots stay zero. */
    uint32_t tmp[SEG_LEN] = {0};
    int cnt = 0;
    while (cnt < SEG_LEN && i < n) {
      tmp[cnt++] = col[i++];
      /*
       * A run starts at i exactly when the next two values are equal, so one
       * comparison is enough; the outer loop measures the run's full length.
       */
      if (i + 1 < n && col[i + 1] == col[i])
        break;
    }
    encode_pack8x12(tmp, outbuf, &outlen);
  }

  uint64_t h =
      bench_hash_bytes((const void *)outbuf, outlen * sizeof(uint32_t));
  free(outbuf);
  return (double)h;
}
/*
 * Benchmark entry point for the RLE + bit-packing kernel.
 *
 * NOTE(review): BENCH_MAIN_SCALAR3 is defined by the harness, not in this
 * file. Judging only by their contents, the arguments appear to be: a module
 * id, a kernel tag, three problem sizes, declarations, an input-fill block,
 * the measured expression, the result expression, and cleanup. The names `n`
 * and `seed` are presumably supplied by the macro. Confirm against
 * bench_harness.h.
 */
BENCH_MAIN_SCALAR3(
    T004_Module_054, RLEBP, 4096, 16384, 65536,
    /* NOTE(review): this malloc result is not checked before col is written. */
    uint32_t *col = (uint32_t *)malloc((size_t)n * sizeof(uint32_t));
    double ans_scalar = 0.0;
    ,
    {
      /*
       * Fill col with 12-bit values containing repeats: when the low two bits
       * of one RNG draw are zero (and i > 0) the previous element is copied,
       * otherwise a second draw supplies a fresh value masked to 0xFFF.
       */
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < n; i++) {
        if ((bench_rng_next(&rng) & 3ULL) == 0 && i > 0) {
          col[i] = col[i - 1];
        } else {
          col[i] = (uint32_t)(bench_rng_next(&rng) & 0xFFFu);
        }
      }
    },
    ans_scalar = rlebp_run(n, col), ans_scalar, free(col);)
