#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdlib.h>
/*
 * Inclusive upper bound of the biased magnitude (|pcm| + 132) for each of the
 * eight mu-law segments. Segment s covers values whose top set bit is bit
 * (s + 7), which is what the (seg + 3) shift in the encoder and the
 * ((quant << 3) + 0x84) << seg reconstruction in the decoder both assume.
 */
static const int16_t seg_end_tbl[8] = {0xFF,  0x1FF,  0x3FF,  0x7FF,
                                       0xFFF, 0x1FFF, 0x3FFF, 0x7FFF};

/*
 * Compress one 16-bit linear PCM sample to an 8-bit mu-law code.
 *
 * Returned byte layout before the final bitwise inversion:
 *   bit 7     sign (1 = negative input)
 *   bits 6..4 segment number
 *   bits 3..0 quantisation step inside the segment
 * All eight bits are inverted before returning, so 0 encodes to 0xFF.
 *
 * pcm: any int16_t value, including INT16_MIN.
 */
static inline uint8_t mulaw_encode_sample(int16_t pcm) {
  /* Take the magnitude in a 32-bit type: -(-32768) does not fit in int16_t,
   * so negating in place would leave the value negative and skip the clip. */
  int32_t magnitude = pcm;
  int sign = 0;
  if (magnitude < 0) {
    magnitude = -magnitude;
    sign = 1;
  }

  /* Clip so that adding the bias of 132 cannot exceed 0x7FFF. */
  if (magnitude > 32635) {
    magnitude = 32635;
  }
  int sample = (int)(magnitude + 132);

  /* Linear search for the first segment whose upper bound holds the sample. */
  int seg = 0;
  while (seg < 8 && sample > seg_end_tbl[seg]) {
    seg++;
  }
  /* Defensive clamp; unreachable after the clip above. */
  if (seg > 7) {
    seg = 7;
  }

  /* Drop the segment's leading 1 bit and keep the next four bits. */
  uint8_t uval = (uint8_t)((seg << 4) | ((sample >> (seg + 3)) & 0x0F));
  if (sign) {
    uval |= 0x80;
  }
  return (uint8_t)(~uval);
}
/*
 * Expand one 8-bit mu-law code back to a 16-bit linear PCM sample.
 *
 * The byte is bit-inverted first; after inversion bit 7 is the sign
 * (set = negative), bits 6..4 the segment and bits 3..0 the step within the
 * segment. The magnitude is rebuilt as ((step * 8 + 132) << segment) - 132,
 * which tops out at 32124 and therefore always fits in int16_t.
 */
static inline int16_t mulaw_decode_sample(uint8_t u) {
  const unsigned bits = (unsigned)u ^ 0xFFu;
  const int exponent = (int)((bits >> 4) & 0x07u);
  const int mantissa = (int)(bits & 0x0Fu);

  /* Re-insert the bias, scale by the segment, then strip the bias again. */
  const int magnitude = ((mantissa * 8 + 132) << exponent) - 132;

  return (int16_t)((bits & 0x80u) != 0u ? -magnitude : magnitude);
}
/*
 * Run the mu-law round trip over n samples and report the reconstruction
 * error.
 *
 * n:      number of samples; a value <= 0 performs no work and returns 0.0.
 * pcm_in: n linear PCM input samples (read only).
 * enc:    output buffer for n mu-law codes.
 * dec:    output buffer for n decoded PCM samples.
 *
 * Returns the sum over all samples of (pcm_in[i] - dec[i])^2, as a double.
 * Encoding, decoding and error accumulation are kept as three separate
 * passes over the buffers.
 */
static double pipeline_run(int n, const int16_t *pcm_in, uint8_t *enc,
                           int16_t *dec) {

  for (int i = 0; i < n; i++) {
    enc[i] = mulaw_encode_sample(pcm_in[i]);
  }
  for (int i = 0; i < n; i++) {
    dec[i] = mulaw_decode_sample(enc[i]);
  }

  long long err = 0;
  for (int i = 0; i < n; i++) {
    /* Widen before squaring: the difference of two int16_t values can reach
     * 65535 in magnitude, and its square does not fit in a 32-bit int. */
    long long d = (long long)pcm_in[i] - (long long)dec[i];
    err += d * d;
  }
  return (double)err;
}
/*
 * Benchmark entry point, generated by the project macro BENCH_MAIN_SCALAR3
 * (defined outside this file, presumably in bench_harness.h -- its exact
 * expansion is not visible here). The arguments as written are:
 *   - T004_Module_019, MULAWC: identifiers passed to the harness (presumably
 *     the benchmark name and a tag -- confirm against the macro).
 *   - 4096, 16384, 65536: three numeric values, presumably the problem sizes
 *     that `n` takes -- confirm against the macro.
 *   - buffer allocation and result declaration,
 *   - an input-filling block,
 *   - the measured expression,
 *   - the expression holding the result,
 *   - cleanup statements.
 * `n` and `seed` are not declared here; they are expected to be provided by
 * the macro expansion.
 */
BENCH_MAIN_SCALAR3(
    T004_Module_019, MULAWC, 4096, 16384, 65536,
    /* Working buffers sized by n: PCM input, mu-law codes, decoded PCM.
     * NOTE(review): the malloc results are not checked for NULL. */
    int16_t *pcm_in = (int16_t *)malloc((size_t)n * sizeof(int16_t));
    uint8_t *enc = (uint8_t *)malloc((size_t)n * sizeof(uint8_t));
    int16_t *dec = (int16_t *)malloc((size_t)n * sizeof(int16_t));
    double ans_scalar = 0.0;
    ,
    {
      /* Fill the input with the low 16 bits of successive generator outputs,
       * reinterpreted as signed samples. */
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < n; i++) {
        pcm_in[i] = (int16_t)(bench_rng_next(&rng) & 0xFFFFu);
      }
    },
    /* Kernel expression, result expression, then release of all buffers. */
    ans_scalar = pipeline_run(n, pcm_in, enc, dec), ans_scalar, free(pcm_in);
    free(enc); free(dec);)
