#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdlib.h>
#define NSTATES 4
/*
 * Rate-1/2 convolutional encoder with two bits of memory.
 *
 * For every input bit b (only the least-significant bit of bits[i] is used)
 * two output symbols are written, each 0 or 1:
 *   enc[2*i]     = b ^ d1 ^ d2
 *   enc[2*i + 1] = b ^ d2
 * where d1 is the previous input bit and d2 the one before it. The encoder
 * starts from the all-zero state.
 *
 * bits  : nbits input bytes
 * nbits : number of input bits; nothing is written when it is <= 0
 * enc   : output buffer with room for 2 * nbits bytes
 */
static inline void conv_encode_bits(const uint8_t *bits, int nbits,
                                    uint8_t *enc) {
  /* Packed shift register: bit 0 holds d1, bit 1 holds d2. */
  unsigned reg = 0u;
  uint8_t *dst = enc;

  for (int k = 0; k < nbits; ++k) {
    const unsigned in = bits[k] & 1u;
    const unsigned d1 = reg & 1u;
    const unsigned d2 = (reg >> 1) & 1u;

    *dst++ = (uint8_t)(in ^ d1 ^ d2);
    *dst++ = (uint8_t)(in ^ d2);

    /* Shift the new bit in; the old d1 becomes d2 and the old d2 drops out. */
    reg = ((reg << 1) | in) & 3u;
  }
}
/*
 * Copies len symbols from enc to noisy, inverting the least-significant bit
 * of a symbol whenever the next generator draw is a multiple of 10.
 *
 * Exactly one bench_rng_next() call is made per symbol, in index order, so
 * the noise pattern is a function of the generator state on entry.
 * NOTE(review): this gives a flip rate of roughly 1 in 10 only if
 * bench_rng_next() is close to uniform -- confirm against bench_utils.h.
 *
 * enc   : len input symbols
 * len   : number of symbols to process
 * noisy : output buffer of at least len bytes
 * rng   : generator state, advanced len times
 */
static inline void add_noise_bits(const uint8_t *enc, int len, uint8_t *noisy,
                                  bench_rng64_t *rng) {
  for (int k = 0; k < len; ++k) {
    const int flip = (bench_rng_next(rng) % 10ULL) == 0ULL;
    noisy[k] = flip ? (uint8_t)(enc[k] ^ 1u) : enc[k];
  }
}
/*
 * Hard-decision Viterbi decoder for the 4-state, rate-1/2 code whose
 * branch outputs are (b ^ d1 ^ d2, b ^ d2), i.e. the code generated by
 * conv_encode_bits.
 *
 * State numbering: bit 1 of a state is the most recent input bit (d1) and
 * bit 0 is the one before it (d2). Decoding starts from state 0 and ends in
 * whichever state has the smallest accumulated Hamming distance; ties are
 * resolved in favour of the lowest-numbered state, and among merging
 * branches the first one examined (lowest source state, then b = 0) wins.
 *
 * recv     : 2 * nbits received symbols; each is compared as a whole byte
 *            against the expected 0/1 branch output
 * nbits    : number of information bits to recover; the function returns
 *            without touching out_bits when nbits <= 0
 * out_bits : receives nbits decoded bits (0 or 1)
 *
 * The traceback tables are heap-allocated. If they cannot be allocated (or
 * their size would not fit in size_t) the process is terminated with abort()
 * rather than dereferencing a null pointer.
 */
static void viterbi_decode_bits(const uint8_t *recv, int nbits,
                                uint8_t *out_bits) {
  /* Metric assigned to states that no path has reached yet. */
  enum { METRIC_UNREACHED = 1000000000 };

  if (nbits <= 0) {
    return;
  }

  /* All sizes and table indices are computed in size_t so that large nbits
   * cannot overflow int arithmetic. */
  const size_t steps = (size_t)nbits;
  if (steps > SIZE_MAX / ((size_t)NSTATES * sizeof(int))) {
    abort();
  }
  const size_t cells = steps * (size_t)NSTATES;

  int *bp_state = (int *)malloc(cells * sizeof *bp_state);
  uint8_t *bp_bit = (uint8_t *)malloc(cells * sizeof *bp_bit);
  if (bp_state == NULL || bp_bit == NULL) {
    free(bp_state);
    free(bp_bit);
    abort();
  }

  int metrics_curr[NSTATES];
  int metrics_next[NSTATES];
  metrics_curr[0] = 0;
  for (int s = 1; s < NSTATES; s++) {
    metrics_curr[s] = METRIC_UNREACHED;
  }

  /* Forward pass: add-compare-select over every trellis step. */
  for (size_t t = 0; t < steps; t++) {
    int *step_state = bp_state + t * (size_t)NSTATES;
    uint8_t *step_bit = bp_bit + t * (size_t)NSTATES;
    const uint8_t r0 = recv[2 * t + 0];
    const uint8_t r1 = recv[2 * t + 1];

    for (int s = 0; s < NSTATES; s++) {
      metrics_next[s] = METRIC_UNREACHED;
    }

    for (int s = 0; s < NSTATES; s++) {
      const int prev1 = (s >> 1) & 1;
      const int prev2 = s & 1;
      for (int b = 0; b <= 1; b++) {
        const int o0 = (b ^ prev1 ^ prev2) & 1;
        const int o1 = (b ^ prev2) & 1;
        const int cost = (o0 != r0) + (o1 != r1);
        const int ns = ((b << 1) | prev1) & 3;
        const int nm = metrics_curr[s] + cost;
        /* Strict '<' keeps the first-seen branch on ties and never lets an
         * unreached source state (metric >= METRIC_UNREACHED) win. */
        if (nm < metrics_next[ns]) {
          metrics_next[ns] = nm;
          step_state[ns] = s;
          step_bit[ns] = (uint8_t)b;
        }
      }
    }

    for (int s = 0; s < NSTATES; s++) {
      metrics_curr[s] = metrics_next[s];
    }
  }

  /* Pick the best terminal state (lowest index on ties). */
  int best_state = 0;
  int best_metric = metrics_curr[0];
  for (int s = 1; s < NSTATES; s++) {
    if (metrics_curr[s] < best_metric) {
      best_metric = metrics_curr[s];
      best_state = s;
    }
  }

  /* Traceback: walk the survivor path from the last step to the first. */
  int cs = best_state;
  for (size_t t = steps; t-- > 0;) {
    const size_t idx = t * (size_t)NSTATES + (size_t)cs;
    out_bits[t] = bp_bit[idx];
    cs = bp_state[idx];
  }

  free(bp_state);
  free(bp_bit);
}
/*
 * Decodes recv into decoded with viterbi_decode_bits and counts how many of
 * the nbits decoded bits differ from orig. Only the least-significant bit
 * of each byte takes part in the comparison.
 *
 * nbits   : number of information bits
 * recv    : received symbols handed to the decoder
 * orig    : reference bits, nbits entries
 * decoded : scratch/output buffer for the decoder, nbits entries
 *
 * Returns the number of mismatching bits, converted to double.
 */
static double pipeline_run(int nbits, const uint8_t *recv, const uint8_t *orig,
                           uint8_t *decoded) {
  viterbi_decode_bits(recv, nbits, decoded);

  long long mismatches = 0;
  for (int k = 0; k < nbits; ++k) {
    /* XOR exposes a differing low bit without masking each side separately. */
    mismatches += ((decoded[k] ^ orig[k]) & 1u) != 0u;
  }
  return (double)mismatches;
}
/*
 * Benchmark entry point, generated by BENCH_MAIN_SCALAR3 (defined in
 * bench_harness.h, not visible here).
 *
 * NOTE(review): the roles of the macro arguments below are inferred from
 * their contents -- confirm against the macro definition:
 *   - T004_Module_020, VITDEC      : presumably benchmark identifiers
 *   - 4096, 16384, 65536           : presumably the three problem sizes n
 *   - declarations up to the lone ',' : buffer setup
 *   - braced block                 : input generation from `seed`
 *   - pipeline_run(...) expression : the measured work
 *   - ans_scalar                   : the reported result
 *   - free(...) statements         : teardown
 * `n` and `seed` are not declared in this file; presumably the macro
 * provides them.
 *
 * What the code itself shows: nbits = n / 16 information bits are drawn
 * from the generator, encoded into 2 * nbits symbols, passed through
 * add_noise_bits using the same generator, and then decoded; ans_scalar
 * receives the bit-error count returned by pipeline_run.
 *
 * NOTE(review): none of the four malloc results is checked before use.
 */
BENCH_MAIN_SCALAR3(
    T004_Module_020, VITDEC, 4096, 16384, 65536, int nbits = n / 16;
    /* bits/decoded hold one byte per information bit; enc/noisy hold the
       two symbols produced per bit. */
    uint8_t *bits = (uint8_t *)malloc((size_t)nbits * sizeof(uint8_t));
    uint8_t *enc = (uint8_t *)malloc((size_t)(2 * nbits) * sizeof(uint8_t));
    uint8_t *noisy = (uint8_t *)malloc((size_t)(2 * nbits) * sizeof(uint8_t));
    uint8_t *decoded = (uint8_t *)malloc((size_t)nbits * sizeof(uint8_t));
    double ans_scalar = 0.0;
    ,
    {
      /* Message bits and channel noise come from one generator seeded with
         `seed`; the message is drawn first, then the noise. */
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < nbits; i++) {
        bits[i] = (uint8_t)(bench_rng_next(&rng) & 1u);
      }
      conv_encode_bits(bits, nbits, enc);
      add_noise_bits(enc, 2 * nbits, noisy, &rng);
    },
    /* Decode the noisy symbols and count residual bit errors. */
    ans_scalar = pipeline_run(nbits, noisy, bits, decoded), ans_scalar,
    free(bits);
    free(enc); free(noisy); free(decoded);)
