
#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/* Number of second-order sections chained one after another by pipeline_run(). */
#define NSEC 4
/*
 * Per-section filter coefficients, indexed by section number (0 .. NSEC-1).
 * pipeline_run() passes entry [sec] of every table to biquad_process().
 *
 * In biquad_process() the b* values multiply the input sample and the a*
 * values multiply the output sample (entering with a minus sign), which
 * corresponds to the difference equation
 *   y[n] = b0*x[n] + b1*x[n-1] + b2*x[n-2] - a1*y[n-1] - a2*y[n-2]
 * i.e. H(z) = (b0 + b1 z^-1 + b2 z^-2) / (1 + a1 z^-1 + a2 z^-2).
 */
static const double b0_tab[NSEC] = {0.5, 1.2, 0.9, 1.1};
static const double b1_tab[NSEC] = {0.0, -0.8, -0.3, -0.4};
static const double b2_tab[NSEC] = {-0.5, 0.6, -0.2, 0.1};
static const double a1_tab[NSEC] = {-0.3, 0.4, -0.6, 0.2};
static const double a2_tab[NSEC] = {0.1, -0.2, 0.3, -0.4};
/*
 * Run one second-order (biquad) filter section over n samples.
 *
 * Uses the transposed direct-form II recurrence with two state variables:
 *   y  = b0*x + s1
 *   s1 = b1*x - a1*y + s2
 *   s2 = b2*x - a2*y
 *
 * b0, b1, b2  feed-forward coefficients (applied to the input sample).
 * a1, a2      feedback coefficients (applied to the output sample, negated).
 * z1, z2      in/out: filter state; read on entry and overwritten with the
 *             state after the last processed sample, so consecutive calls
 *             continue the same stream.
 * in          n input samples.
 * out         receives n output samples; each in[i] is read before out[i]
 *             is written.
 * n           sample count; nothing is read or written when n <= 0 and the
 *             state is left unchanged.
 */
static inline void biquad_process(double b0, double b1, double b2, double a1,
                                  double a2, double *z1, double *z2,
                                  const double *in, double *out, int n) {
  /* Keep the state in locals during the loop; store it back once at the end. */
  double state1 = *z1;
  double state2 = *z2;

  const double *rd = in;
  double *wr = out;
  int remaining = n;

  while (remaining > 0) {
    const double x = *rd++;
    const double y = b0 * x + state1;

    /* The new state1 uses the old state2, so update state1 first. */
    state1 = b1 * x - a1 * y + state2;
    state2 = b2 * x - a2 * y;

    *wr++ = y;
    remaining--;
  }

  *z1 = state1;
  *z2 = state2;
}
/*
 * Filter n samples of src through the NSEC-section biquad cascade and
 * return the energy (sum of squares) of the result.
 *
 * Each section starts from zero state and processes the whole buffer before
 * the next section runs. Intermediate results alternate between two scratch
 * buffers owned by this function; the output of the last section is copied
 * into dst.
 *
 * n    number of samples in src and dst.
 * src  input samples (not modified).
 * dst  receives the n filtered samples.
 *
 * Returns the sum of dst[i]^2 accumulated in index order.
 * Returns 0.0 without touching dst when n <= 0.
 * Returns NaN without touching dst when the scratch buffers cannot be
 * allocated (an energy is never NaN for finite input, so callers can detect
 * the failure with isnan()).
 */
static double pipeline_run(int n, const double *src, double *dst) {
  /* Empty input: no samples, zero energy. Returning early also avoids a
   * malloc(0) or a wrapped size from a negative n. */
  if (n <= 0) {
    return 0.0;
  }

  const size_t count = (size_t)n;
  double *tmp1 = (double *)malloc(count * sizeof *tmp1);
  double *tmp2 = (double *)malloc(count * sizeof *tmp2);
  if (tmp1 == NULL || tmp2 == NULL) {
    /* free(NULL) is a no-op, so both can be released unconditionally. */
    free(tmp1);
    free(tmp2);
    return NAN;
  }

  /* One pair of state variables per section, all starting at rest. */
  double z1[NSEC] = {0.0};
  double z2[NSEC] = {0.0};

  const double *cur_in = src;
  double *cur_out = tmp1;
  for (int sec = 0; sec < NSEC; sec++) {
    biquad_process(b0_tab[sec], b1_tab[sec], b2_tab[sec], a1_tab[sec],
                   a2_tab[sec], &z1[sec], &z2[sec], cur_in, cur_out, n);
    if (sec + 1 < NSEC) {
      /* Ping-pong: the buffer just written feeds the next section, which
       * writes into the other scratch buffer. */
      cur_in = cur_out;
      cur_out = (cur_out == tmp1) ? tmp2 : tmp1;
    }
  }

  /* cur_out now holds the output of the last section. */
  memcpy(dst, cur_out, count * sizeof *dst);

  double energy = 0.0;
  for (int i = 0; i < n; i++) {
    double v = dst[i];
    energy += v * v;
  }

  free(tmp1);
  free(tmp2);
  return energy;
}
/*
 * Benchmark entry point, generated by the BENCH_MAIN_SCALAR3 macro
 * (presumably declared in bench_harness.h -- the macro definition is not
 * visible in this file, so the role of each argument below is inferred from
 * its contents; confirm against the harness).
 *
 * `n` and `seed` are not declared in this file; presumably the macro
 * expansion provides them.
 *
 * NOTE(review): the two malloc() results below are used without a NULL
 * check; the fill loop would dereference NULL if an allocation failed.
 */
BENCH_MAIN_SCALAR3(
    /* Identifiers followed by three integer constants (presumably the
     * problem sizes the harness runs -- confirm). */
    T004_Module_012, BIQEQT, 4096, 16384, 65536,
    /* Declarations: input buffer, output buffer and the scalar result. */
    double *src = (double *)malloc((size_t)n * sizeof(double));
    double *dst = (double *)malloc((size_t)n * sizeof(double));
    double ans_scalar = 0.0;
    ,
    /* Fill src with n values drawn from a generator seeded with `seed`. */
    {
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < n; i++) {
        src[i] = bench_rng_double_signed(&rng);
      }
    },
    /* Kernel call, the value handed back to the harness, then cleanup. */
    ans_scalar = pipeline_run(n, src, dst), ans_scalar, free(src);
    free(dst);)
