#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
/* Number of samples per processing frame; also the length of the DFT/IDFT. */
#define BLKSZ_DN 16
/* time_gate() halves every sample whose absolute value is below this. */
#define TH_TIME 0.2
/* soft_shrink() subtracts this from each bin magnitude and zeroes bins that
 * do not exceed it. */
#define TH_FREQ 0.1
/*
 * Time-domain gate over one frame of BLKSZ_DN samples.
 *
 * Each sample whose absolute value is strictly below TH_TIME is halved;
 * every other sample (including NaN, for which the comparison is false)
 * is copied through unchanged.
 *
 * in  - BLKSZ_DN input samples (read only).
 * out - receives BLKSZ_DN gated samples.
 */
static inline void time_gate(const double *in, double *out) {
  const double *src = in;
  const double *const stop = in + BLKSZ_DN;
  double *dst = out;

  while (src != stop) {
    const double sample = *src++;
    const int is_quiet = fabs(sample) < TH_TIME;
    *dst++ = is_quiet ? sample * 0.5 : sample;
  }
}
/*
 * Direct (nested-loop) forward DFT of one real frame of BLKSZ_DN samples:
 *
 *   re[k] = sum_t in[t] * cos(-2*pi*k*t / BLKSZ_DN)
 *   im[k] = sum_t in[t] * sin(-2*pi*k*t / BLKSZ_DN)
 *
 * in - BLKSZ_DN real input samples (read only).
 * re - receives the BLKSZ_DN real parts.
 * im - receives the BLKSZ_DN imaginary parts.
 */
static inline void dft16(const double *in, double *re, double *im) {
  const double PI = 3.14159265358979323846;

  for (int bin = 0; bin < BLKSZ_DN; ++bin) {
    double acc_re = 0.0;
    double acc_im = 0.0;

    for (int t = 0; t < BLKSZ_DN; ++t) {
      /* Operation order is kept as-is so rounding matches bit for bit. */
      const double phase =
          -2.0 * PI * (double)bin * (double)t / (double)BLKSZ_DN;
      const double sample = in[t];

      acc_re += sample * cos(phase);
      acc_im += sample * sin(phase);
    }

    re[bin] = acc_re;
    im[bin] = acc_im;
  }
}
/*
 * In-place soft thresholding of BLKSZ_DN complex bins.
 *
 * For each bin the magnitude sqrt(re^2 + im^2) is computed. If it exceeds
 * TH_FREQ, both components are scaled by (mag - TH_FREQ) / mag, which
 * reduces the magnitude by TH_FREQ. Otherwise the bin is set to zero.
 *
 * re, im - real and imaginary parts; read and overwritten.
 */
static inline void soft_shrink(double *re, double *im) {
  for (int bin = 0; bin < BLKSZ_DN; ++bin) {
    const double a = re[bin];
    const double b = im[bin];
    const double norm = sqrt(a * a + b * b);

    /* Written as a negation so a NaN magnitude also lands here. */
    if (!(norm > TH_FREQ)) {
      re[bin] = 0.0;
      im[bin] = 0.0;
      continue;
    }

    const double gain = (norm - TH_FREQ) / norm;
    re[bin] = a * gain;
    im[bin] = b * gain;
  }
}
/*
 * Direct (nested-loop) inverse DFT of BLKSZ_DN complex bins, keeping only
 * the real part of the result:
 *
 *   out[t] = (1 / BLKSZ_DN) *
 *            sum_k ( re[k] * cos(2*pi*k*t / BLKSZ_DN)
 *                  - im[k] * sin(2*pi*k*t / BLKSZ_DN) )
 *
 * re, im - BLKSZ_DN real and imaginary parts (read only).
 * out    - receives BLKSZ_DN real time-domain samples.
 */
static inline void idft16(const double *re, const double *im, double *out) {
  const double PI = 3.14159265358979323846;

  for (int t = 0; t < BLKSZ_DN; ++t) {
    double sum = 0.0;

    for (int bin = 0; bin < BLKSZ_DN; ++bin) {
      /* Operation order is kept as-is so rounding matches bit for bit. */
      const double phase =
          2.0 * PI * (double)bin * (double)t / (double)BLKSZ_DN;

      sum += re[bin] * cos(phase) - im[bin] * sin(phase);
    }

    out[t] = sum / (double)BLKSZ_DN;
  }
}
/*
 * Runs the per-frame pipeline over x and returns the accumulated energy.
 *
 * The input is split into n / BLKSZ_DN consecutive frames of BLKSZ_DN
 * samples; any trailing n % BLKSZ_DN samples are ignored. Each frame goes
 * through time_gate -> dft16 -> soft_shrink -> idft16, and the squares of
 * the resulting samples are summed across all frames.
 *
 * n - number of samples available in x.
 * x - input samples (read only).
 *
 * Returns the sum of squared output samples; 0.0 when n < BLKSZ_DN.
 */
static double pipeline_run(int n, const double *x) {
  double time_buf[BLKSZ_DN];
  double spec_re[BLKSZ_DN];
  double spec_im[BLKSZ_DN];
  double recon[BLKSZ_DN];
  double total = 0.0;

  const double *frame = x;
  for (int remaining = n / BLKSZ_DN; remaining > 0; --remaining) {
    time_gate(frame, time_buf);
    dft16(time_buf, spec_re, spec_im);
    soft_shrink(spec_re, spec_im);
    idft16(spec_re, spec_im, recon);

    for (int i = 0; i < BLKSZ_DN; ++i) {
      const double sample = recon[i];
      total += sample * sample;
    }

    frame += BLKSZ_DN;
  }

  return total;
}
/*
 * Benchmark entry point, produced by the BENCH_MAIN_SCALAR3 macro. The macro
 * is not defined in this file (presumably it comes from bench_harness.h --
 * TODO confirm), so the roles below are read off the arguments themselves:
 *
 *   - T004_Module_018, DNOISE: identifiers passed to the harness.
 *   - 4096, 16384, 65536: three integer arguments, presumably the values
 *     `n` takes -- confirm against the macro definition.
 *   - Declarations: `x`, a heap buffer of n doubles, and `ans_scalar`.
 *   - Init block: fills x[0..n-1] with bench_rng_double_signed() draws from
 *     a generator initialised with `seed`.
 *   - Work statement: ans_scalar = pipeline_run(n, x).
 *   - Result expression: ans_scalar.
 *   - Cleanup: free(x).
 *
 * `n` and `seed` are not declared here; they are assumed to be provided by
 * the macro expansion.
 *
 * NOTE(review): the malloc() result is not checked before x is written in
 * the init block; a failed allocation would dereference NULL.
 */
BENCH_MAIN_SCALAR3(
    T004_Module_018, DNOISE, 4096, 16384, 65536,
    double *x = (double *)malloc((size_t)n * sizeof(double));
    double ans_scalar = 0.0;
    ,
    {
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < n; i++) {
        x[i] = bench_rng_double_signed(&rng);
      }
    },
    ans_scalar = pipeline_run(n, x), ans_scalar, free(x);)
