#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
/*
 * Limit v to the range [lo, hi].
 *
 * Returns lo when v is below lo, hi when v is above hi, and v otherwise.
 * The lower bound is tested first, so if lo > hi the result is lo for any
 * v < lo and hi for every other v.
 */
static inline int clampi_int5(int v, int lo, int hi) {
  return (v < lo) ? lo : ((v > hi) ? hi : v);
}
/*
 * Blur an image with the separable 5-tap kernel [1 4 6 4 1] / 16 and keep
 * every second pixel in each dimension.
 *
 * The horizontal pass filters every pixel of the w x h input into a scratch
 * buffer. The vertical pass then filters that buffer at even (x, y)
 * positions only, producing a (w / 2) x (h / 2) image. Taps that fall
 * outside the image reuse the nearest edge pixel (index clamping).
 *
 * Parameters:
 *   inimg   row-major input image holding w * h floats
 *   w, h    input width and height in pixels
 *   ow, oh  receive the output width and height; must not be NULL
 *
 * Returns a malloc'd row-major buffer of (*ow) * (*oh) floats which the
 * caller must free().
 *
 * Returns NULL when there is nothing to return:
 *   - the output is empty (w < 2 or h < 2): *ow and *oh hold w / 2 and h / 2
 *     (0 for non-positive inputs), so at least one of them is 0;
 *   - inimg is NULL, the byte size does not fit in size_t, or an allocation
 *     fails: *ow and *oh are both set to 0.
 * In every NULL case (*ow) * (*oh) == 0, so a caller that iterates over the
 * reported size never dereferences the NULL result.
 */
static float *gauss_downsample_once(const float *inimg, int w, int h, int *ow,
                                    int *oh) {
  static const float k[5] = {1.f, 4.f, 6.f, 4.f, 1.f};

  const int nw = (w > 0) ? w / 2 : 0;
  const int nh = (h > 0) ? h / 2 : 0;
  *ow = nw;
  *oh = nh;

  /* Empty result: skip the work and avoid malloc(0), whose return value is
   * implementation-defined. */
  if (nw == 0 || nh == 0)
    return NULL;

  /* From here on w >= 2 and h >= 2. Do all size and offset arithmetic in
   * size_t so that w * h cannot overflow int. */
  const size_t sw = (size_t)w;
  const size_t sh = (size_t)h;
  const size_t snw = (size_t)nw;
  const size_t snh = (size_t)nh;

  float *tmp = NULL;
  float *out = NULL;

  if (inimg == NULL || sw > SIZE_MAX / sizeof(float) / sh)
    goto fail;

  tmp = (float *)malloc(sw * sh * sizeof(float));
  out = (float *)malloc(snw * snh * sizeof(float));
  if (tmp == NULL || out == NULL)
    goto fail;

  /* Horizontal pass at full resolution. */
  for (int y = 0; y < h; y++) {
    const float *src = inimg + (size_t)y * sw;
    float *dst = tmp + (size_t)y * sw;
    for (int x = 0; x < w; x++) {
      float acc = 0.0f;
      for (int t = -2; t <= 2; t++) {
        int xx = clampi_int5(x + t, 0, w - 1);
        acc += src[xx] * k[t + 2];
      }
      dst[x] = acc / 16.0f;
    }
  }

  /* Vertical pass, evaluated only at the even rows/columns that survive the
   * 2x decimation. */
  for (int y2 = 0; y2 < nh; y2++) {
    const int y0 = 2 * y2;
    float *dst = out + (size_t)y2 * snw;
    for (int x2 = 0; x2 < nw; x2++) {
      const size_t x0 = 2 * (size_t)x2;
      float acc = 0.0f;
      for (int t = -2; t <= 2; t++) {
        int yy = clampi_int5(y0 + t, 0, h - 1);
        acc += tmp[(size_t)yy * sw + x0] * k[t + 2];
      }
      dst[x2] = acc / 16.0f;
    }
  }

  free(tmp);
  return out;

fail:
  free(tmp);
  free(out);
  *ow = 0;
  *oh = 0;
  return NULL;
}
/*
 * Build a blur-and-decimate pyramid from an 8-bit image and return the sum
 * of the pixels of the last level that was produced.
 *
 * The image is converted to float, then gauss_downsample_once() is applied
 * up to PYRAMID_LEVELS - 1 times. The loop stops early once either dimension
 * of the current level drops below 2. The remaining pixels are accumulated
 * in double precision, in row-major order.
 *
 * Parameters:
 *   w0, h0  width and height of the input image in pixels
 *   img     row-major input; the caller must provide at least w0 * h0 bytes
 *
 * Returns the pixel sum of the final level. Returns 0.0 when there is no
 * image to process (img is NULL, a dimension is non-positive, the final
 * level is empty) or when a buffer cannot be allocated; that value is
 * therefore indistinguishable from the sum of an all-zero image.
 */
static double pipeline_run(int w0, int h0, const uint8_t *img) {
  enum { PYRAMID_LEVELS = 4 };

  if (img == NULL || w0 <= 0 || h0 <= 0)
    return 0.0;

  /* Compute the element count in size_t so that w0 * h0 cannot overflow
   * int, and make sure the byte size is representable as well. */
  if ((size_t)w0 > SIZE_MAX / sizeof(float) / (size_t)h0)
    return 0.0;
  const size_t n0 = (size_t)w0 * (size_t)h0;

  float *cur = (float *)malloc(n0 * sizeof(float));
  if (cur == NULL)
    return 0.0;
  for (size_t i = 0; i < n0; i++)
    cur[i] = (float)img[i];

  int cw = w0;
  int ch = h0;
  for (int lv = 1; lv < PYRAMID_LEVELS; lv++) {
    int nw = 0;
    int nh = 0;
    float *next = gauss_downsample_once(cur, cw, ch, &nw, &nh);
    free(cur);
    cur = next;
    cw = nw;
    ch = nh;
    if (cur == NULL) {
      /* No buffer came back (empty level or failed allocation): make sure
       * the summation below sees zero elements. */
      cw = 0;
      ch = 0;
      break;
    }
    if (cw < 2 || ch < 2)
      break;
  }

  const size_t count = (size_t)cw * (size_t)ch;
  double acc = 0.0;
  for (size_t i = 0; i < count; i++)
    acc += (double)cur[i];
  free(cur);
  return acc;
}
/*
 * Benchmark entry point, produced by the BENCH_MAIN_SCALAR3 macro. The macro
 * is not defined in this file, so the roles of its arguments described below
 * are inferred from how the fragments use n, seed, img and ans_scalar.
 * TODO confirm against the macro definition in the harness header.
 *
 *  - Declarations fragment: picks a square image with w = h = sqrt(n)
 *    rounded to the nearest integer, allocates n bytes for it and declares
 *    the result variable ans_scalar.
 *  - Fill fragment: initialises an RNG from seed and stores the low 8 bits
 *    of each draw into img[0 .. n-1].
 *  - Run fragment: stores the value returned by pipeline_run(w, h, img) in
 *    ans_scalar; the final fragment frees img.
 *
 * NOTE(review): pipeline_run reads w * h bytes while img holds n bytes; the
 * two agree only when n is a perfect square. 4096, 16384 and 65536 are
 * 64^2, 128^2 and 256^2; presumably these are the values n takes. Verify.
 * NOTE(review): the result of malloc is used without a NULL check.
 */
BENCH_MAIN_SCALAR3(
    T004_Module_007, IMG07, 4096, 16384, 65536,
    int w = (int)(sqrt((double)n) + 0.5);
    int h = w; uint8_t *img = (uint8_t *)malloc((size_t)(n) * sizeof(uint8_t));
    double ans_scalar = 0.0;
    ,
    {
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < n; i++)
        img[i] = (uint8_t)(bench_rng_next(&rng) & 255ULL);
    },
    ans_scalar = pipeline_run(w, h, img), ans_scalar, free(img);)
