#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
/*
 * Limit v to the range [lo, hi].
 *
 * The lower bound is tested before the upper bound, so when v < lo the
 * result is lo even if lo happens to be greater than hi.
 */
static inline int clampi_int4(int v, int lo, int hi) {
  return (v < lo) ? lo : ((v > hi) ? hi : v);
}
/*
 * Compute 3x3 Sobel gradients of an 8-bit image.
 *
 * img    : w*h input samples, row-major.
 * w, h   : image width and height in pixels.
 * ix, iy : outputs, w*h floats each, row-major. ix receives the horizontal
 *          difference (right column minus left column, weights 1-2-1 down
 *          the rows); iy receives the vertical difference (top row minus
 *          bottom row, weights 1-2-1 across the columns).
 *
 * Neighbour coordinates that fall outside the image are clamped to the
 * nearest valid row/column, so border pixels reuse their edge samples.
 */
static void sobel_tensor(const uint8_t *img, int w, int h, float *ix,
                         float *iy) {
  for (int row = 0; row < h; ++row) {
    /* Row neighbours depend only on the row, so clamp them once per row. */
    const int up = clampi_int4(row - 1, 0, h - 1);
    const int down = clampi_int4(row + 1, 0, h - 1);

    for (int col = 0; col < w; ++col) {
      const int left = clampi_int4(col - 1, 0, w - 1);
      const int right = clampi_int4(col + 1, 0, w - 1);

      const uint8_t *top = img + up * w;
      const uint8_t *mid = img + row * w;
      const uint8_t *bot = img + down * w;

      /* Horizontal differences per row; the middle row is weighted by 2. */
      const int dx_top = top[right] - top[left];
      const int dx_mid = mid[right] - mid[left];
      const int dx_bot = bot[right] - bot[left];

      /* Smoothed top and bottom rows; their difference is the y gradient. */
      const int sum_top = top[left] + 2 * top[col] + top[right];
      const int sum_bot = bot[left] + 2 * bot[col] + bot[right];

      const int at = row * w + col;
      ix[at] = (float)(dx_top + 2 * dx_mid + dx_bot);
      iy[at] = (float)(sum_top - sum_bot);
    }
  }
}
/*
 * Per-pixel corner response: resp = det - k * trace^2 with k = 0.04, where
 * the 2x2 tensor entries are sxx = gx*gx, syy = gy*gy and sxy = gx*gy taken
 * from a single pixel's gradient.
 *
 * ix, iy : input gradients, w*h floats each.
 * w, h   : image dimensions; only their product is used.
 * resp   : output, w*h floats.
 *
 * NOTE(review): the tensor entries are not accumulated over a neighbourhood,
 * so det = gx^2*gy^2 - (gx*gy)^2 is algebraically zero for every input and
 * the response cannot be positive beyond rounding effects. Confirm whether a
 * windowed sum was intended before relying on the output.
 */
static void eig_response(const float *ix, const float *iy, int w, int h,
                         float *resp) {
  const float k = 0.04f;
  const int total = w * h;
  int i = 0;

  while (i < total) {
    const float dx = ix[i];
    const float dy = iy[i];

    const float sxx = dx * dx;
    const float syy = dy * dy;
    const float sxy = dx * dy;

    const float det = sxx * syy - sxy * sxy;
    const float trace = sxx + syy;

    resp[i] = det - k * trace * trace;
    ++i;
  }
}
/*
 * Count interior pixels whose response passes the threshold test and is not
 * exceeded by any of its eight neighbours.
 *
 * resp : w*h response values, row-major.
 * w, h : image dimensions. The outermost rows and columns are never
 *        candidates, so images narrower or shorter than 3 yield 0.
 *
 * Returns the number of such pixels as a double. Neighbours equal to the
 * candidate do not suppress it (the comparison is strict).
 */
static double nms_pick(const float *resp, int w, int h) {
  const float thr = 1000000.0f;
  double peaks = 0.0;
  const int row_end = h - 1;

  for (int row = 1; row < row_end; ++row) {
    const int col_end = w - 1;

    for (int col = 1; col < col_end; ++col) {
      const float *mid = resp + row * w + col;
      const float v = *mid;

      /* Written as !(v <= thr) rather than v > thr so a NaN is treated the
       * same way as by a "skip when v <= thr" test. */
      if (!(v <= thr)) {
        const float *above = mid - w;
        const float *below = mid + w;

        /* Short-circuits on the first neighbour that is strictly larger. */
        const int beaten = above[-1] > v || above[0] > v || above[1] > v ||
                           mid[-1] > v || mid[1] > v ||
                           below[-1] > v || below[0] > v || below[1] > v;
        if (!beaten)
          peaks += 1.0;
      }
    }
  }
  return peaks;
}
/*
 * Run the full pipeline on one image: Sobel gradients, per-pixel response,
 * then 3x3 non-maximum suppression.
 *
 * w, h : image dimensions in pixels.
 * img  : w*h 8-bit samples, row-major; not modified.
 *
 * Returns the value produced by nms_pick for the image. Returns 0.0 without
 * touching img when either dimension is non-positive, when img is NULL, or
 * when a scratch buffer cannot be allocated.
 *
 * All scratch buffers are allocated and released inside this function.
 */
static double pipeline_run(int w, int h, const uint8_t *img) {
  if (w <= 0 || h <= 0 || img == NULL)
    return 0.0;

  /* Multiply in size_t so the byte count is not computed via an int product. */
  const size_t n = (size_t)w * (size_t)h;
  float *ix = (float *)malloc(n * sizeof *ix);
  float *iy = (float *)malloc(n * sizeof *iy);
  float *resp = (float *)malloc(n * sizeof *resp);

  double val = 0.0;
  if (ix != NULL && iy != NULL && resp != NULL) {
    sobel_tensor(img, w, h, ix, iy);
    eig_response(ix, iy, w, h, resp);
    val = nms_pick(resp, w, h);
  }

  /* free(NULL) is a no-op, so partial allocation failures are handled here. */
  free(ix);
  free(iy);
  free(resp);
  return val;
}
/*
 * Benchmark entry point, produced by expanding BENCH_MAIN_SCALAR3. The macro
 * is not defined in this file (presumably it comes from bench_harness.h), so
 * the argument roles below are inferred from how the arguments read and
 * should be confirmed against the macro definition.
 *
 *   - T004_Module_006, IMG06: identifiers handed to the harness.
 *   - 4096, 16384, 65536: presumably the values `n` takes. Each is a perfect
 *     square (64^2, 128^2, 256^2), so w = h = round(sqrt(n)) gives
 *     w * h == n for these values.
 *   - Declarations: derive a square w x h from n, allocate n bytes for img,
 *     and declare the result variable ans_scalar.
 *   - Braced block: fills img with the low 8 bits of successive
 *     bench_rng_next() values from a generator initialised with `seed`.
 *   - ans_scalar = pipeline_run(w, h, img): the measured computation,
 *     followed by the expression naming its result.
 *   - free(img): releases the image buffer.
 *
 * `n` and `seed` are not declared here; presumably the macro expansion
 * provides them.
 *
 * NOTE(review): the malloc result for img is not checked before the fill
 * loop writes through it.
 */
BENCH_MAIN_SCALAR3(
    T004_Module_006, IMG06, 4096, 16384, 65536,
    int w = (int)(sqrt((double)n) + 0.5);
    int h = w; uint8_t *img = (uint8_t *)malloc((size_t)(n) * sizeof(uint8_t));
    double ans_scalar = 0.0;
    ,
    {
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < n; i++)
        img[i] = (uint8_t)(bench_rng_next(&rng) & 255ULL);
    },
    ans_scalar = pipeline_run(w, h, img), ans_scalar, free(img);)
