#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
/*
 * Clamp v into the range [lo, hi].
 *
 * The lower bound is tested first: a value below lo yields lo even when the
 * range is inverted (lo > hi); otherwise a value above hi yields hi.
 */
static inline int clampi_int(int v, int lo, int hi) {
  return (v < lo) ? lo : ((v > hi) ? hi : v);
}
/*
 * Fill ptype (w * h entries, row-major) with a per-pixel site code derived
 * from the parity of the pixel coordinates:
 *
 *   even row, even column -> 1      even row, odd column -> 0
 *   odd  row, even column -> 3      odd  row, odd column -> 2
 *
 * interpolate_grbg() consumes these codes (0: red site, 3: blue site,
 * 1 and 2: the two kinds of green site).
 *
 * Nothing is written when w <= 0 or h <= 0.
 */
static void precompute_offsets(int w, int h, unsigned char *ptype) {
  /* Indexed as [row parity][column parity]. */
  static const unsigned char site_code[2][2] = {{1, 0}, {3, 2}};

  for (int y = 0; y < h; y++) {
    /* Row start computed in size_t so large images cannot overflow int. */
    unsigned char *row = ptype + (size_t)y * (size_t)w;
    for (int x = 0; x < w; x++) {
      row[x] = site_code[y & 1][x & 1];
    }
  }
}
/*
 * Read one sample of the w-by-h row-major raw image as a double.
 *
 * Coordinates are clamped to [0, w - 1] and [0, h - 1] before the lookup, so
 * an out-of-range (x, y) reads the nearest sample on the image border.
 */
static inline double get_raw_px(const uint8_t *raw, int w, int h, int x,
                                int y) {
  const int col = clampi_int(x, 0, w - 1);
  const int row = clampi_int(y, 0, h - 1);
  const uint8_t *line = raw + row * w;
  return (double)line[col];
}
/*
 * Bilinear demosaic of a w-by-h single-channel mosaic into interleaved RGB.
 *
 * raw   : w * h input samples, row-major.
 * ptype : w * h per-pixel site codes (see below), row-major.
 * rgb   : output, 3 * w * h doubles laid out as R, G, B per pixel.
 *
 * Site codes:
 *   0     - red site:   R = centre, G = mean of the 4 axial neighbours,
 *                       B = mean of the 4 diagonal neighbours.
 *   3     - blue site:  B = centre, G = mean of the 4 axial neighbours,
 *                       R = mean of the 4 diagonal neighbours.
 *   1     - green site: G = centre, R = mean of left/right,
 *                       B = mean of up/down.
 *   other - green site: G = centre, R = mean of up/down,
 *                       B = mean of left/right.
 *
 * Neighbour reads go through get_raw_px(), which clamps coordinates to the
 * image, so border pixels reuse the nearest in-range sample.
 */
static void interpolate_grbg(const uint8_t *raw, const unsigned char *ptype,
                             int w, int h, double *rgb) {
  for (int y = 0; y < h; y++) {
    for (int x = 0; x < w; x++) {
      /* size_t index keeps 3 * idx from overflowing int on large images. */
      const size_t idx = (size_t)y * (size_t)w + (size_t)x;
      const unsigned char kind = ptype[idx];

      /* Every site type needs the centre and the four axial neighbours. */
      const double centre = get_raw_px(raw, w, h, x, y);
      const double left = get_raw_px(raw, w, h, x - 1, y);
      const double right = get_raw_px(raw, w, h, x + 1, y);
      const double up = get_raw_px(raw, w, h, x, y - 1);
      const double down = get_raw_px(raw, w, h, x, y + 1);

      double red = 0.0;
      double green = 0.0;
      double blue = 0.0;

      switch (kind) {
      case 0:
      case 3: {
        /* Red/blue sites: the opposite chroma lives on the diagonals. */
        const double ul = get_raw_px(raw, w, h, x - 1, y - 1);
        const double ur = get_raw_px(raw, w, h, x + 1, y - 1);
        const double dl = get_raw_px(raw, w, h, x - 1, y + 1);
        const double dr = get_raw_px(raw, w, h, x + 1, y + 1);
        const double diag = 0.25 * (ul + ur + dl + dr);

        green = 0.25 * (left + right + up + down);
        if (kind == 0) {
          red = centre;
          blue = diag;
        } else {
          red = diag;
          blue = centre;
        }
        break;
      }
      case 1:
        green = centre;
        red = 0.5 * (left + right);
        blue = 0.5 * (up + down);
        break;
      default:
        green = centre;
        red = 0.5 * (up + down);
        blue = 0.5 * (left + right);
        break;
      }

      double *px = rgb + 3 * idx;
      px[0] = red;
      px[1] = green;
      px[2] = blue;
    }
  }
}
/*
 * 3x3 box filter over an interleaved RGB image.
 *
 * rgb_in  : 3 * w * h doubles, R, G, B per pixel, row-major.
 * rgb_out : same layout; each channel of each pixel receives the mean of the
 *           nine samples in its 3x3 neighbourhood.
 *
 * Neighbour coordinates are clamped to the image, so border pixels count the
 * nearest in-range sample more than once.
 */
static void edge_smooth(const double *rgb_in, int w, int h, double *rgb_out) {
  for (int y = 0; y < h; y++) {
    for (int x = 0; x < w; x++) {
      double sum[3] = {0.0, 0.0, 0.0};

      for (int dy = -1; dy <= 1; dy++) {
        const int row = clampi_int(y + dy, 0, h - 1);
        for (int dx = -1; dx <= 1; dx++) {
          const int col = clampi_int(x + dx, 0, w - 1);
          const double *src = rgb_in + 3 * (row * w + col);
          for (int ch = 0; ch < 3; ch++) {
            sum[ch] += src[ch];
          }
        }
      }

      double *dst = rgb_out + 3 * (y * w + x);
      for (int ch = 0; ch < 3; ch++) {
        dst[ch] = sum[ch] / 9.0;
      }
    }
  }
}
/*
 * Run the full scalar pipeline on a w-by-h raw mosaic and return a checksum.
 *
 * Stages: build the per-pixel site-code map, demosaic into interleaved RGB,
 * apply the 3x3 smoothing pass, then sum every value of the smoothed image.
 *
 * raw : w * h input samples, row-major; not modified.
 *
 * Returns the sum of all 3 * w * h smoothed channel values.
 * Returns 0.0 when w <= 0 or h <= 0 (nothing to process).
 * Returns NAN when the working buffers cannot be allocated or their size
 * would overflow size_t, so a failed run cannot be mistaken for a valid sum.
 *
 * All scratch buffers are owned by this function and released before return.
 */
static double pipeline_run(int w, int h, const uint8_t *raw) {
  if (w <= 0 || h <= 0) {
    return 0.0;
  }

  const size_t n = (size_t)w * (size_t)h;
  /* Guard the 3 * n * sizeof(double) byte count against size_t wrap-around. */
  if (n > SIZE_MAX / (3 * sizeof(double))) {
    return NAN;
  }
  const size_t n3 = 3 * n;

  unsigned char *ptype = (unsigned char *)malloc(n * sizeof *ptype);
  double *rgb = (double *)malloc(n3 * sizeof *rgb);
  double *sm = (double *)malloc(n3 * sizeof *sm);

  double acc = NAN;
  if (ptype != NULL && rgb != NULL && sm != NULL) {
    precompute_offsets(w, h, ptype);
    interpolate_grbg(raw, ptype, w, h, rgb);
    edge_smooth(rgb, w, h, sm);

    acc = 0.0;
    for (size_t i = 0; i < n3; i++) {
      acc += sm[i];
    }
  }

  /* free(NULL) is a no-op, so one cleanup path covers partial allocation. */
  free(sm);
  free(rgb);
  free(ptype);
  return acc;
}
/*
 * Benchmark driver, produced by the BENCH_MAIN_SCALAR3 macro.
 *
 * NOTE(review): the macro is not defined in this file (presumably it comes
 * from bench_harness.h), so the role of each argument below is inferred from
 * its contents -- confirm against the macro definition.
 *
 *  - T004_Module_001, IMG01: identifiers handed to the harness.
 *  - 4096, 16384, 65536: presumably the problem sizes bound to `n`. Each is a
 *    perfect square (64^2, 128^2, 256^2), so with these values w * h == n and
 *    the pipeline stays inside the n-byte raw buffer.
 *  - Setup statements: w is sqrt(n) rounded to nearest, h equals w, and raw
 *    is an n-byte buffer (the malloc result is not checked here).
 *  - Braced block: fills raw with the low 8 bits of successive
 *    bench_rng_next() values from a generator initialised with `seed`.
 *  - ans_scalar = pipeline_run(w, h, raw): the measured computation.
 *  - ans_scalar: the value reported back to the harness.
 *  - free(raw): releases the input buffer.
 *
 * `n` and `seed` are not declared in this file; they are assumed to be
 * provided by the macro expansion.
 */
BENCH_MAIN_SCALAR3(
    T004_Module_001, IMG01, 4096, 16384, 65536,
    int w = (int)(sqrt((double)n) + 0.5);
    int h = w; uint8_t *raw = (uint8_t *)malloc((size_t)(n) * sizeof(uint8_t));
    double ans_scalar = 0.0;
    ,
    {
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < n; i++)
        raw[i] = (uint8_t)(bench_rng_next(&rng) & 255ULL);
    },
    ans_scalar = pipeline_run(w, h, raw), ans_scalar, free(raw);)
