#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
/*
 * Clamp v into the range [lo, hi].
 *
 * The lower bound is tested first: a value below lo yields lo, otherwise a
 * value above hi yields hi, otherwise v is returned unchanged.
 */
static inline int clampi_int7(int v, int lo, int hi) {
  return (v < lo) ? lo : ((v > hi) ? hi : v);
}
/*
 * Compute a per-pixel energy map for a row-major 8-bit image of w columns
 * and h rows.
 *
 * For each pixel the horizontal (gx) and vertical (gy) 3x3 Sobel responses
 * are evaluated and eng[y * w + x] is set to |gx| + |gy|. Neighbour
 * coordinates that fall outside the image are clamped to the nearest valid
 * row/column, so border pixels reuse their edge neighbours.
 *
 * img: input pixels, w * h bytes.
 * eng: output buffer, w * h doubles, fully overwritten.
 */
static void energy_map_sobel(const uint8_t *img, int w, int h, double *eng) {
  for (int row = 0; row < h; row++) {
    // Start offsets of the (clamped) rows above, at, and below `row`.
    const int up = clampi_int7(row - 1, 0, h - 1) * w;
    const int mid = row * w;
    const int down = clampi_int7(row + 1, 0, h - 1) * w;

    for (int col = 0; col < w; col++) {
      const int lf = clampi_int7(col - 1, 0, w - 1);
      const int rt = clampi_int7(col + 1, 0, w - 1);

      // Right column minus left column, middle row weighted by 2.
      const int gx = (img[up + rt] - img[up + lf]) +
                     2 * (img[mid + rt] - img[mid + lf]) +
                     (img[down + rt] - img[down + lf]);

      // Top row minus bottom row, middle column weighted by 2.
      const int gy = (img[up + lf] - img[down + lf]) +
                     2 * (img[up + col] - img[down + col]) +
                     (img[up + rt] - img[down + rt]);

      eng[mid + col] = fabs((double)gx) + fabs((double)gy);
    }
  }
}
/*
 * Build the cumulative-cost table for minimum-energy vertical seams.
 *
 * dp[y * w + x] receives the smallest total energy of any path that starts
 * in row 0 and ends at (x, y), where each step moves down one row and at
 * most one column sideways. trace[y * w + x] records the column chosen in
 * row y - 1; entries of row 0 are set to -1.
 *
 * Ties are resolved in favour of the column directly above, then the left
 * neighbour, then the right neighbour (a candidate only wins when strictly
 * cheaper).
 *
 * eng:   input energy, w * h doubles, row-major.
 * dp:    output costs, w * h doubles.
 * trace: output predecessor columns, w * h ints.
 */
static void compute_dp_min_seam(const double *eng, int w, int h, double *dp,
                                int *trace) {
  // The first row has no predecessor: cost is the energy itself.
  for (int col = 0; col < w; col++) {
    dp[col] = eng[col];
    trace[col] = -1;
  }

  for (int row = 1; row < h; row++) {
    const int cur = row * w;
    const int prev = cur - w;

    for (int col = 0; col < w; col++) {
      int parent = col;
      double cheapest = dp[prev + col];

      if (col > 0 && dp[prev + col - 1] < cheapest) {
        parent = col - 1;
        cheapest = dp[prev + col - 1];
      }
      if (col + 1 < w && dp[prev + col + 1] < cheapest) {
        parent = col + 1;
        cheapest = dp[prev + col + 1];
      }

      dp[cur + col] = eng[cur + col] + cheapest;
      trace[cur + col] = parent;
    }
  }
}
/*
 * Return the total cost of the cheapest vertical seam.
 *
 * The result is the minimum of the last row of dp; when several columns tie,
 * the leftmost one is taken as the seam's end column. The seam is then
 * walked upward through trace, but the visited columns are not reported:
 * the walk does not influence the returned value.
 *
 * dp:    cumulative costs, w * h doubles, row-major.
 * trace: predecessor columns, w * h ints; a negative entry marks "no
 *        predecessor".
 *
 * Returns 0.0 when the grid is empty (w <= 0 or h <= 0), since there is no
 * last row to read.
 */
static double backtrack_seam(const double *dp, const int *trace, int w, int h) {
  // Without this guard (h - 1) * w would index before the start of dp.
  if (w <= 0 || h <= 0)
    return 0.0;

  const int last = (h - 1) * w;
  int best_x = 0;
  double best_val = dp[last];
  for (int x = 1; x < w; x++) {
    double v = dp[last + x];
    if (v < best_val) {
      best_val = v;
      best_x = x;
    }
  }

  // Follow predecessor links from the bottom row to the top row. An entry
  // outside [0, w) (including the -1 markers) resets the walk to the end
  // column so the next lookup stays inside the table.
  int cx = best_x;
  for (int yy = h - 1; yy >= 0; yy--) {
    cx = trace[yy * w + cx];
    if (cx < 0 || cx >= w)
      cx = best_x;
  }
  (void)cx;

  return best_val;
}
/*
 * Run the full seam pipeline on a w-by-h 8-bit image: Sobel energy map,
 * cumulative-cost table, then minimum seam cost.
 *
 * img: w * h row-major pixels; not modified.
 *
 * Returns the cost of the cheapest vertical seam. Returns 0.0 for an empty
 * image (w <= 0 or h <= 0). Returns -1.0 if a scratch buffer cannot be
 * allocated; a real seam cost is a sum of absolute values and is therefore
 * never negative, so -1.0 cannot be confused with a valid result.
 *
 * All scratch buffers are owned by this function and released before it
 * returns.
 */
static double pipeline_run(int w, int h, const uint8_t *img) {
  if (w <= 0 || h <= 0)
    return 0.0;

  // Both factors are positive here; multiply in size_t rather than int.
  const size_t count = (size_t)w * (size_t)h;

  double *eng = (double *)malloc(count * sizeof *eng);
  double *dp = (double *)malloc(count * sizeof *dp);
  int *trace = (int *)malloc(count * sizeof *trace);

  double val = -1.0;
  if (eng != NULL && dp != NULL && trace != NULL) {
    energy_map_sobel(img, w, h, eng);
    compute_dp_min_seam(eng, w, h, dp, trace);
    val = backtrack_seam(dp, trace, w, h);
  }

  // free(NULL) is a no-op, so partial allocation failures are handled here.
  free(eng);
  free(dp);
  free(trace);
  return val;
}
/*
 * Benchmark entry point, generated by BENCH_MAIN_SCALAR3 (defined outside
 * this file, presumably in bench_harness.h -- its exact expansion is not
 * visible here, so the role of each argument below is inferred from the code
 * passed in; confirm against the macro definition).
 *
 * - 4096, 16384, 65536: presumably the problem sizes bound to `n`; each is a
 *   perfect square (64^2, 128^2, 256^2).
 * - Setup code: derives a square image side w = h = round(sqrt(n)),
 *   allocates n bytes for img, and declares ans_scalar. The malloc result is
 *   not checked before use.
 * - Init code: fills img[0..n) with the low 8 bits of successive
 *   bench_rng_next() values from a generator created from `seed` (`n` and
 *   `seed` are not declared here; presumably supplied by the macro).
 * - Run expression: stores pipeline_run(w, h, img) in ans_scalar.
 * - Result expression: ans_scalar.
 * - Cleanup: frees img.
 */
BENCH_MAIN_SCALAR3(
    T004_Module_009, IMG09, 4096, 16384, 65536,
    int w = (int)(sqrt((double)n) + 0.5);
    int h = w; uint8_t *img = (uint8_t *)malloc((size_t)(n) * sizeof(uint8_t));
    double ans_scalar = 0.0;
    ,
    {
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < n; i++)
        img[i] = (uint8_t)(bench_rng_next(&rng) & 255ULL);
    },
    ans_scalar = pipeline_run(w, h, img), ans_scalar, free(img);)
