#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
/*
 * Performs one Jacobi sweep over an N x N grid stored row-major.
 *
 *   N     grid side length; all three arrays are indexed over N * N cells
 *   u     current iterate (read only)
 *   f     right-hand side (read only, interior cells only)
 *   unew  receives the next iterate
 *
 * Every cell of unew is first set to the matching cell of u, so the outermost
 * rows and columns carry over unchanged. Each interior cell is then replaced
 * by 0.25 * (up + down + left + right - f).
 */
static inline void jacobi_step(int N, const double *u, const double *f,
                               double *unew) {
  const int cell_count = N * N;
  const int last = N - 1;

  /* Seed the output with the input so cells the stencil skips keep u's value. */
  for (int k = 0; k < cell_count; ++k) {
    unew[k] = u[k];
  }

  for (int r = 1; r < last; ++r) {
    const double *above = u + (r - 1) * N;
    const double *here = u + r * N;
    const double *below = u + (r + 1) * N;
    const double *rhs = f + r * N;
    double *out = unew + r * N;

    for (int c = 1; c < last; ++c) {
      /* Summation order is kept fixed so rounding matches bit for bit. */
      out[c] =
          0.25 * (above[c] + below[c] + here[c - 1] + here[c + 1] - rhs[c]);
    }
  }
}
/*
 * Runs 20 Jacobi sweeps on an N x N row-major grid and returns the Euclidean
 * norm of the residual over the interior cells.
 *
 *   N    grid side length
 *   u    initial iterate; used as one of the two ping-pong buffers
 *   tmp  scratch buffer of the same size; the other ping-pong buffer
 *   f    right-hand side (read only)
 *
 * The buffers swap roles after every sweep. Because the sweep count is even,
 * the latest iterate is back in u when the residual is measured.
 *
 * The residual at an interior cell is
 *   (-4 * centre + up + down + left + right) - f
 * and the return value is the square root of the sum of its squares.
 */
static double run_jacobi(int N, double *u, double *tmp, const double *f) {
  double *src = u;
  double *dst = tmp;

  for (int sweeps_left = 20; sweeps_left > 0; --sweeps_left) {
    jacobi_step(N, src, f, dst);
    /* The freshly written buffer becomes the input of the next sweep. */
    double *held = src;
    src = dst;
    dst = held;
  }

  const int last = N - 1;
  double sum_sq = 0.0;

  for (int r = 1; r < last; ++r) {
    const double *above = src + (r - 1) * N;
    const double *here = src + r * N;
    const double *below = src + (r + 1) * N;
    const double *rhs = f + r * N;

    for (int c = 1; c < last; ++c) {
      const double stencil =
          (-4.0 * here[c] + above[c] + below[c] + here[c - 1] + here[c + 1]);
      const double resid = stencil - rhs[c];
      sum_sq += resid * resid;
    }
  }

  return sqrt(sum_sq);
}
/*
 * Benchmark entry point for the Jacobi kernel, generated by BENCH_MAIN_SCALAR3.
 *
 * NOTE(review): the macro is defined outside this file (presumably in
 * bench_harness.h), so the role of each argument below is inferred from its
 * shape -- confirm against the macro definition. `n` and `seed` are not
 * declared here and are presumably supplied by the macro expansion.
 *
 * The three numeric arguments are perfect squares (64^2, 128^2, 256^2); if they
 * are the values `n` takes, Ndim evaluates to 64, 128 and 256.
 *
 * NOTE(review): the three malloc results are used without a NULL check.
 */
BENCH_MAIN_SCALAR3(
    T004_Module_042, JAC, 4096, 16384, 65536,
    /* Declarations: grid side is sqrt(n) rounded to nearest; three buffers of
       n doubles each (iterate, scratch, right-hand side). */
    int Ndim = (int)(sqrt((double)n) + 0.5);
    double *u = (double *)malloc((size_t)n * sizeof(double));
    double *tmp = (double *)malloc((size_t)n * sizeof(double));
    double *f = (double *)malloc((size_t)n * sizeof(double));
    double ans_scalar = 0.0;
    ,
    /* Initialisation: zero both iterate buffers and fill f from a generator
       seeded with `seed`. */
    {
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < n; i++) {
        u[i] = 0.0;
        tmp[i] = 0.0;
        f[i] = bench_rng_double_signed(&rng);
      }
    },
    /* Kernel expression, then the scalar result expression, then cleanup that
       releases all three buffers. */
    ans_scalar = run_jacobi(Ndim, u, tmp, f), ans_scalar, free(u);
    free(tmp); free(f);)
