#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Linear offset of element (i, j) in a row-major array with n columns. */
#define IDX(i, j, n) ((size_t)(i) * (size_t)(n) + (size_t)(j))
/* Linear offset of element (i, j, k) in a row-major n x n x n array. */
#define ID3(i, j, k, n)                                                        \
  (((size_t)(i) * (size_t)(n) + (size_t)(j)) * (size_t)(n) + (size_t)(k))

/*
 * One sweep of a 7-point averaging stencil over an n x n x n grid.
 *
 * n: edge length of the cubic grid; A and B are indexed as n*n*n doubles.
 * A: input grid; its interior points (indices 1 .. n-2 in every dimension)
 *    are overwritten with the smoothed values at the end of the sweep.
 * B: scratch grid; its interior points receive the smoothed values.
 *
 * Elements with any index equal to 0 or n-1 are never written in either
 * array. When n < 3 there is no interior and the call does nothing.
 */
void kernel_run(int n, double *A, double *B) {
  const int upper = n - 1;          /* exclusive end of the interior range */
  const size_t row = (size_t)n;     /* offset between neighbours along j */
  const size_t plane = row * row;   /* offset between neighbours along i */
  const double weight = 1.0 / 7.0;  /* centre plus six face neighbours */

  /* Pass 1: B <- average of each interior point of A and its neighbours. */
  for (int x = 1; x < upper; ++x) {
    for (int y = 1; y < upper; ++y) {
      for (int z = 1; z < upper; ++z) {
        const size_t at = ID3(x, y, z, n);
        /* Summation order is kept fixed so rounding is reproducible. */
        B[at] = (A[at] + A[at - plane] + A[at + plane] + A[at - row] +
                 A[at + row] + A[at - 1] + A[at + 1]) *
                weight;
      }
    }
  }

  /* Pass 2: copy the smoothed interior back into A. */
  for (int x = 1; x < upper; ++x) {
    for (int y = 1; y < upper; ++y) {
      const size_t line = ID3(x, y, 0, n);
      for (int z = 1; z < upper; ++z) {
        A[line + (size_t)z] = B[line + (size_t)z];
      }
    }
  }
}
/*
 * Benchmark driver for kernel_run, generated by the BENCH_MAIN_ARRAY3_D
 * harness macro.
 *
 * NOTE(review): the roles of the arguments are inferred from their contents
 * here (allocation, initialisation, kernel call, result buffer and length,
 * cleanup); confirm against the macro definition in bench_harness.h.
 *
 * Both grids hold n*n*n doubles. The allocations are checked explicitly so
 * that an out-of-memory condition stops the run with a diagnostic instead of
 * handing a NULL pointer to the fill, memset and kernel steps.
 */
BENCH_MAIN_ARRAY3_D(T001_Loops_024, 15, 32, 48, 64,
                    double *A = malloc((size_t)n * n * n * sizeof(double));
                    double *B = malloc((size_t)n * n * n * sizeof(double));
                    /* Fail fast: free(NULL) is a no-op, so both frees are safe. */
                    if (A == NULL || B == NULL) {
                      fprintf(stderr, "T001_Loops_024: out of memory\n");
                      free(A);
                      free(B);
                      exit(EXIT_FAILURE);
                    }
                    , bench_fill_array(A, (size_t)n * n * n, bench_seed(111));
                    memset(B, 0, (size_t)n * n * n * sizeof(double));
                    , kernel_run(n, A, B);, A, (size_t)n * n * n, free(A);
                    free(B);)
