
#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/**
 * Per-channel (depthwise) 3x3 filter with clamp-to-edge border handling.
 *
 * For every channel c and pixel (i, j) this computes
 *
 *   y[c][i][j] = sum over di, dj in {-1, 0, +1} of
 *                k3x3[c][di + 1][dj + 1] * x[c][clamp(i + di)][clamp(j + dj)]
 *
 * where row indices are clamped to [0, H - 1] and column indices to
 * [0, W - 1], so out-of-range taps reuse the nearest edge sample. The kernel
 * is applied without flipping (cross-correlation form).
 *
 * @param C     number of channels
 * @param H     rows per channel
 * @param W     columns per channel
 * @param x     input, C * H * W doubles, element (c, i, j) at (c * H + i) * W + j
 * @param k3x3  weights, C * 9 doubles; channel c uses the row-major 3x3 block
 *              starting at k3x3[c * 9]
 * @param y     output, same size and layout as x; every element is overwritten.
 *              Should not overlap x: outputs are stored while neighbouring
 *              inputs are still to be read.
 *
 * If any of C, H, W is not positive the function returns without touching y.
 */
void kernel_run(int C, int H, int W, const double *x, const double *k3x3,
                double *y) {
  /* Empty tensor: nothing to do. This also guarantees the size_t conversions
     below never see a negative value. */
  if (C <= 0 || H <= 0 || W <= 0) {
    return;
  }

  /* All flat offsets are formed in size_t: the int product (c * H + i) * W
     overflows (undefined behaviour) once C * H * W exceeds INT_MAX. */
  const size_t chans = (size_t)C;
  const size_t rows = (size_t)H;
  const size_t cols = (size_t)W;
  const size_t plane = rows * cols;

  for (size_t c = 0; c < chans; c++) {
    const double *kc = k3x3 + c * 9;
    const double *xc = x + c * plane;
    double *yc = y + c * plane;

    for (size_t i = 0; i < rows; i++) {
      /* Source rows for di = -1, 0, +1, clamped to the valid range. */
      const size_t src_row[3] = {i > 0 ? i - 1 : 0, i,
                                 i + 1 < rows ? i + 1 : rows - 1};

      for (size_t j = 0; j < cols; j++) {
        /* Source columns for dj = -1, 0, +1, clamped to the valid range. */
        const size_t src_col[3] = {j > 0 ? j - 1 : 0, j,
                                   j + 1 < cols ? j + 1 : cols - 1};

        /* Taps are accumulated row by row, left to right, so the
           floating-point summation order is fixed. */
        double acc = 0.0;
        for (int di = 0; di < 3; di++) {
          const double *xrow = xc + src_row[di] * cols;
          for (int dj = 0; dj < 3; dj++) {
            double w = kc[di * 3 + dj];
            acc += w * xrow[src_col[dj]];
          }
        }
        yc[i * cols + j] = acc;
      }
    }
  }
}
/*
 * Benchmark driver for kernel_run, produced by the BENCH_MAIN_ARRAY3_D macro.
 * The macro is not defined in this file (presumably it comes from
 * bench_harness.h -- TODO confirm), so the roles of its arguments below are
 * inferred from how they are written here and should be verified against the
 * harness:
 *
 *   1-2. T002_Ops_018, OP18: identifiers for this benchmark.
 *   3-5. 32768, 65536, 131072: these equal C * 64 * 64 for C = 8, 16, 32,
 *        i.e. the output element count of each of the three cases.
 *   6.   Declarations: C is chosen from case_id (1 -> 8, 2 -> 16, otherwise 32),
 *        H = W = 64, and the x, y and k buffers are allocated. case_id is not
 *        declared here; it is expected to be supplied by the macro.
 *   7.   Initialisation: x and k are filled via bench_fill_array.
 *   8.   The kernel call under test.
 *   9-10. The output buffer and its element count.
 *   11.  Cleanup: the three buffers are freed.
 *
 * NOTE(review): N is computed but never used afterwards; later arguments
 * recompute (size_t)C * H * W instead.
 * NOTE(review): the malloc results are used without a NULL check.
 */
BENCH_MAIN_ARRAY3_D(T002_Ops_018, OP18, 32768, 65536, 131072,
                    /* Channel count depends on the selected case. */
                    int C = (case_id == 1 ? 8 : (case_id == 2 ? 16 : 32));
                    int H = 64; int W = 64; size_t N = (size_t)C * H * W;
                    double *x = malloc(N * sizeof(double));
                    double *y = malloc(N * sizeof(double));
                    /* One 3x3 weight block (9 doubles) per channel. The
                       trailing comma ends this macro argument. */
                    double *k = malloc((size_t)C * 9 * sizeof(double)),
                    /* y is left unfilled: kernel_run overwrites every
                       element of it. */
                    bench_fill_array(x, (size_t)C * H * W, bench_seed(18));
                    /* The seed is XORed with 0xABCD so k gets a different
                       seed value than x. */
                    bench_fill_array(k, (size_t)C * 9, bench_seed(18) ^ 0xABCD);
                    , kernel_run(C, H, W, x, k, y), y, (size_t)C * H * W,
                    free(x);
                    free(y); free(k))
