
#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void kernel_run(int R, int C, const double *x, const double *mean,
                const double *var, const double *gamma, const double *beta,
                double *y, double eps) {

  for (int r = 0; r < R; r++) {
    const double *xr = x + r * C;
    double *yr = y + r * C;
    for (int c = 0; c < C; c++) {
      double inv = 1.0 / sqrt(var[c] + eps);
      yr[c] = gamma[c] * (xr[c] - mean[c]) * inv + beta[c];
    }
  }
}
/*
 * Benchmark driver for kernel_run, generated by BENCH_MAIN_ARRAY3_D.
 *
 * The macro is defined outside this file (presumably in bench_harness.h), so
 * what it does with each argument is not visible here. The preprocessor
 * splits the invocation on top-level commas into eleven arguments:
 *
 *    1-2   T002_Ops_010, OP10   identifiers handed to the harness
 *    3-5   4096, 8192, 16384    presumably the problem sizes bound to `n`
 *                               -- TODO confirm against the macro
 *    6     declarations         C, R and the six heap buffers; the comma
 *                               after the `b` allocation ends this argument
 *    7     fill statements      bench_fill_array calls for x, mean, var, g, b
 *    8     kernel call          kernel_run(R, C, x, mean, var, g, b, y, 1e-5)
 *    9     y                    buffer written by kernel_run
 *   10     (size_t)R * C        element count of y
 *   11     cleanup              free() of all six buffers
 *
 * `n` is not declared anywhere in this file; it is assumed to be supplied by
 * the macro expansion.
 *
 * NOTE(review): none of the malloc results are checked before being passed
 * to bench_fill_array / kernel_run.
 * NOTE(review): the value range produced by bench_fill_array is not visible
 * here; kernel_run takes sqrt(var[c] + eps), so confirm that the filled `var`
 * entries stay above -eps.
 */
BENCH_MAIN_ARRAY3_D(T002_Ops_010, OP10, 4096, 8192, 16384, int C = 64;
                    /* Integer division: any remainder of n / C is dropped. */
                    int R = n / C;
                    double *x = malloc((size_t)R * C * sizeof(double));
                    /* y is allocated but not filled; kernel_run stores to
                     * every y[r*C + c] for r < R, c < C. */
                    double *y = malloc((size_t)R * C * sizeof(double));
                    double *mean = malloc(C * sizeof(double));
                    double *var = malloc(C * sizeof(double));
                    double *g = malloc(C * sizeof(double));
                    double *b = malloc(C * sizeof(double)),
                    /* Each buffer gets bench_seed(10), XORed with a distinct
                     * constant for the four per-column arrays. */
                    bench_fill_array(x, (size_t)R *C, bench_seed(10));
                    bench_fill_array(mean, C, bench_seed(10) ^ 0xA1);
                    bench_fill_array(var, C, bench_seed(10) ^ 0xA2);
                    bench_fill_array(g, C, bench_seed(10) ^ 0xA3);
                    bench_fill_array(b, C, bench_seed(10) ^ 0xA4);
                    /* g and b are passed as kernel_run's gamma and beta;
                     * 1e-5 is its eps. */
                    , kernel_run(R, C, x, mean, var, g, b, y, 1e-5), y,
                    (size_t)R *C, free(x);
                    free(y); free(mean); free(var); free(g); free(b))
