
#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void kernel_run(int R, int D, const double *x, const double *g, const double *b,
                double *y, double eps) {

  for (int r = 0; r < R; r++) {
    const double *xr = x + r * D;
    double *yr = y + r * D;
    double mean = 0.0;
    for (int i = 0; i < D; i++)
      mean += xr[i];
    mean /= D;
    double var = 0.0;
    for (int i = 0; i < D; i++) {
      double d = xr[i] - mean;
      var += d * d;
    }
    var /= D;
    double inv = 1.0 / sqrt(var + eps);
    for (int i = 0; i < D; i++) {
      yr[i] = (xr[i] - mean) * inv * g[i] + b[i];
    }
  }
}
/*
 * Benchmark driver for kernel_run, generated by the project-defined
 * BENCH_MAIN_ARRAY3_D macro (defined outside this file; the roles of its
 * arguments below are inferred from how they are used here -- confirm against
 * bench_harness.h).
 *
 * Arguments, in order:
 *   - T002_Ops_009, OP09: identifiers for this benchmark.
 *   - 4096, 8192, 16384: presumably the three problem sizes bound to `n`.
 *   - Allocation code: fixes the row width D at 64, derives the row count
 *     R = n / D, and mallocs x and y (R * D doubles each) and g and b
 *     (D doubles each). `n` is not declared here, so it is presumably
 *     supplied by the macro. The malloc results are not checked.
 *   - Initialization code: fills x, g and b through bench_fill_array, using
 *     bench_seed(9) and that seed XORed with 0x1111 / 0x2222. y is not
 *     filled before the kernel call.
 *   - Kernel call: kernel_run(R, D, x, g, b, y, 1e-5).
 *   - y, (size_t)R * D: presumably the result array and its element count
 *     handed to the harness.
 *   - Cleanup code: frees all four buffers.
 */
BENCH_MAIN_ARRAY3_D(T002_Ops_009, OP09, 4096, 8192, 16384, int D = 64;
                    int R = n / D;
                    double *x = malloc((size_t)R * D * sizeof(double));
                    double *y = malloc((size_t)R * D * sizeof(double));
                    double *g = malloc((size_t)D * sizeof(double));
                    double *b = malloc((size_t)D * sizeof(double)),
                    bench_fill_array(x, (size_t)R *D, bench_seed(9));
                    bench_fill_array(g, D, bench_seed(9) ^ 0x1111);
                    bench_fill_array(b, D, bench_seed(9) ^ 0x2222);
                    , kernel_run(R, D, x, g, b, y, 1e-5), y, (size_t)R *D,
                    free(x);
                    free(y); free(g); free(b))
