
#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void kernel_run(int R, int D, const double *x, double *y, double eps) {

  for (int r = 0; r < R; r++) {
    const double *xr = x + r * D;
    double *yr = y + r * D;
    double s = 0.0;
    for (int i = 0; i < D; i++)
      s += xr[i] * xr[i];
    double rms = sqrt(s / D + eps);
    for (int i = 0; i < D; i++)
      yr[i] = xr[i] / rms;
  }
}
/*
 * Benchmark entry point, generated by BENCH_MAIN_ARRAY3_D (defined outside
 * this file, presumably in bench_harness.h -- confirm there).
 *
 * The top-level commas split the invocation into these arguments:
 *   1-2.  T002_Ops_008, OP08   identifiers passed to the harness
 *   3-5.  4096, 8192, 16384    three integer sizes
 *   6.    declarations: D = 64, R = n / D, and two malloc'd buffers x and y
 *         of R * D doubles each
 *   7.    bench_fill_array(x, R * D, bench_seed(8));
 *   8.    kernel_run(R, D, x, y, 1e-5)
 *   9-10. y and (size_t)R * D
 *   11.   free(x); free(y)
 *
 * `n` is not declared here; presumably the macro provides it and sets it to
 * each of the three sizes in turn -- TODO confirm. Arguments 9-10 look like
 * the result buffer and its element count handed back to the harness --
 * verify against the macro definition.
 *
 * NOTE(review): neither malloc result is checked before use.
 */
BENCH_MAIN_ARRAY3_D(T002_Ops_008, OP08, 4096, 8192, 16384, int D = 64;
                    int R = n / D;
                    double *x = malloc((size_t)R * D * sizeof(double));
                    double *y = malloc((size_t)R * D * sizeof(double)),
                    bench_fill_array(x, (size_t)R *D, bench_seed(8));
                    , kernel_run(R, D, x, y, 1e-5), y, (size_t)R *D, free(x);
                    free(y))
