
#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void kernel_run(int R, int D, const double *x, double *y) {

  for (int r = 0; r < R; r++) {
    const double *xr = x + r * D;
    double *yr = y + r * D;
    double m = xr[0];
    for (int i = 1; i < D; i++)
      if (xr[i] > m)
        m = xr[i];
    double s = 0.0;
    for (int i = 0; i < D; i++) {
      yr[i] = exp(xr[i] - m);
      s += yr[i];
    }
    double inv = 1.0 / s;
    for (int i = 0; i < D; i++)
      yr[i] *= inv;
  }
}
/*
 * Benchmark entry point for kernel_run, generated by the
 * BENCH_MAIN_ARRAY3_D macro (not defined in this file; presumably provided
 * by bench_harness.h -- TODO confirm).
 *
 * NOTE(review): the argument roles below are read off the call site only;
 * verify them against the macro definition.
 *   - T002_Ops_012, OP12: identifiers passed to the macro.
 *   - 4096, 8192, 16384: three integer constants, presumably the problem
 *     sizes that become `n` (`n` is not declared here, so it must come
 *     from the macro expansion).
 *   - Declarations: a fixed row length D = 64, R = n / D rows, and two heap
 *     buffers x and y of R * D doubles each. The malloc results are not
 *     checked in the visible code.
 *   - bench_fill_array(x, R * D, bench_seed(12)): fills the input buffer.
 *   - kernel_run(R, D, x, y): the call being benchmarked.
 *   - y, (size_t)R * D: the output buffer and its element count.
 *   - free(x); free(y): releases both buffers.
 */
BENCH_MAIN_ARRAY3_D(T002_Ops_012, OP12, 4096, 8192, 16384, int D = 64;
                    int R = n / D;
                    double *x = malloc((size_t)R * D * sizeof(double));
                    double *y = malloc((size_t)R * D * sizeof(double)),
                    bench_fill_array(x, (size_t)R *D, bench_seed(12));
                    , kernel_run(R, D, x, y), y, (size_t)R *D, free(x); free(y))
