
#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void kernel_run(int R, int D, const double *x, double *y) {

  for (int r = 0; r < R; r++) {
    const double *xr = x + r * D;
    double *yr = y + r * D;
    double m = xr[0];
    for (int i = 1; i < D; i++)
      if (xr[i] > m)
        m = xr[i];
    double s = 0.0;
    for (int i = 0; i < D; i++)
      s += exp(xr[i] - m);
    double logZ = log(s) + m;
    for (int i = 0; i < D; i++)
      yr[i] = xr[i] - logZ;
  }
}
/*
 * Benchmark driver for kernel_run, produced by the BENCH_MAIN_ARRAY3_D macro.
 * The macro definition is not in this file (presumably bench_harness.h), so
 * the role of each argument below is inferred from this call site only --
 * TODO confirm against the header.
 *
 *   T002_Ops_013, OP13    identifiers passed to the macro (presumably the
 *                         benchmark / operation names).
 *   4096, 8192, 16384     three integer arguments, presumably the problem
 *                         sizes that `n` takes.
 *   setup fragment        D = 64, R = n / D, then x and y are each allocated
 *                         with R * D doubles.
 *   init fragment         fills x with R * D values via bench_fill_array,
 *                         using bench_seed(13).
 *   kernel call           kernel_run(R, D, x, y).
 *   y, (size_t)R * D      an array and its element count, presumably the
 *                         output the harness inspects.
 *   cleanup fragment      frees x and y.
 *
 * NOTE(review): `n` is not declared in this file; presumably the macro
 * expansion provides it.
 * NOTE(review): R = n / D is integer division, so R * D can be smaller than n
 * when n is not a multiple of 64 (the three literals above are multiples).
 * NOTE(review): neither malloc result is checked before x is filled and the
 * kernel is run.
 */
BENCH_MAIN_ARRAY3_D(T002_Ops_013, OP13, 4096, 8192, 16384, int D = 64;
                    int R = n / D;
                    double *x = malloc((size_t)R * D * sizeof(double));
                    double *y = malloc((size_t)R * D * sizeof(double)),
                    bench_fill_array(x, (size_t)R *D, bench_seed(13));
                    , kernel_run(R, D, x, y), y, (size_t)R *D, free(x); free(y))
