#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Flat offset of element (i, j) in a row-major matrix that has n columns. */
#define IDX(i, j, n) ((size_t)(i) * (size_t)(n) + (size_t)(j))

/*
 * Applies A and then its transpose to a vector: tmp = A * x, y = A^T * tmp.
 *
 * n    number of rows and columns of A; nothing is read or written if n <= 0
 * A    n*n matrix elements, indexed row-major (element (i, j) at i*n + j)
 * x    input vector, n elements
 * y    output vector, n elements; every entry is overwritten
 * tmp  scratch vector, n elements; holds A * x on return
 */
void kernel_run(int n, const double *A, const double *x, double *y,
                double *tmp) {
  /* Pass 1: tmp[r] is the dot product of row r of A with x. */
  for (int r = 0; r < n; ++r) {
    const double *row = A + IDX(r, 0, n);
    double acc = 0.0;
    for (int c = 0; c < n; ++c) {
      acc += row[c] * x[c];
    }
    tmp[r] = acc;
  }

  /* Pass 2: y[c] is the dot product of column c of A with tmp. */
  for (int c = 0; c < n; ++c) {
    const double *col = A + c;
    double acc = 0.0;
    for (int r = 0; r < n; ++r) {
      /* Step down the column: consecutive rows are n elements apart. */
      acc += col[IDX(r, 0, n)] * tmp[r];
    }
    y[c] = acc;
  }
}
/*
 * Benchmark driver for kernel_run, produced by the BENCH_MAIN_ARRAY3_D macro.
 *
 * NOTE(review): the macro definition is not visible in this file. The roles
 * of its arguments (benchmark name, the constant 11, the three values
 * 128/256/384, then allocation code, initialisation code, the kernel call,
 * the result array with its length, and cleanup code) are inferred from
 * their contents -- confirm against the harness header.
 */
BENCH_MAIN_ARRAY3_D(T001_Loops_018, 11, 128, 256, 384,
                    double *A = malloc((size_t)n * n * sizeof(double));
                    double *x = malloc((size_t)n * sizeof(double));
                    double *y = malloc((size_t)n * sizeof(double));
                    double *tmp = malloc((size_t)n * sizeof(double));
                    /* Stop before any buffer is touched if an allocation
                       failed; filling or reading through NULL is undefined
                       behaviour. free(NULL) is a no-op. */
                    if (A == NULL || x == NULL || y == NULL || tmp == NULL) {
                      fputs("T001_Loops_018: out of memory\n", stderr);
                      free(A);
                      free(x);
                      free(y);
                      free(tmp);
                      exit(EXIT_FAILURE);
                    }
                    , bench_fill_array(A, (size_t)n *n, bench_seed(20));
                    bench_fill_array(x, n, bench_seed(21));
                    /* tmp needs no initialisation: kernel_run writes every
                       element of it before reading any. */
                    memset(y, 0, (size_t)n * sizeof(double));
                    , kernel_run(n, A, x, y, tmp);, y, n, free(A); free(x);
                    free(y); free(tmp);)
