#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Flat offset of element (i, j) in a row-major matrix with n columns. */
#define IDX(i, j, n) ((size_t)(i) * (size_t)(n) + (size_t)(j))

/*
 * Computes two matrix-vector products over the n x n row-major matrix A
 * and stores them back to back in rs:
 *
 *   rs[0 .. n-1]    = A   * p   (row i of A dotted with p)
 *   rs[n .. 2n-1]   = A^T * q   (column j of A dotted with q)
 *
 * n   number of rows and columns of A; nothing is written when n <= 0
 * rs  output buffer, written at indices 0 .. 2n-1
 * A   input matrix, read at indices 0 .. n*n-1
 * p   input vector of n elements for the A * p product
 * q   input vector of n elements for the A^T * q product
 */
void kernel_run(int n, double *rs, const double *A, const double *p,
                const double *q) {
  double *const row_products = rs;
  double *const col_products = rs + n;

  /* First half of rs: A * p, one dot product per matrix row. */
  for (int row = 0; row < n; ++row) {
    const double *a_row = &A[IDX(row, 0, n)];
    double acc = 0.0;
    for (int k = 0; k < n; ++k) {
      acc += a_row[k] * p[k];
    }
    row_products[row] = acc;
  }

  /* Second half of rs: A^T * q, one dot product per matrix column. */
  for (int col = 0; col < n; ++col) {
    double acc = 0.0;
    for (int k = 0; k < n; ++k) {
      acc += A[IDX(k, col, n)] * q[k];
    }
    col_products[col] = acc;
  }
}
/*
 * Benchmark driver for kernel_run, generated by BENCH_MAIN_ARRAY3_D.
 *
 * The code passed to the macro does the following for a given n:
 *   - allocates A (n*n doubles), p and q (n doubles each) and rs (2n doubles)
 *   - fills A, p and q via bench_fill_array with seeds 15, 16 and 17
 *   - calls kernel_run(n, rs, A, p, q)
 *   - hands rs and its length 2n to the macro
 *   - frees all four buffers
 *
 * NOTE(review): the macro definition is not visible in this file. The first
 * five arguments look like a benchmark name, an id and three problem sizes,
 * and the remaining ones like setup / init / run / output / output length /
 * cleanup segments -- confirm against the macro's definition.
 */
BENCH_MAIN_ARRAY3_D(T001_Loops_019, 09, 128, 256, 512,
                    double *A = malloc((size_t)n * n * sizeof *A);
                    double *p = malloc((size_t)n * sizeof *p);
                    double *q = malloc((size_t)n * sizeof *q);
                    double *rs = malloc((size_t)2 * n * sizeof *rs);
                    /* Stop before any buffer is touched if an allocation
                       failed; process exit releases whatever was obtained.
                       NOTE(review): assumes aborting the run is acceptable
                       to the harness -- confirm. */
                    if (!A || !p || !q || !rs) {
                      fputs("T001_Loops_019: memory allocation failed\n",
                            stderr);
                      exit(EXIT_FAILURE);
                    }
                    , bench_fill_array(A, (size_t)n * n, bench_seed(15));
                    bench_fill_array(p, n, bench_seed(16));
                    bench_fill_array(q, n, bench_seed(17));
                    , kernel_run(n, rs, A, p, q);, rs, (size_t)2 * n, free(A);
                    free(p); free(q); free(rs);)
