
#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdlib.h>
/*
 * Fills the D-by-D row-major matrix C with the product of a-transposed and b.
 *
 * a and b are each read as B rows of D doubles (row-major). Entry (i, j) of C
 * is the sum over every row r of a[r][i] * b[r][j], accumulated in ascending
 * row order starting from 0.0. Every one of the D * D entries of C is
 * overwritten; with B <= 0 each entry becomes 0.0, and with D <= 0 nothing is
 * written.
 */
void kernel_run(int B, int D, const double *a, const double *b, double *C) {
  for (int i = 0; i < D; ++i) {
    double *out_row = &C[i * D];
    for (int j = 0; j < D; ++j) {
      double sum = 0.0;
      int r = 0;
      while (r < B) {
        const double *lhs = &a[r * D];
        const double *rhs = &b[r * D];
        /* Multiply and add stay in one expression, as in the original. */
        sum += lhs[i] * rhs[j];
        ++r;
      }
      out_row[j] = sum;
    }
  }
}
/*
 * Benchmark driver for kernel_run, generated by BENCH_MAIN_ARRAY3_D (the
 * macro is defined outside this file, presumably in bench_harness.h; its
 * exact expansion is not visible here -- TODO confirm against the header).
 *
 * Arguments, in order, as split by the top-level commas:
 *   1-2.  T002_Ops_058, OP88   -- identifiers handed to the macro.
 *   3-5.  256, 1024, 4096      -- three numeric constants; their meaning is
 *                                 defined by the macro, not by this file.
 *   6.    Setup declarations: B is fixed at 128; D is 16 when case_id == 1,
 *         32 when case_id == 2, and 64 otherwise (case_id is not declared in
 *         this file, so it is presumably supplied by the macro). a and b are
 *         heap buffers of B * D doubles each; C is a heap buffer of D * D
 *         doubles.
 *   7.    Input initialisation: a is filled via bench_fill_array with
 *         bench_seed(88), b with bench_seed(88) ^ 0x33u.
 *   8.    The call under test: kernel_run(B, D, a, b, C).
 *   9-10. C and (size_t)(D * D) -- the output buffer and its element count.
 *   11.   Cleanup: frees a, b and C.
 *
 * NOTE(review): the malloc results are used without a NULL check; whether the
 * macro guards against allocation failure cannot be determined from here.
 */
BENCH_MAIN_ARRAY3_D(T002_Ops_058, OP88, 256, 1024, 4096, int B = 128;
                    int D = (case_id == 1 ? 16 : (case_id == 2 ? 32 : 64));
                    double *a = malloc((size_t)(B * D) * sizeof(double));
                    double *b = malloc((size_t)(B * D) * sizeof(double));
                    double *C = malloc((size_t)(D * D) * sizeof(double)),
                    bench_fill_array(a, (size_t)(B * D), bench_seed(88));
                    bench_fill_array(b, (size_t)(B * D),
                                     bench_seed(88) ^ 0x33u);
                    , kernel_run(B, D, a, b, C), C, (size_t)(D * D), free(a);
                    free(b); free(C))
