
#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdlib.h>
void kernel_run(int M, int K, int N, const double *A, const double *B,
                const double *bias_row, double *C) {

  for (int i = 0; i < M; i++) {
    for (int j = 0; j < N; j++) {
      double acc = bias_row[i];
      for (int k = 0; k < K; k++) {
        acc += A[i * K + k] * B[k * N + j];
      }
      C[i * N + j] = acc;
    }
  }
}
/*
 * Benchmark entry point for kernel_run, generated by BENCH_MAIN_ARRAY3_D.
 * The macro is not defined in this file (presumably it comes from
 * bench_harness.h), so the role of each argument below is inferred from its
 * content and should be confirmed against the macro definition.
 *
 * Arguments, split at the top-level commas:
 *   1-2.  T002_Ops_054, OP84 - identifiers passed to the harness.
 *   3-5.  4096, 16384, 65536 - these equal M*N for the three problem sizes
 *         chosen below (64*64, 128*128, 256*256).
 *   6.    Setup fragment: picks M from case_id (1 -> 64, 2 -> 128, anything
 *         else -> 256), sets K = N = M, and mallocs A, B, bias_row and C.
 *         case_id is not declared here; presumably the macro provides it.
 *   7.    Fill fragment: populates A, B and bias_row via bench_fill_array,
 *         each with a different value derived from bench_seed(84).
 *         C is not filled; kernel_run overwrites every element of it.
 *   8.    The kernel call itself.
 *   9-10. C and its element count (size_t)(M * N).
 *   11.   Cleanup fragment: frees all four buffers.
 *
 * NOTE(review): none of the malloc results are checked for NULL before the
 * buffers are filled and passed to kernel_run.
 *
 * The unusual placement of the commas on the "double *C = ..." line and at
 * the start of the "kernel_run(...)" line is significant: those commas are
 * macro-argument separators, not part of the C statements around them.
 */
BENCH_MAIN_ARRAY3_D(
    T002_Ops_054, OP84, 4096, 16384, 65536,
    /* Setup: square problem, side length selected by case_id. */
    int M = (case_id == 1 ? 64 : (case_id == 2 ? 128 : 256));
    int K = M; int N = M; double *A = malloc((size_t)(M * K) * sizeof(double));
    double *B = malloc((size_t)(K * N) * sizeof(double));
    double *bias_row = malloc((size_t)M * sizeof(double));
    double *C = malloc((size_t)(M * N) * sizeof(double)),
    /* Fill: inputs only; the seed is XORed with 0x1u / 0x2u for B / bias_row. */
    bench_fill_array(A, (size_t)(M * K), bench_seed(84));
    bench_fill_array(B, (size_t)(K * N), bench_seed(84) ^ 0x1u);
    bench_fill_array(bias_row, (size_t)M, bench_seed(84) ^ 0x2u);
    /* Kernel call, output array, output length, then cleanup. */
    , kernel_run(M, K, N, A, B, bias_row, C), C, (size_t)(M * N), free(A);
    free(B); free(bias_row); free(C))
