
#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdlib.h>
/*
 * Dense matrix-vector product with bias: y[i] = bias[i] + sum_k A[i*K + k] * xvec[k].
 *
 * M     number of rows of A; also the number of entries read from bias and
 *       written to y
 * K     number of columns of A; also the number of entries read from xvec
 * A     matrix elements, element (i, k) is read from A[i * K + k]
 * xvec  input vector, entries 0..K-1 are read
 * bias  per-row starting value of the accumulator, entries 0..M-1 are read
 * y     output, entries 0..M-1 are overwritten
 *
 * If M <= 0 nothing is written. If K <= 0 each y[i] receives bias[i].
 * Products are accumulated in increasing k, starting from bias[i].
 */
void kernel_run(int M, int K, const double *A, const double *xvec,
                const double *bias, double *y) {
  for (int i = 0; i < M; i++) {
    /* Form the row offset in size_t: the int expression i * K overflows
     * (undefined behaviour) once M * K exceeds INT_MAX. Unsigned arithmetic
     * is well-defined here even when K <= 0, and the offset is only used
     * inside the inner loop, which does not run in that case. */
    const size_t row_start = (size_t)i * (size_t)K;
    double acc = bias[i];
    for (int k = 0; k < K; k++) {
      acc += A[row_start + (size_t)k] * xvec[k];
    }
    y[i] = acc;
  }
}
/*
 * Benchmark driver for kernel_run, produced by the BENCH_MAIN_ARRAY3_D macro
 * (not defined in this file; presumably from bench_harness.h -- confirm).
 *
 * Arguments, in the order written (split on top-level commas):
 *   - T002_Ops_055, OP85: identifiers handed to the macro.
 *   - 16384, 32768, 65536: three integer constants. They equal M * K for
 *     M = 128, 256, 512 with K = 128; presumably one size per case_id --
 *     verify against the macro definition.
 *   - Declarations: K is fixed at 128; M is 128 when case_id == 1, 256 when
 *     case_id == 2, and 512 otherwise. case_id is not declared here, so it
 *     must come from the macro expansion. Allocates A (M * K doubles),
 *     xvec (K doubles), bias (M doubles) and y (M doubles). The last
 *     declaration has no trailing ';' because the argument ends at the
 *     comma; presumably the macro supplies it -- confirm.
 *   - Initialisation: fills A, xvec and bias via bench_fill_array, each with
 *     a different value derived from bench_seed(85). y is not filled here;
 *     kernel_run assigns every y[0..M-1].
 *   - Kernel call: kernel_run(M, K, A, xvec, bias, y).
 *   - y, (size_t)M: the output array and its element count.
 *   - Cleanup: frees all four buffers.
 *
 * NOTE(review): none of the malloc results are checked for NULL in the code
 * visible here.
 */
BENCH_MAIN_ARRAY3_D(T002_Ops_055, OP85, 16384, 32768, 65536, int K = 128;
                    int M = (case_id == 1 ? 128 : (case_id == 2 ? 256 : 512));
                    double *A = malloc((size_t)(M * K) * sizeof(double));
                    double *xvec = malloc((size_t)K * sizeof(double));
                    double *bias = malloc((size_t)M * sizeof(double));
                    double *y = malloc((size_t)M * sizeof(double)),
                    bench_fill_array(A, (size_t)(M * K), bench_seed(85));
                    bench_fill_array(xvec, (size_t)K, bench_seed(85) ^ 0x44u);
                    bench_fill_array(bias, (size_t)M, bench_seed(85) ^ 0x55u);
                    , kernel_run(M, K, A, xvec, bias, y), y, (size_t)M, free(A);
                    free(xvec); free(bias); free(y))
