
#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdlib.h>
/**
 * Per-position channel mixing with bias (the arithmetic of a 1x1 convolution)
 * over planar, channel-major buffers.
 *
 * For every output channel oc and flattened spatial index hw (0 <= hw < H*W):
 *
 *     y[oc*H*W + hw] = b[oc] + sum over ic of w[oc*Cin + ic] * x[ic*H*W + hw]
 *
 * The sum is accumulated in increasing ic order, starting from b[oc].
 *
 * @param Cin   number of input channels; if <= 0 every output equals its bias
 * @param Cout  number of output channels; if <= 0 nothing is written
 * @param H     spatial height
 * @param W     spatial width; if H*W <= 0 nothing is written
 * @param x     input, indexed as x[ic*H*W + hw]    (Cin * H*W elements read)
 * @param w     weights, indexed as w[oc*Cin + ic]  (Cout * Cin elements read)
 * @param b     bias, indexed as b[oc]              (Cout elements read)
 * @param y     output, indexed as y[oc*H*W + hw]   (Cout * H*W elements written)
 */
void kernel_run(int Cin, int Cout, int H, int W, const double *x,
                const double *w, const double *b, double *y) {
  /* Widen before multiplying: H * W evaluated in int can overflow (UB). */
  const long long plane_wide = (long long)H * (long long)W;
  if (Cout <= 0 || plane_wide <= 0) {
    return; /* empty output: same as the loops never executing */
  }

  const size_t plane = (size_t)plane_wide;
  /* A non-positive Cin means an empty reduction, not a huge unsigned count. */
  const size_t in_channels = Cin > 0 ? (size_t)Cin : 0;
  const size_t out_channels = (size_t)Cout;

  for (size_t oc = 0; oc < out_channels; oc++) {
    /* All offsets are computed in size_t so large tensors cannot wrap int. */
    const double *w_row = w + oc * in_channels;
    double *y_plane = y + oc * plane;

    for (size_t hw = 0; hw < plane; hw++) {
      double acc = b[oc];
      for (size_t ic = 0; ic < in_channels; ic++) {
        acc += w_row[ic] * x[ic * plane + hw];
      }
      y_plane[hw] = acc;
    }
  }
}
/*
 * Benchmark entry point, produced by invoking BENCH_MAIN_ARRAY3_D (defined
 * outside this file -- presumably in bench_harness.h; confirm).
 *
 * Arguments as written here, in order:
 *   - T002_Ops_045, OP75: identifiers handed to the macro.
 *   - 4096, 16384, 65536: three integer constants; their meaning is set by the
 *     macro (NOTE(review): presumably per-case sizes -- confirm).
 *   - declarations: H = W = 32, Cin/Cout chosen from case_id, and four heap
 *     buffers x (Cin*HW), w (Cout*Cin), b (Cout), y (Cout*HW) of double.
 *   - initialisation: x, w and b are filled with bench_fill_array; y is not.
 *   - the measured call: kernel_run(Cin, Cout, H, W, x, w, b, y).
 *   - the result array y and its element count Cout*HW.
 *   - cleanup: free() of all four buffers.
 *
 * case_id is not declared in this file; it is presumably supplied by the
 * macro expansion.
 *
 * NOTE(review): the malloc results are used without a NULL check.
 */
BENCH_MAIN_ARRAY3_D(
    T002_Ops_045, OP75, 4096, 16384, 65536, int H = 32; int W = 32;
    /* case 1: Cin=8, Cout=16; case 2: Cin=16, Cout=32; otherwise 32 and 64. */
    int Cin = (case_id == 1 ? 8 : (case_id == 2 ? 16 : 32));
    int Cout = (case_id == 1 ? 16 : (case_id == 2 ? 32 : 64)); int HW = H * W;
    /* Buffer sizes match the index ranges kernel_run reads and writes. */
    double *x = malloc((size_t)(Cin * HW) * sizeof(double));
    double *w = malloc((size_t)(Cout * Cin) * sizeof(double));
    double *b = malloc((size_t)Cout * sizeof(double));
    double *y = malloc((size_t)(Cout * HW) * sizeof(double)),
    /* Each input gets a different seed value: bench_seed(75) XOR 0, 1 or 2. */
    bench_fill_array(x, (size_t)(Cin * HW), bench_seed(75));
    bench_fill_array(w, (size_t)(Cout * Cin), bench_seed(75) ^ 0x1u);
    bench_fill_array(b, (size_t)Cout, bench_seed(75) ^ 0x2u);
    , kernel_run(Cin, Cout, H, W, x, w, b, y), y, (size_t)(Cout * HW), free(x);
    free(w); free(b); free(y))
