
#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdlib.h>
/*
 * Row-wise argmax over a row-major matrix of doubles.
 *
 * rows    : number of rows; nothing is written when rows <= 0.
 * cols    : number of columns in each row.
 * x       : matrix data, row r occupies x[r * cols] .. x[r * cols + cols - 1].
 * out_idx : receives one entry per row, the column index of that row's
 *           largest value.
 *
 * Ties resolve to the lowest column index because only a strictly greater
 * value replaces the current best. A NaN never compares greater, so a NaN is
 * only ever "selected" when it sits in column 0 (index 0 is then kept).
 *
 * When cols <= 0 a row has no elements: x is not read and every out_idx[r]
 * is set to -1.
 */
void kernel_run(int rows, int cols, const double *x, int32_t *out_idx) {
  if (cols <= 0) {
    /* Empty rows: there is no element to read, so report "no index". */
    for (int r = 0; r < rows; r++) {
      out_idx[r] = -1;
    }
    return;
  }

  const size_t stride = (size_t)cols;

  for (int r = 0; r < rows; r++) {
    /* Offset computed in size_t: r * cols in int overflows for big inputs. */
    const double *row = x + (size_t)r * stride;
    double best = row[0];
    int32_t best_i = 0;

    for (int c = 1; c < cols; c++) {
      double v = row[c];
      if (v > best) {
        best = v;
        best_i = (int32_t)c;
      }
    }
    out_idx[r] = best_i;
  }
}
/*
 * Benchmark driver for kernel_run, produced by BENCH_MAIN_ARRAY3_BYTES.
 * The macro is defined outside this file (presumably in bench_harness.h --
 * TODO confirm), so how each argument is used is not visible here.
 *
 * Top-level commas separate macro arguments. The ',' that ends the out_idx
 * declaration and the ',' that precedes the kernel_run call are therefore
 * argument boundaries, not typos; do not "fix" them into ';'.
 * As written, the invocation passes 11 arguments:
 *    1-2  T002_Ops_047, OP77 : identifiers interpreted by the macro.
 *    3-5  16384, 32768, 65536: numerically equal to rows * cols for the three
 *                              row counts below (128/256/512 x 128);
 *                              presumably per-case sizes -- verify.
 *    6    declarations and allocation of rows, cols, x and out_idx.
 *    7    input initialisation via bench_fill_array.
 *    8    the kernel call being benchmarked.
 *    9-10 out_idx and its size in bytes (presumably the result buffer the
 *         harness inspects -- verify against the macro).
 *    11   release of both buffers.
 *
 * NOTE(review): neither malloc result is checked before use.
 */
BENCH_MAIN_ARRAY3_BYTES(
    T002_Ops_047, OP77, 16384, 32768, 65536,
    /* case_id is not declared here; presumably supplied by the macro
       expansion. case 1 -> 128 rows, case 2 -> 256 rows, otherwise 512. */
    int rows = (case_id == 1 ? 128 : (case_id == 2 ? 256 : 512));
    int cols = 128; double *x = malloc((size_t)(rows * cols) * sizeof(double));
    int32_t *out_idx = malloc((size_t)rows * sizeof(int32_t)),
    /* Populate all rows * cols inputs using a value derived from
       bench_seed(77); the fill pattern is defined by the helper. */
    bench_fill_array(x, (size_t)(rows * cols), bench_seed(77));
    , kernel_run(rows, cols, x, out_idx), out_idx,
    (size_t)rows * sizeof(int32_t), free(x);
    free(out_idx))
