
#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Regroup the columns of a row-major R x C matrix, row by row.
 *
 * Within each row, the element at source column c is copied to destination
 * column (c % G) * (C / G) + (c / G). Columns that share the same remainder
 * modulo G therefore become contiguous in y, ordered by their quotient.
 *
 * Parameters:
 *   R  number of rows.
 *   C  number of columns; also the row stride (in elements) of x and y.
 *   G  number of column groups. Should divide C evenly: when it does not,
 *      every write still lands inside the row, but some destination columns
 *      are written more than once and others are left untouched.
 *   x  input matrix, at least R * C doubles; only read.
 *   y  output matrix, at least R * C doubles. It must not overlap x, since
 *      the copy would overwrite inputs that have not been read yet.
 *
 * A non-positive R, C or G makes the call a no-op; this avoids the division
 * by zero (and negative indexing) that G <= 0 would otherwise cause.
 */
void kernel_run(int R, int C, int G, const double *x, double *y) {
  if (R <= 0 || C <= 0 || G <= 0) {
    return;
  }

  const int Cs = C / G; /* width of one destination group */

  for (int r = 0; r < R; r++) {
    /* Compute the row offset in size_t: r * C can overflow int for large
     * matrices, which would be undefined behaviour. */
    const size_t row = (size_t)r * (size_t)C;
    const double *xr = x + row;
    double *yr = y + row;

    for (int c = 0; c < C; c++) {
      int g = c % G; /* destination group */
      int k = c / G; /* position inside that group */
      yr[g * Cs + k] = xr[c];
    }
  }
}
/*
 * Benchmark entry point, generated by the BENCH_MAIN_ARRAY3_D harness macro.
 * The macro is defined outside this file (presumably in bench_harness.h), so
 * the role of each argument below is inferred from its content -- confirm
 * against the macro definition before changing the argument layout.
 *
 *   - T002_Ops_029, OP29: identifiers handed to the harness.
 *   - 4096, 8192, 16384: three numeric arguments, presumably the values the
 *     harness substitutes for `n`.
 *   - Setup: C = 64 columns, R = n / C rows, G = 8 groups; allocates x and y
 *     with R * C doubles each. NOTE(review): the malloc results are not
 *     checked before use.
 *   - Initialisation: fills x through bench_fill_array with bench_seed(29).
 *   - Kernel: kernel_run(R, C, G, x, y).
 *   - y, (size_t)R * C: presumably the result array and its element count,
 *     consumed by the harness.
 *   - Teardown: frees x and y.
 */
BENCH_MAIN_ARRAY3_D(T002_Ops_029, OP29, 4096, 8192, 16384, int C = 64;
                    int R = n / C; int G = 8;
                    double *x = malloc((size_t)R * C * sizeof(double));
                    double *y = malloc((size_t)R * C * sizeof(double)),
                    bench_fill_array(x, (size_t)R *C, bench_seed(29));
                    , kernel_run(R, C, G, x, y), y, (size_t)R *C, free(x);
                    free(y))
