
#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdlib.h>
/**
 * Out-of-place matrix transpose.
 *
 * Reads x as an H-by-W row-major matrix and writes its transpose into y,
 * stored as a W-by-H row-major matrix: y[j * H + i] = x[i * W + j].
 *
 * @param H number of rows in x; nothing is written when H <= 0
 * @param W number of columns in x; nothing is written when W <= 0
 * @param x source buffer, read at offsets 0 .. H * W - 1
 * @param y destination buffer, written at offsets 0 .. H * W - 1
 */
void kernel_run(int H, int W, const double *x, double *y) {
  /* Non-positive dimensions describe an empty matrix; bail out before the
   * conversion to size_t below, which would turn a negative value into a
   * huge count. */
  if (H <= 0 || W <= 0) {
    return;
  }

  /* Offsets are computed in size_t: int products such as i * W overflow
   * (undefined behaviour) once H * W exceeds INT_MAX. */
  const size_t rows = (size_t)H;
  const size_t cols = (size_t)W;

  for (size_t i = 0; i < rows; i++) {
    const double *src_row = x + i * cols;
    for (size_t j = 0; j < cols; j++) {
      y[j * rows + i] = src_row[j];
    }
  }
}
/*
 * Benchmark driver: an invocation of the BENCH_MAIN_ARRAY3_D macro, which is
 * defined outside this file (presumably in bench_harness.h -- TODO confirm).
 *
 * The statement-like arguments below do the following:
 *   - pick a square size from case_id: 64 for case 1, 128 for case 2, and
 *     256 otherwise (case_id is not declared here; presumably it is supplied
 *     by the macro expansion);
 *   - allocate x and y with H * W doubles each and fill x through
 *     bench_fill_array using bench_seed(86);
 *   - call kernel_run(H, W, x, y);
 *   - hand y and its element count, H * W, to the macro;
 *   - free both buffers.
 *
 * The constants 4096, 16384 and 65536 equal H * W for the three sizes
 * (64, 128 and 256 squared); how the macro uses them is not visible here.
 *
 * NOTE(review): neither malloc result is checked for NULL before use.
 * NOTE(review): the declaration of y ends with ',' rather than ';', so the
 * preprocessor starts a new macro argument at bench_fill_array -- confirm
 * this matches the macro's parameter list and is not a typo for ';'.
 */
BENCH_MAIN_ARRAY3_D(T002_Ops_056, OP86, 4096, 16384, 65536,
                    int H = (case_id == 1 ? 64 : (case_id == 2 ? 128 : 256));
                    int W = H;
                    double *x = malloc((size_t)(H * W) * sizeof(double));
                    double *y = malloc((size_t)(H * W) * sizeof(double)),
                    bench_fill_array(x, (size_t)(H * W), bench_seed(86));
                    , kernel_run(H, W, x, y), y, (size_t)(H * W), free(x);
                    free(y))
