
#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* The two source samples one output index blends along an axis. */
struct resize_tap {
  int lo;      /* index of the lower source sample */
  int hi;      /* index of the upper source sample */
  double w_lo; /* weight applied to the sample at lo */
  double w_hi; /* weight applied to the sample at hi */
};

/*
 * Map output index out_idx to its pair of source samples along one axis.
 *
 * The centre of the output sample is projected into source coordinates as
 * (out_idx + 0.5) * scale - 0.5. The samples on either side of that position
 * are blended linearly. If either side falls outside [0, in_len), the index is
 * clamped to the border and the lower sample receives the full weight.
 *
 * in_len must be at least 1 and scale must be positive.
 */
static struct resize_tap resize_tap_at(int out_idx, double scale, int in_len) {
  const double pos = (out_idx + 0.5) * scale - 0.5;

  /* Integer floor: the cast truncates toward zero, so step down once when
   * truncation rounded a negative position up. */
  int base = (int)pos;
  if ((double)base > pos) {
    base--;
  }

  struct resize_tap tap;
  tap.lo = base;
  tap.hi = base + 1;
  tap.w_hi = pos - base;
  tap.w_lo = 1.0 - tap.w_hi;

  if (tap.lo < 0) {
    tap.lo = 0;
    tap.w_lo = 1.0;
    tap.w_hi = 0.0;
  }
  if (tap.hi >= in_len) {
    tap.hi = in_len - 1;
    tap.w_lo = 1.0;
    tap.w_hi = 0.0;
  }
  return tap;
}

/*
 * Resize an H x W row-major image of doubles to OH x OW using bilinear
 * interpolation.
 *
 * H, W   - source height and width, in elements.
 * OH, OW - destination height and width, in elements.
 * x      - source image, H * W elements, row-major; only read.
 * y      - destination image, OH * OW elements, row-major; fully overwritten.
 *
 * If any dimension is zero or negative there is nothing to sample from or
 * nothing to write, so the function returns without touching y. Neither
 * pointer is checked for NULL.
 */
void kernel_run(int H, int W, int OH, int OW, const double *x, double *y) {
  if (H <= 0 || W <= 0 || OH <= 0 || OW <= 0) {
    return;
  }

  const double sx = (double)W / (double)OW;
  const double sy = (double)H / (double)OH;

  for (int oy = 0; oy < OH; oy++) {
    const struct resize_tap ty = resize_tap_at(oy, sy, H);

    /* Row offsets are computed in size_t so that images with more than
     * INT_MAX elements do not overflow int arithmetic. */
    const double *row_lo = x + (size_t)ty.lo * (size_t)W;
    const double *row_hi = x + (size_t)ty.hi * (size_t)W;
    double *out_row = y + (size_t)oy * (size_t)OW;

    for (int ox = 0; ox < OW; ox++) {
      const struct resize_tap tx = resize_tap_at(ox, sx, W);

      /* Blend horizontally within each of the two rows, then vertically. */
      const double top = row_lo[tx.lo] * tx.w_lo + row_lo[tx.hi] * tx.w_hi;
      const double bottom = row_hi[tx.lo] * tx.w_lo + row_hi[tx.hi] * tx.w_hi;
      out_row[ox] = top * ty.w_lo + bottom * ty.w_hi;
    }
  }
}
/*
 * Benchmark driver for kernel_run, produced by the BENCH_MAIN_ARRAY3_D macro.
 * The macro is not defined in this file (presumably it comes from
 * bench_harness.h -- confirm there for the exact role of each argument).
 *
 * What the arguments visibly contain, in order:
 *   - T002_Ops_030 and OP30: identifiers handed to the harness.
 *   - 4096, 16384, 65536: these equal 64*64, 128*128 and 256*256, i.e. the
 *     input element count H*W for the three sizes selected below
 *     (presumably the per-case problem sizes -- TODO confirm).
 *   - Setup: case_id (not declared here; presumably supplied by the macro)
 *     selects H = 64 when it is 1, 128 when it is 2, and 256 otherwise.
 *     W equals H and the output is twice the input size in each dimension.
 *     x is allocated with H*W doubles and y with OH*OW doubles.
 *   - Input fill: bench_fill_array writes the H*W elements of x using
 *     bench_seed(30).
 *   - Kernel call: kernel_run(H, W, OH, OW, x, y).
 *   - Result: the buffer y and its element count (H*2)*(W*2), which equals
 *     OH*OW.
 *   - Teardown: both buffers are freed.
 *
 * NOTE(review): neither malloc result is checked in these arguments; verify
 * whether the macro handles allocation failure.
 */
BENCH_MAIN_ARRAY3_D(T002_Ops_030, OP30, 4096, 16384, 65536,
                    int H = (case_id == 1 ? 64 : (case_id == 2 ? 128 : 256));
                    int W = H; int OH = H * 2; int OW = W * 2;
                    double *x = malloc((size_t)H * W * sizeof(double));
                    double *y = malloc((size_t)OH * OW * sizeof(double)),
                    bench_fill_array(x, (size_t)H *W, bench_seed(30));
                    , kernel_run(H, W, OH, OW, x, y), y,
                    (size_t)((size_t)((H * 2) * (W * 2))), free(x);
                    free(y))
