
#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Accumulate the matrix product cx * vy into px.
 *
 * For every term and col in [0, 25) and every row in [0, N):
 *
 *     px[row][col] += vy[term][col] * cx[row][term]
 *
 * Parameters:
 *   N   number of rows in px and cx; nothing is written when N <= 0.
 *   px  N x 25 array, updated in place.
 *   vy  25 x 25 array, read only.
 *   cx  N x 25 array, read only.
 *
 * The nest runs term-outermost and row-innermost, so each px element
 * receives its 25 contributions in increasing term order.
 */
void kernel_run(int N, double (*px)[25], const double (*vy)[25],
                const double (*cx)[25]) {
  enum { DIM = 25 }; /* fixed row width shared by all three arrays */

  for (int term = 0; term < DIM; ++term) {
    const double *vy_row = vy[term];

    for (int col = 0; col < DIM; ++col) {
      for (int row = 0; row < N; ++row) {
        px[row][col] = px[row][col] + vy_row[col] * cx[row][term];
      }
    }
  }
}
/*
 * Benchmark driver for kernel_run, produced by the BENCH_MAIN_ARRAY3_D macro.
 * The macro is not defined in this file (presumably it comes from
 * bench_harness.h -- confirm), so the roles below are read off the argument
 * text only:
 *
 *   T001_Loops_013        identifier passed as the first argument
 *                         (presumably the benchmark's name -- confirm).
 *   21, 64, 128, 256      four integer constants whose meaning is defined by
 *                         the macro (presumably problem sizes -- TODO confirm).
 *   int N = n; malloc...  allocates px (N x 25), vy (25 x 25) and cx (N x 25)
 *                         doubles. `n` is not declared here, so it must be
 *                         supplied by the macro expansion.
 *   bench_fill_array...   fills each buffer, using bench_seed(7) XORed with a
 *                         different constant per buffer.
 *   kernel_run(...)       the kernel invocation; px is the array it updates.
 *   (const double *)px,   flat view of px and its element count, n * 25
 *   (size_t)n * 25        (presumably the result the harness inspects).
 *   free(...)             releases the three buffers.
 *
 * NOTE(review): the malloc results are used without a NULL check in these
 * fragments; whether the macro guards against allocation failure cannot be
 * seen from here.
 */
BENCH_MAIN_ARRAY3_D(
    T001_Loops_013, 21, 64, 128, 256, int N = n;
    double (*px)[25] = malloc((size_t)N * 25 * sizeof(double));
    double (*vy)[25] = malloc((size_t)25 * 25 * sizeof(double));
    double (*cx)[25] = malloc((size_t)N * 25 * sizeof(double)),
    bench_fill_array((double *)px, (size_t)N * 25, bench_seed(7) ^ 0xcccc);
    bench_fill_array((double *)vy, (size_t)25 * 25, bench_seed(7) ^ 0xdddd);
    bench_fill_array((double *)cx, (size_t)N * 25, bench_seed(7) ^ 0xeeee),
    kernel_run(n, px, (const double (*)[25])vy, (const double (*)[25])cx),
    (const double *)px, (size_t)n * 25, free(px);
    free(vy); free(cx))
