#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define IDX(i, j, n) ((size_t)(i) * (size_t)(n) + (size_t)(j))
void kernel_run(int n, double *A, double *R) {

  for (int k = 0; k < n; k++) {
    double norm = 0.0;
    for (int i = 0; i < n; i++) {
      double v = A[IDX(i, k, n)];
      norm += v * v;
    }
    R[IDX(k, k, n)] = sqrt(norm);
    double inv = 1.0 / (R[IDX(k, k, n)] + 1e-12);
    for (int i = 0; i < n; i++)
      A[IDX(i, k, n)] *= inv;
    for (int j = k + 1; j < n; j++) {
      double r = 0.0;
      for (int i = 0; i < n; i++)
        r += A[IDX(i, k, n)] * A[IDX(i, j, n)];
      R[IDX(k, j, n)] = r;
      for (int i = 0; i < n; i++)
        A[IDX(i, j, n)] -= A[IDX(i, k, n)] * r;
    }
  }
}
/*
 * Benchmark driver for kernel_run, produced by the BENCH_MAIN_ARRAY3_D macro
 * (defined outside this file; presumably in bench_harness.h -- confirm).
 *
 * Arguments as they appear below:
 *   - T001_Loops_023 and 08: identifier tokens for this benchmark
 *     (exact meaning is set by the macro -- TODO confirm).
 *   - 64, 96, 128: three numeric values, presumably the problem sizes bound
 *     to `n` in the fragments that follow -- TODO confirm.
 *   - Allocation fragment: A is malloc'd (uninitialised) and R is calloc'd
 *     (zero-filled), each holding n * n doubles.
 *   - Fill fragment: A is populated by bench_fill_array using bench_seed(14).
 *   - Run fragment: a single call kernel_run(n, A, R).
 *   - A with n * n elements: presumably the array the harness inspects or
 *     reports after the run -- TODO confirm.
 *   - Cleanup fragment: frees both A and R.
 *
 * NOTE(review): the malloc/calloc results are not checked in these
 * fragments; confirm whether the harness handles allocation failure.
 */
BENCH_MAIN_ARRAY3_D(T001_Loops_023, 08, 64, 96, 128,
                    double *A = malloc((size_t)n * n * sizeof(double));
                    double *R = calloc((size_t)n * n, sizeof(double));
                    , bench_fill_array(A, (size_t)n *n, bench_seed(14));
                    , kernel_run(n, A, R);, A, (size_t)n *n, free(A); free(R);)
