#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
/*
 * In-place LU factorisation (no pivoting) of a dense, row-major n x n matrix.
 *
 * On return the strict lower triangle of A holds the elimination multipliers
 * (the L factor; its unit diagonal is implied and not stored) and the upper
 * triangle, diagonal included, holds U.
 *
 * No rows are exchanged and pivots are not inspected, so a zero pivot is
 * divided by as-is. Callers are expected to pass a matrix for which
 * elimination without pivoting is well behaved.
 *
 * n: matrix dimension; the function does nothing when n <= 0.
 * A: n * n doubles in row-major order, overwritten with the factors.
 */
static inline void lu_factor_no_pivot(int n, double *A) {
  /* Index in size_t: the flat offset i * n + j overflows int once n * n
   * exceeds INT_MAX, which would be undefined behaviour. */
  const size_t dim = n > 0 ? (size_t)n : 0;

  for (size_t k = 0; k < dim; k++) {
    const double *pivot_row = A + k * dim;
    const double pivot = pivot_row[k];

    /* Scale column k below the diagonal into multipliers. */
    for (size_t i = k + 1; i < dim; i++) {
      A[i * dim + k] /= pivot;
    }

    /* Rank-1 update of the trailing submatrix. */
    for (size_t i = k + 1; i < dim; i++) {
      double *row = A + i * dim;
      const double lik = row[k];
      for (size_t j = k + 1; j < dim; j++) {
        row[j] -= lik * pivot_row[j];
      }
    }
  }
}
/*
 * Solve three linear systems that share one packed LU factorisation.
 *
 * LU is a row-major n x n matrix whose strict lower triangle is read as L
 * (unit diagonal implied) and whose upper triangle, diagonal included, is
 * read as U. Each of b0, b1 and b2 holds a right-hand side of length n on
 * entry and is overwritten with the corresponding solution.
 *
 * Diagonal entries of U are divided by without any zero check.
 *
 * n:  system dimension; the function does nothing when n <= 0.
 * LU: packed factors, not modified.
 * b0, b1, b2: right-hand sides in, solutions out.
 */
static inline void solve3(int n, const double *LU, double *b0, double *b1,
                          double *b2) {
  /* Index in size_t: the flat offset i * n + j overflows int once n * n
   * exceeds INT_MAX, which would be undefined behaviour. */
  const size_t dim = n > 0 ? (size_t)n : 0;

  /* Forward substitution with L; no division because diag(L) is 1. */
  for (size_t i = 0; i < dim; i++) {
    const double *row = LU + i * dim;
    for (size_t j = 0; j < i; j++) {
      const double lij = row[j];
      b0[i] -= lij * b0[j];
      b1[i] -= lij * b1[j];
      b2[i] -= lij * b2[j];
    }
  }

  /* Back substitution with U, walking rows from last to first. */
  for (size_t i = dim; i-- > 0;) {
    const double *row = LU + i * dim;
    for (size_t j = i + 1; j < dim; j++) {
      const double uij = row[j];
      b0[i] -= uij * b0[j];
      b1[i] -= uij * b1[j];
      b2[i] -= uij * b2[j];
    }
    const double uii = row[i];
    b0[i] /= uii;
    b1[i] /= uii;
    b2[i] /= uii;
  }
}
/*
 * Benchmark kernel: factor A in place, solve the three systems in place,
 * then collapse the three solution vectors into a single scalar.
 *
 * n: dimension of A and length of each vector.
 * A: n * n row-major matrix, overwritten with its LU factors.
 * b0, b1, b2: right-hand sides, overwritten with the solutions.
 *
 * Returns the sum of every entry of the three solution vectors.
 */
static double run_lu(int n, double *A, double *b0, double *b1, double *b2) {
  lu_factor_no_pivot(n, A);
  solve3(n, A, b0, b1, b2);

  double total = 0.0;
  int idx = 0;
  while (idx < n) {
    /* Add the three entries together first, then fold into the total. */
    total = total + (b0[idx] + b1[idx] + b2[idx]);
    idx++;
  }
  return total;
}
/*
 * Benchmark entry point, produced by the BENCH_MAIN_SCALAR3 macro; its
 * definition is not in this file.
 * NOTE(review): the argument roles annotated below are inferred from how each
 * argument is written here - confirm against the macro definition. `n` and
 * `seed` are not declared in this file; presumably the macro supplies them.
 */
BENCH_MAIN_SCALAR3(
    /* Two identifiers, then three integer constants (presumably the problem
     * sizes that n takes - TODO confirm). */
    T004_Module_045, LU3, 4096, 16384, 65536,
    /* Declarations. Ndim is sqrt(n) rounded to the nearest integer (add 0.5,
     * then truncate), so the Ndim x Ndim matrix holds roughly n doubles. */
    int Ndim = (int)(sqrt((double)n) + 0.5);
    /* NOTE(review): none of the malloc results below are checked for NULL
     * before use. */
    double *A = (double *)malloc((size_t)Ndim * (size_t)Ndim * sizeof(double));
    double *b0 = (double *)malloc((size_t)Ndim * sizeof(double));
    double *b1 = (double *)malloc((size_t)Ndim * sizeof(double));
    double *b2 = (double *)malloc((size_t)Ndim * sizeof(double));
    double ans_scalar = 0.0;
    ,
    /* Initialisation block. Keep it free of commas outside parentheses:
     * braces do not stop the preprocessor from splitting macro arguments. */
    {
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < Ndim; i++) {
        for (int j = 0; j < Ndim; j++) {
          A[i * Ndim + j] = 0.1 * bench_rng_double_signed(&rng);
        }
        /* Adding Ndim to the diagonal: assuming bench_rng_double_signed
         * returns values in [-1, 1] (TODO confirm), each row becomes strictly
         * diagonally dominant, so factoring without pivoting is safe. */
        A[i * Ndim + i] += (double)Ndim;
      }
      /* Three random right-hand sides, drawn interleaved from the same
       * generator. */
      for (int i = 0; i < Ndim; i++) {
        b0[i] = bench_rng_double_signed(&rng);
        b1[i] = bench_rng_double_signed(&rng);
        b2[i] = bench_rng_double_signed(&rng);
      }
    },
    /* Kernel expression, then the value to report, then cleanup (roles
     * presumed). run_lu overwrites A and the three vectors in place.
     * NOTE(review): if the macro evaluates the kernel more than once without
     * re-running the initialisation block, later evaluations would operate on
     * already-factored data - confirm against the macro. */
    ans_scalar = run_lu(Ndim, A, b0, b1, b2), ans_scalar, free(A);
    free(b0); free(b1); free(b2);)
