#include "bench_harness.h"
#include "bench_utils.h"
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
/*
 * Sparse matrix-vector product y = A * x for a matrix held in CSR form.
 *
 * n       number of rows to process
 * rowptr  row offsets: the stored entries of row i occupy positions
 *         rowptr[i] up to (not including) rowptr[i + 1]
 * colind  column index of each stored entry
 * aval    value of each stored entry
 * x       input vector, read at the positions named by colind
 * y       output vector; y[0 .. n-1] is overwritten
 */
static inline void spmv_csr(int n, const int *rowptr, const int *colind,
                            const double *aval, const double *x, double *y) {
  int row = 0;
  while (row < n) {
    const int first = rowptr[row];
    const int past_last = rowptr[row + 1];
    double acc = 0.0;
    /* Accumulate in storage order so the rounding sequence stays fixed. */
    for (int nz = first; nz < past_last; ++nz) {
      const int col = colind[nz];
      acc += aval[nz] * x[col];
    }
    y[row] = acc;
    ++row;
  }
}
/*
 * Dot product of the first n elements of a and b.
 *
 * Terms are added in ascending index order. Returns 0.0 when n <= 0.
 */
static inline double dotprod(int n, const double *a, const double *b) {
  double acc = 0.0;
  int idx = 0;
  while (idx < n) {
    acc += a[idx] * b[idx];
    ++idx;
  }
  return acc;
}
/*
 * In-place scaled vector update: y[i] += alpha * x[i] for i in [0, n).
 *
 * x is only read; y is modified. Nothing happens when n <= 0.
 */
static inline void vec_axpy(int n, double alpha, const double *x, double *y) {
  int idx = 0;
  while (idx < n) {
    y[idx] += alpha * x[idx];
    ++idx;
  }
}
/*
 * Runs up to CG_MAX_ITERS unpreconditioned conjugate-gradient iterations on
 * A x = b, starting from x = 0, with A supplied in CSR form.
 *
 * n        system dimension
 * rowptr   CSR row offsets (n + 1 entries are read)
 * colind   CSR column indices
 * aval     CSR values
 * b        right-hand side (read only)
 * x        solution iterate; overwritten (reset to 0, then updated)
 * r        workspace of n doubles: residual from the CG recurrence
 * p        workspace of n doubles: search direction
 * Ap       workspace of n doubles: A * p
 *
 * Returns sqrt(r . r) for the recurrence residual after the last completed
 * iteration.
 *
 * The iteration stops early when the squared residual is exactly zero or when
 * p . Ap is exactly zero. Without these guards the step-size computations
 * divide 0 by 0 (e.g. for b == 0, or after exact convergence on a small
 * system), and the resulting NaN overwrites x and the returned norm.
 *
 * NOTE(review): CG presumes A is symmetric positive definite; nothing here
 * checks that - confirm at the call site.
 */
static double run_cg(int n, const int *rowptr, const int *colind,
                     const double *aval, const double *b, double *x, double *r,
                     double *p, double *Ap) {
  enum { CG_MAX_ITERS = 20 };

  /* Initial guess x = 0, so the initial residual and direction both equal b. */
  for (int i = 0; i < n; i++) {
    x[i] = 0.0;
    r[i] = b[i];
    p[i] = r[i];
  }

  double rsold = dotprod(n, r, r);
  for (int it = 0; it < CG_MAX_ITERS; it++) {
    /* Residual is exactly zero: x already solves the system. */
    if (rsold == 0.0) {
      break;
    }

    spmv_csr(n, rowptr, colind, aval, p, Ap);
    double pAp = dotprod(n, p, Ap);
    /* Breakdown: no finite step length exists along p. */
    if (pAp == 0.0) {
      break;
    }

    double alpha = rsold / pAp;
    for (int i = 0; i < n; i++) {
      x[i] += alpha * p[i];
      r[i] -= alpha * Ap[i];
    }

    double rsnew = dotprod(n, r, r);
    double beta = rsnew / rsold;
    for (int i = 0; i < n; i++) {
      p[i] = r[i] + beta * p[i];
    }
    rsold = rsnew;
  }

  return sqrt(rsold);
}
/*
 * Benchmark entry point for the CG kernel, produced by BENCH_MAIN_SCALAR3.
 *
 * The macro is defined outside this file (presumably in bench_harness.h -
 * confirm). Judging only from the arguments passed here, they appear to be:
 * an identifier and a tag, three integer sizes, a declaration/allocation
 * section, an initialization block, the timed expression, the result value,
 * and a cleanup section. Verify these roles against the macro definition.
 * `n` and `seed` are not declared in this file; presumably the macro
 * provides them.
 *
 * The initialization block builds an n x n tridiagonal matrix in CSR form:
 * 2.0 on the diagonal and -1.0 on the two neighbouring diagonals. colind and
 * aval are allocated with 3 * n slots; for n >= 1 the fill loop writes
 * 3 * n - 2 of them because the first and last rows have only two entries.
 *
 * NOTE(review): none of the malloc results are checked before use, and
 * `3 * n` is evaluated in int before the cast to size_t.
 */
BENCH_MAIN_SCALAR3(
    T004_Module_041, CG, 4096, 16384, 65536,
    /* CSR storage for the matrix. */
    int *rowptr = (int *)malloc((size_t)(n + 1) * sizeof(int));
    int *colind = (int *)malloc((size_t)(3 * n) * sizeof(int));
    double *aval = (double *)malloc((size_t)(3 * n) * sizeof(double));
    /* Right-hand side, solution vector and the three run_cg work vectors. */
    double *b = (double *)malloc((size_t)n * sizeof(double));
    double *x = (double *)malloc((size_t)n * sizeof(double));
    double *r = (double *)malloc((size_t)n * sizeof(double));
    double *p = (double *)malloc((size_t)n * sizeof(double));
    double *Ap = (double *)malloc((size_t)n * sizeof(double));
    double ans_scalar = 0.0;
    ,
    {
      bench_rng64_t rng = bench_rng_init(seed);
      /* pos is the next free slot in colind / aval. */
      int pos = 0;
      rowptr[0] = 0;
      for (int i = 0; i < n; i++) {
        /* Sub-diagonal entry; absent in the first row. */
        if (i > 0) {
          colind[pos] = i - 1;
          aval[pos] = -1.0;
          pos++;
        }
        /* Diagonal entry. */
        colind[pos] = i;
        aval[pos] = 2.0;
        pos++;
        /* Super-diagonal entry; absent in the last row. */
        if (i < n - 1) {
          colind[pos] = i + 1;
          aval[pos] = -1.0;
          pos++;
        }
        rowptr[i + 1] = pos;
      }
      /* Fill b from the harness RNG; value range is defined by
         bench_rng_double_signed, which is not visible here. */
      for (int i = 0; i < n; i++) {
        b[i] = bench_rng_double_signed(&rng);
      }
    },
    /* run_cg returns the final residual norm, stored in ans_scalar. */
    ans_scalar = run_cg(n, rowptr, colind, aval, b, x, r, p, Ap), ans_scalar,
    free(rowptr);
    free(colind); free(aval); free(b); free(x); free(r); free(p); free(Ap);)
