
#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Adds `delta` to every element of a[lo..hi] (both bounds inclusive). */
static void sqrtblk_add_span(int *a, int lo, int hi, int delta) {
  for (int i = lo; i <= hi; i++) {
    a[i] += delta;
  }
}

/* Returns the sum of a[lo..hi] (both bounds inclusive) as a long long. */
static long long sqrtblk_sum_span(const int *a, int lo, int hi) {
  long long total = 0;
  for (int i = lo; i <= hi; i++) {
    total += a[i];
  }
  return total;
}

/*
 * Square-root-decomposition kernel: range add with per-block lazy tags and
 * range sum queries over a deterministically generated array.
 *
 * The array holds n ints (n is raised to 2 when smaller) filled from a fixed
 * linear-congruential formula masked to 16 bits. It is split into blocks of
 * B = floor(sqrt(n)) elements. A fixed sequence of 64 operations follows:
 * even steps add a small value in [-5, 5] to a[l..r], odd steps sum a[l..r]
 * and fold the low 32 bits of that sum into a checksum.
 *
 * Invariants maintained per block b:
 *   - lazy[b] is an addend that applies to every element of the block but
 *     has not been written into a[] itself;
 *   - blockSum[b] is the true block sum, lazy addend included.
 *
 * Parameters:
 *   n        problem size; values below 2 are treated as 2.
 *   ans_out  receives the checksum converted to double, or 0.0 when an
 *            allocation fails. If NULL, the function returns immediately.
 */
void kernel_run(int n, double *ans_out) {
  enum { OPS = 64 };

  if (!ans_out) {
    return; /* nowhere to store the result */
  }
  if (n < 2) {
    n = 2;
  }

  /* Largest B with B*B <= n; the product is widened so it cannot overflow. */
  int B = 1;
  while ((long long)(B + 1) * (B + 1) <= (long long)n) {
    B++;
  }

  /* Ceiling division written so that it cannot overflow for n near INT_MAX. */
  int nb = n / B + (n % B != 0);

  int *a = malloc((size_t)n * sizeof *a);
  int *lazy = calloc((size_t)nb, sizeof *lazy); /* lazy tags start at zero */
  int *blen = malloc((size_t)nb * sizeof *blen);
  long long *blockSum = malloc((size_t)nb * sizeof *blockSum);
  if (!a || !lazy || !blen || !blockSum) {
    free(a); /* free(NULL) is a no-op, so no guards are needed */
    free(lazy);
    free(blen);
    free(blockSum);
    *ans_out = 0.0;
    return;
  }

  /* Unsigned arithmetic wraps by definition; only the low 16 bits are kept. */
  for (int i = 0; i < n; i++) {
    a[i] = (int)((1103515245u * ((unsigned)i + 999u) + 54321u) & 0xFFFFu);
  }

  for (int b = 0; b < nb; b++) {
    int start = b * B;
    int remaining = n - start;
    int len = remaining < B ? remaining : B; /* last block may be short */
    blen[b] = len;
    blockSum[b] = sqrtblk_sum_span(a, start, start + len - 1);
  }

  unsigned long long checksum = 0ULL;
  for (int t = 0; t < OPS; t++) {
    int l = (t * 37 + 11) % n;
    int r = (t * 131 + 7) % n;
    if (l > r) {
      int tmp = l;
      l = r;
      r = tmp;
    }

    int bl = l / B;
    int br = r / B;
    int same_block = (bl == br);
    /* Last index of the range that lies inside the left block. */
    int left_hi = same_block ? r : (bl + 1) * B - 1;
    int left_cnt = left_hi - l + 1;

    if ((t & 1) == 0) {
      /* Range add: partial blocks touch a[] directly, whole blocks use lazy. */
      int vadd = (t * 17 + 3) % 11 - 5;

      sqrtblk_add_span(a, l, left_hi, vadd);
      blockSum[bl] += (long long)vadd * left_cnt;

      if (!same_block) {
        for (int b = bl + 1; b < br; b++) {
          lazy[b] += vadd;
          blockSum[b] += (long long)vadd * blen[b];
        }
        int right_lo = br * B;
        sqrtblk_add_span(a, right_lo, r, vadd);
        blockSum[br] += (long long)vadd * (r - right_lo + 1);
      }
    } else {
      /* Range sum: partial blocks add their pending lazy tag per element. */
      long long curSum =
          sqrtblk_sum_span(a, l, left_hi) + (long long)lazy[bl] * left_cnt;

      if (!same_block) {
        for (int b = bl + 1; b < br; b++) {
          curSum += blockSum[b];
        }
        int right_lo = br * B;
        curSum += sqrtblk_sum_span(a, right_lo, r) +
                  (long long)lazy[br] * (r - right_lo + 1);
      }
      checksum += (unsigned long long)curSum & 0xFFFFFFFFULL;
    }
  }

  *ans_out = (double)checksum;

  free(a);
  free(lazy);
  free(blen);
  free(blockSum);
}
/*
 * Benchmark entry point, generated by the BENCH_MAIN_SCALAR3 macro
 * (presumably defined in bench_harness.h; its expansion is not visible here).
 * Arguments as passed: identifier T003_Code_053, tag SQRTBLK, the three
 * values 4096 / 16384 / 65536 (presumably the problem sizes bound to `n`;
 * confirm against the macro definition), a declaration of the scalar
 * `ans_scalar`, an empty statement, the kernel call that writes into
 * `ans_scalar`, the result expression, and a final empty statement.
 */
BENCH_MAIN_SCALAR3(T003_Code_053, SQRTBLK, 4096, 16384, 65536,
                   double ans_scalar = 0.0;
                   , ;, kernel_run(n, &ans_scalar), ans_scalar, ;)
