
#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Adds `delta` to the 0-based cell (x, y) of a 2D Fenwick tree (binary
 * indexed tree).
 *
 * bit    - tree storage, row-major with `stride` elements per row; indices
 *          1..dim are used in each dimension (row/column 0 stay unused).
 * dim    - side length of the logical grid.
 * stride - number of elements per storage row.
 */
static void fenwick2d_add(long long *bit, int dim, size_t stride, int x, int y,
                          long long delta) {
  /* i & -i isolates the lowest set bit, i.e. the step to the next node that
   * covers this cell. */
  for (int i = x + 1; i <= dim; i += i & -i) {
    long long *row = bit + (size_t)i * stride;
    for (int j = y + 1; j <= dim; j += j & -j) {
      row[j] += delta;
    }
  }
}

/*
 * Returns the sum of all cells with 0-based coordinates in
 * [0, x) x [0, y); the result is 0 when x or y is 0.
 */
static long long fenwick2d_prefix(const long long *bit, size_t stride, int x,
                                  int y) {
  long long sum = 0;
  for (int i = x; i > 0; i -= i & -i) {
    const long long *row = bit + (size_t)i * stride;
    for (int j = y; j > 0; j -= j & -j) {
      sum += row[j];
    }
  }
  return sum;
}

/*
 * Benchmark kernel: builds a dim x dim 2D Fenwick tree, where dim is the
 * smallest integer with dim * dim >= n, then runs a fixed sequence of 64
 * alternating point updates and rectangle-sum queries.
 *
 * n       - requested problem size; values below 2 are treated as 2.
 * ans_out - receives the checksum (sum of the low 32 bits of every rectangle
 *           query result) converted to double, or 0.0 if the tree could not
 *           be allocated.
 */
void kernel_run(int n, double *ans_out) {
  enum { OPS = 64 };

  if (n < 2)
    n = 2;

  int dim = 1;
  while ((long long)dim * (long long)dim < (long long)n)
    dim++;

  /* Storage is (dim + 2) x (dim + 2); calloc zero-fills it and fails cleanly
   * (returns NULL) if the total byte count would overflow size_t. */
  size_t stride = (size_t)dim + 2;
  long long *bit = calloc(stride * stride, sizeof *bit);
  if (!bit) {
    *ans_out = 0.0;
    return;
  }

  /* Build phase: seed every cell with a deterministic value in [0, 999]. */
  for (int x = 0; x < dim; x++) {
    for (int y = 0; y < dim; y++) {
      unsigned int basev = (x * 1315423911u + y * 2654435761u +
                            (unsigned)n * 97531u) %
                           1000u;
      fenwick2d_add(bit, dim, stride, x, y, (long long)basev);
    }
  }

  unsigned long long checksum = 0ULL;
  for (int t = 0; t < OPS; t++) {
    int x1 = (t * 17 + 3) % dim;
    int y1 = (t * 19 + 5) % dim;
    int x2 = (t * 23 + 7) % dim;
    int y2 = (t * 29 + 11) % dim;

    /* Normalise so that (x1, y1) is the lower corner and (x2, y2) the upper. */
    if (x1 > x2) {
      int tmp = x1;
      x1 = x2;
      x2 = tmp;
    }
    if (y1 > y2) {
      int tmp = y1;
      y1 = y2;
      y2 = tmp;
    }

    if ((t % 2) == 0) {
      /* Even step: point update at the upper corner, delta in [-10, 10]. */
      int delta = (t * 31 + 13) % 21 - 10;
      fenwick2d_add(bit, dim, stride, x2, y2, (long long)delta);
    } else {
      /* Odd step: inclusive rectangle sum [x1..x2] x [y1..y2] obtained by
       * inclusion-exclusion over four prefix sums. */
      long long rect = fenwick2d_prefix(bit, stride, x2 + 1, y2 + 1) -
                       fenwick2d_prefix(bit, stride, x1, y2 + 1) -
                       fenwick2d_prefix(bit, stride, x2 + 1, y1) +
                       fenwick2d_prefix(bit, stride, x1, y1);
      checksum += (unsigned long long)rect & 0xFFFFFFFFULL;
    }
  }

  *ans_out = (double)checksum;
  free(bit);
}
/*
 * Benchmark entry point: expands BENCH_MAIN_SCALAR3, which is not defined in
 * this file (presumably it comes from bench_harness.h -- confirm there).
 * Arguments passed, in order: the identifiers T003_Code_052 and BIT2D; the
 * three values 4096, 16384 and 65536 (presumably the problem sizes supplied
 * to kernel_run as `n` -- confirm against the macro); a declaration of the
 * result variable ans_scalar; a lone `;`; the kernel call
 * kernel_run(n, &ans_scalar); the expression ans_scalar; and a final lone `;`.
 */
BENCH_MAIN_SCALAR3(T003_Code_052, BIT2D, 4096, 16384, 65536,
                   double ans_scalar = 0.0;
                   , ;, kernel_run(n, &ans_scalar), ans_scalar, ;)
