#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * RNGSTEP(state): advance a 64-bit pseudo-random state in place.
 *
 * Applies three xor-shifts (>>12, <<25, >>27) and then stores the value
 * multiplied by an odd 64-bit constant back into `state`.  `state` must be a
 * modifiable uint64_t lvalue; it is evaluated twice, so it must not have side
 * effects.  A state of 0 stays 0.
 */
#define RNGSTEP(state)                                                         \
  do {                                                                         \
    uint64_t x__ = (state);                                                    \
    x__ ^= x__ >> 12;                                                          \
    x__ ^= x__ << 25;                                                          \
    x__ ^= x__ >> 27;                                                          \
    (state) = x__ * 2685821657736338717ULL;                                    \
  } while (0)

/*
 * Hash of the `len` bytes starting at offset `start`, derived from a prefix
 * hash table `h` and a power table `p` (both reduced modulo `mod`):
 *
 *     (h[start + len] - h[start] * p[len]) mod `mod`
 *
 * `mod` is added before the subtraction so the unsigned intermediate never
 * wraps below zero.  The caller guarantees start + len is a valid index of h
 * and len is a valid index of p.
 */
static inline uint32_t rhash_substr(const uint32_t *h, const uint32_t *p,
                                    int start, int len, uint32_t mod) {
  const uint64_t m = (uint64_t)mod;
  const uint64_t scaled_prefix = (uint64_t)h[start] * (uint64_t)p[len] % m;
  return (uint32_t)(((uint64_t)h[start + len] + m - scaled_prefix) % m);
}

/*
 * kernel_run: count how many of 1024 pseudo-random substring pairs of `s`
 * have matching rolling hashes.
 *
 * Parameters:
 *   n       - number of bytes in `s`.
 *   s       - input bytes; only s[0..n-1] are read.
 *   ans_out - receives the number of query pairs whose hashes agree under
 *             both moduli, converted to double.
 *
 * Each query draws a length in [1, n/4 + 1] and two start offsets from a
 * generator seeded with a fixed constant xor n, so the result is a
 * deterministic function of (n, s).
 *
 * *ans_out is set to 0.0 when n <= 0 (there is no non-empty substring to
 * compare) or when the work arrays cannot be allocated.
 */
void kernel_run(int n, const unsigned char *s, double *ans_out) {
  const uint32_t MOD1 = 1000000007u;
  const uint32_t MOD2 = 1000000009u;
  const uint32_t BASE = 91138233u;
  const int Q = 1024;

  double result = 0.0;
  uint32_t *h1 = NULL;
  uint32_t *h2 = NULL;
  uint32_t *p1 = NULL;
  uint32_t *p2 = NULL;

  /*
   * Every query reads index >= 1 of the tables, so an empty (or negative
   * length) input cannot be served without reading out of bounds.
   */
  if (n <= 0) {
    *ans_out = 0.0;
    return;
  }

  /* Widen before adding 1 so n == INT_MAX cannot overflow a signed int. */
  const size_t count = (size_t)n + 1;
  if (count <= SIZE_MAX / sizeof(uint32_t)) {
    h1 = (uint32_t *)malloc(count * sizeof *h1);
    h2 = (uint32_t *)malloc(count * sizeof *h2);
    p1 = (uint32_t *)malloc(count * sizeof *p1);
    p2 = (uint32_t *)malloc(count * sizeof *p2);
  }

  if (h1 && h2 && p1 && p2) {
    /* hK[i] = hash of the first i bytes, pK[i] = BASE^i, both mod MODK. */
    h1[0] = 0u;
    h2[0] = 0u;
    p1[0] = 1u;
    p2[0] = 1u;
    for (int i = 0; i < n; i++) {
      const uint64_t v = (uint64_t)s[i];
      h1[i + 1] = (uint32_t)(((uint64_t)h1[i] * BASE + v) % MOD1);
      h2[i + 1] = (uint32_t)(((uint64_t)h2[i] * BASE + v) % MOD2);
      p1[i + 1] = (uint32_t)(((uint64_t)p1[i] * BASE) % MOD1);
      p2[i + 1] = (uint32_t)(((uint64_t)p2[i] * BASE) % MOD2);
    }

    /* With n >= 1: max_len >= 1 and n - len + 1 >= n - n/4 >= 1. */
    const uint64_t max_len = (uint64_t)(n / 4 + 1);
    long long eqcnt = 0;
    uint64_t rng_state = 0x123456789abcdefULL ^ (uint64_t)n;

    for (int qi = 0; qi < Q; qi++) {
      /* Draw order (length, first start, second start) fixes the result. */
      RNGSTEP(rng_state);
      const int len = (int)(rng_state % max_len) + 1;
      const uint64_t max_start = (uint64_t)(n - len + 1);
      RNGSTEP(rng_state);
      const int l1 = (int)(rng_state % max_start);
      RNGSTEP(rng_state);
      const int l2 = (int)(rng_state % max_start);

      /* A pair counts only when it matches under both moduli. */
      if (rhash_substr(h1, p1, l1, len, MOD1) ==
              rhash_substr(h1, p1, l2, len, MOD1) &&
          rhash_substr(h2, p2, l1, len, MOD2) ==
              rhash_substr(h2, p2, l2, len, MOD2)) {
        eqcnt++;
      }
    }
    result = (double)eqcnt;
  }

  /* free(NULL) is a no-op, so partial allocation failures need no guards. */
  free(h1);
  free(h2);
  free(p1);
  free(p2);
  *ans_out = result;
}
/*
 * Benchmark entry point, generated by BENCH_MAIN_SCALAR3 (defined outside
 * this file, presumably in one of the bench headers included above).
 *
 * Arguments as written here, in order:
 *   - T003_Code_011, RHASH: identifiers handed to the harness
 *     (presumably benchmark id and tag -- confirm against the macro).
 *   - 4096, 16384, 65536: three integer constants, presumably the problem
 *     sizes substituted for `n` -- confirm against the macro.
 *   - declarations: an n-byte buffer `s` and the scalar result `ans_scalar`.
 *   - init block: fills s[0..n-1] with lowercase letters 'a'..'z' drawn from
 *     a bench_rng64_t generator initialised from `seed`.
 *   - kernel call: kernel_run(n, s, &ans_scalar).
 *   - result expression: ans_scalar.
 *   - cleanup: free(s).
 *
 * `n` and `seed` are not declared in these arguments, so they are expected
 * to be provided by the macro expansion.
 *
 * NOTE(review): the malloc result for `s` is not checked in the arguments
 * visible here before the init block writes to it; verify whether the macro
 * adds a check.
 */
BENCH_MAIN_SCALAR3(
    T003_Code_011, RHASH, 4096, 16384, 65536,
    unsigned char *s = (unsigned char *)malloc((size_t)n *
                                               sizeof(unsigned char));
    double ans_scalar = 0.0;
    ,
    {
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < n; i++) {
        s[i] = (unsigned char)('a' + (bench_rng_next(&rng) % 26ULL));
      }
    },
    kernel_run(n, s, &ans_scalar), ans_scalar, free(s);)
