
#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Fixed parameters of the discrete-log benchmark kernel. */
enum {
  BSGS_PRIME = 10007,   /* prime modulus p; arithmetic is done in (Z/pZ)^* */
  BSGS_STEP = 101,      /* baby-step count m; m * m >= p - 1 covers all exponents */
  BSGS_MAX_QUERIES = 32 /* at most this many (g, h) pairs are solved per call */
};

/* Returns (a * b) mod `mod`; the product is widened to 64 bits so it cannot
 * overflow for any 32-bit operands. */
static uint32_t bsgs_mul_mod(uint32_t a, uint32_t b, uint32_t mod) {
  return (uint32_t)(((uint64_t)a * (uint64_t)b) % mod);
}

/* Returns base^exp mod `mod` using binary (square-and-multiply)
 * exponentiation. */
static uint32_t bsgs_pow_mod(uint32_t base, uint32_t exp, uint32_t mod) {
  uint32_t result = 1U;

  base %= mod;
  while (exp != 0U) {
    if (exp & 1U) {
      result = bsgs_mul_mod(result, base, mod);
    }
    base = bsgs_mul_mod(base, base, mod);
    exp >>= 1;
  }
  return result;
}

/*
 * Solves g^x == h (mod BSGS_PRIME) by baby-step/giant-step.
 *
 * Both g and h must already lie in [1, BSGS_PRIME - 1].
 *
 * Returns the first match x = i * BSGS_STEP + j found while scanning giant
 * steps i in increasing order and, within each, baby steps j in increasing
 * order (this is the least non-negative solution), or -1 when h is not a
 * power of g.
 */
static int bsgs_discrete_log(uint32_t g, uint32_t h) {
  const uint32_t p = (uint32_t)BSGS_PRIME;
  uint32_t baby[BSGS_STEP];

  /* Baby steps: baby[j] = g^j mod p. */
  baby[0] = 1U;
  for (int j = 1; j < BSGS_STEP; j++) {
    baby[j] = bsgs_mul_mod(baby[j - 1], g, p);
  }

  /* Giant-step factor g^(-m): invert g via Fermat's little theorem
   * (g^(p-2) == g^(-1) mod p for prime p), then raise the inverse to m. */
  const uint32_t inv_g = bsgs_pow_mod(g, p - 2U, p);
  const uint32_t giant = bsgs_pow_mod(inv_g, (uint32_t)BSGS_STEP, p);

  /* After i giant steps cur == h * g^(-i*m); a hit against baby[j] means
   * h == g^(i*m + j). The linear scan is kept deliberately so the kernel
   * performs the same work as before. */
  uint32_t cur = h;
  for (int i = 0; i < BSGS_STEP; i++) {
    for (int j = 0; j < BSGS_STEP; j++) {
      if (cur == baby[j]) {
        return i * BSGS_STEP + j;
      }
    }
    cur = bsgs_mul_mod(cur, giant, p);
  }
  return -1;
}

/*
 * Benchmark kernel: solves up to BSGS_MAX_QUERIES discrete-log instances
 * modulo BSGS_PRIME and reports the sum of the exponents that were found.
 *
 * n       - number of valid entries in garr/harr; only the first
 *           min(n, BSGS_MAX_QUERIES) are used, and n <= 0 solves nothing.
 * garr    - bases; each is reduced mod p, and a residue of 0 or 1 is bumped
 *           by 2 so the base is never a degenerate generator.
 * harr    - targets; each is reduced mod p, and a residue of 0 is replaced
 *           by 1 because 0 is not in the multiplicative group.
 * ans_out - receives the sum, as a double, of every exponent found;
 *           instances with no solution contribute nothing.
 */
void kernel_run(int n, const uint32_t *garr, const uint32_t *harr,
                double *ans_out) {
  const uint32_t p = (uint32_t)BSGS_PRIME;
  const int query_count = (n < BSGS_MAX_QUERIES) ? n : BSGS_MAX_QUERIES;
  unsigned long long sum_found = 0ULL;

  for (int qi = 0; qi < query_count; qi++) {
    uint32_t g = garr[qi] % p;
    if (g < 2U) {
      g += 2U;
    }

    uint32_t h = harr[qi] % p;
    if (h == 0U) {
      h = 1U;
    }

    const int x = bsgs_discrete_log(g, h);
    if (x >= 0) {
      sum_found += (unsigned long long)x;
    }
  }

  *ans_out = (double)sum_found;
}
/*
 * Benchmark entry point, generated by the BENCH_MAIN_SCALAR3 harness macro.
 * The macro is not defined in this file (presumably it comes from
 * bench_harness.h -- confirm), so the role of each argument below is inferred
 * from its contents rather than from the macro definition.
 *
 * `n` and `seed` are not declared here; presumably the macro expansion
 * supplies them -- verify against the harness.
 *
 * NOTE(review): the malloc results are not checked in this invocation before
 * the fill loop writes through them; confirm whether the harness guards this.
 */
BENCH_MAIN_SCALAR3(
    /* Identifier, label, and three numeric values (presumably the problem
     * sizes n to run -- TODO confirm against the macro definition). */
    T003_Code_039, BSGS, 4096, 16384, 65536,
    /* Declarations: one base array, one target array, and the result slot. */
    uint32_t *garr = (uint32_t *)malloc((size_t)n * sizeof(uint32_t));
    uint32_t *harr = (uint32_t *)malloc((size_t)n * sizeof(uint32_t));
    double ans_scalar = 0.0;
    ,
    /* Input generation: fills garr with values in [2, 10001] and harr with
     * values in [1, 10000], drawn alternately from one RNG stream seeded
     * with `seed`. */
    {
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < n; i++) {
        garr[i] = (uint32_t)(2U + (bench_rng_next(&rng) % 10000ULL));
        harr[i] = (uint32_t)(1U + (bench_rng_next(&rng) % 10000ULL));
      }
    },
    /* Kernel call, the expression holding its result, then cleanup of both
     * arrays. kernel_run reads at most the first 32 elements of each array. */
    kernel_run(n, garr, harr, &ans_scalar), ans_scalar, free(garr);
    free(harr);)
