#include "bench_harness.h"
#include "bench_utils.h"
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Modulus used by every routine in this file: 2^32 - 5. */
#define MOD32 4294967291u
/*
 * Barrett constant floor(2^64 / MOD32) = 2^32 + 5. The value does not fit in
 * 32 bits, so the compiler gives the literal a wider unsigned type.
 */
#define MU32 4294967301u

/*
 * Reduce a 64-bit value modulo MOD32 with Barrett reduction.
 *
 * The quotient is estimated as q = floor(x * MU32 / 2^64) using a 128-bit
 * product. Since MU32 * MOD32 = 2^64 - 25, the estimate never exceeds the
 * true quotient and is at most one below it. Consequently q * MOD32 <= x (no
 * wrap-around in the subtraction) and x - q * MOD32 lies in [0, 2 * MOD32),
 * so a single conditional subtraction finishes the reduction.
 *
 * x: any 64-bit unsigned value.
 * Returns x mod MOD32, in the range [0, MOD32).
 */
static inline uint32_t barrett_reduce64(uint64_t x) {
  const unsigned __int128 wide = (unsigned __int128)x * MU32;
  const uint64_t q = (uint64_t)(wide >> 64);
  uint64_t r = x - q * (uint64_t)MOD32;
  if (r >= MOD32) {
    r -= MOD32;
  }
  return (uint32_t)r;
}
/*
 * Multiply two 32-bit values modulo MOD32.
 *
 * The operands are widened so the full 64-bit product is formed without
 * overflow, then the product is handed to barrett_reduce64().
 *
 * Returns the value produced by barrett_reduce64() for a * b.
 */
static inline uint32_t modmul32(uint32_t a, uint32_t b) {
  return barrett_reduce64((uint64_t)a * (uint64_t)b);
}
/*
 * Raise base to the power e modulo MOD32 by binary (square-and-multiply)
 * exponentiation, scanning the exponent from its least significant bit.
 *
 * base: any 32-bit value; it is reduced modulo MOD32 before use.
 * e:    exponent; e == 0 yields 1.
 *
 * Returns base^e mod MOD32.
 */
static uint32_t modexp32(uint32_t base, uint32_t e) {
  uint32_t product = 1u;
  uint32_t square = (uint32_t)(base % MOD32);
  for (uint32_t bits = e; bits != 0u; bits >>= 1u) {
    /* Fold in the current power of the base when its exponent bit is set. */
    if ((bits & 1u) != 0u) {
      product = modmul32(product, square);
    }
    square = modmul32(square, square);
  }
  return product;
}
/*
 * Benchmark kernel: for each index i in [0, n), compute arr[i]^(i + 1) modulo
 * MOD32 and accumulate the results modulo MOD32.
 *
 * n:   number of elements to process; the loop does not run when n <= 0.
 * arr: input values, read only; must hold at least n elements.
 *
 * Returns the accumulated sum (a value below MOD32) converted to double.
 */
static double pipeline_run(int n, const uint32_t *arr) {
  uint64_t sum = 0;
  for (int idx = 0; idx < n; ++idx) {
    const uint32_t exponent = (uint32_t)(idx + 1);
    const uint32_t term = modexp32(arr[idx] % MOD32, exponent);
    /* Reduce after every addition so the running sum stays below MOD32. */
    sum = (sum + (uint64_t)term) % (uint64_t)MOD32;
  }
  return (double)sum;
}
/*
 * Benchmark entry point, generated by the BENCH_MAIN_SCALAR3 harness macro.
 *
 * The arguments supply, in order: the benchmark name and tag, three integer
 * constants, the declarations (input buffer and result variable), a block that
 * fills the buffer from the harness RNG, the expression that runs the kernel,
 * the result expression, and the cleanup statement.
 * NOTE(review): the exact role of each argument (in particular that the three
 * integers are problem sizes, and where `n` and `seed` come from) is defined
 * in bench_harness.h -- confirm there.
 *
 * The fill block aborts with a diagnostic if the buffer allocation failed,
 * rather than writing through a null pointer.
 */
BENCH_MAIN_SCALAR3(
    T004_Module_035, BARR32, 4096, 16384, 65536,
    uint32_t *arr = (uint32_t *)malloc((size_t)n * sizeof(uint32_t));
    double ans_scalar = 0.0;
    ,
    {
      /* malloc can fail; stop before the fill loop dereferences arr. */
      if (arr == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
      }
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < n; i++)
        arr[i] = (uint32_t)(bench_rng_next(&rng) & 0xFFFFFFFFu);
    },
    ans_scalar = pipeline_run(n, arr), ans_scalar, free(arr);)
