#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/* Half-width of the diagonal band: banded_lev only evaluates DP cells (i, j)
 * with |i - j| <= W. */
#define W 4
/* Maximum number of leading characters of each input that banded_lev uses. */
#define MAXL 256

/* Returns the smallest of three ints. */
static inline int min3(int a, int b, int c) {
  int m = a < b ? a : b;
  return m < c ? m : c;
}

/*
 * Banded Levenshtein distance between a[0..la) and b[0..lb).
 *
 * Only cells within W of the main diagonal are evaluated, using two rolling
 * rows. Inputs longer than MAXL are truncated to their first MAXL characters;
 * negative lengths are treated as 0.
 *
 * Returns:
 *   - the exact edit distance whenever it is <= W (an optimal alignment then
 *     never leaves the band);
 *   - otherwise an upper bound on the edit distance: the cost of the best
 *     alignment that stays inside the band, or max(la, lb) when the length
 *     difference alone exceeds W and the final cell lies outside the band.
 *
 * The result depends only on the arguments: the scratch rows are local and
 * every cell that is read has been written for the current call.
 */
static int banded_lev(const char *a, int la, const char *b, int lb) {
  /* Marks cells just outside the band. Small enough that adding 1 cannot
   * overflow, large enough to never win a min3 against a real cost. */
  enum { OUT_OF_BAND = 1 << 29 };

  if (la < 0)
    la = 0;
  if (la > MAXL)
    la = MAXL;
  if (lb < 0)
    lb = 0;
  if (lb > MAXL)
    lb = MAXL;

  /* The distance is at least |la - lb|. If that already exceeds W the final
   * cell (la, lb) is outside the band, so report the trivial upper bound
   * instead of reading a cell that was never computed. */
  int gap = la > lb ? la - lb : lb - la;
  if (gap > W)
    return la > lb ? la : lb;

  int dp[2][MAXL + 1];
  for (int j = 0; j <= lb; j++) {
    dp[0][j] = j; /* row 0: distance from the empty prefix of a */
  }

  int cur = 1, pre = 0;
  for (int i = 1; i <= la; i++) {
    int start = i > W ? i - W : 0;
    int end = i + W < lb ? i + W : lb;

    /* The first band cell looks at its left neighbour, which is outside the
     * band for this row. */
    if (start > 0)
      dp[cur][start - 1] = OUT_OF_BAND;

    for (int j = start; j <= end; j++) {
      if (j == 0) {
        dp[cur][0] = i; /* distance to the empty prefix of b */
        continue;
      }
      int cost = (a[i - 1] == b[j - 1]) ? 0 : 1;
      dp[cur][j] = min3(dp[pre][j] + 1,         /* delete a[i-1] */
                        dp[cur][j - 1] + 1,     /* insert b[j-1] */
                        dp[pre][j - 1] + cost); /* match / substitute */
    }

    /* The next row's last band cell looks straight up at column end + 1,
     * which is outside the band for this row. */
    if (end < lb)
      dp[cur][end + 1] = OUT_OF_BAND;

    pre ^= 1;
    cur ^= 1;
  }
  /* After the final swap `pre` indexes the last computed row (row 0 if la == 0). */
  return dp[pre][lb];
}
/*
 * Walks a packed buffer of string pairs and folds the banded edit distance of
 * each pair into a 64-bit checksum, returned as a double.
 *
 * Record layout, repeated back to back in pairs[0..total_len):
 *   1 byte  length of A (read as unsigned, 0..255)
 *   1 byte  length of B (read as unsigned, 0..255)
 *   A bytes, then B bytes
 *
 * Parsing stops at the first record whose header or payload does not fit
 * completely inside total_len; such a trailing fragment is ignored.
 */
static double pipeline_run(const char *pairs, int total_len) {
  uint64_t acc = 1469598103934665603ULL;
  int pos = 0;

  /* Both length bytes must be present before either is read. */
  while (total_len - pos >= 2) {
    /* Lengths are used unclamped for pointer arithmetic so the record stride
     * always matches the encoded layout; banded_lev truncates over-long
     * inputs to MAXL itself. */
    int a_len = (unsigned char)pairs[pos];
    int b_len = (unsigned char)pairs[pos + 1];
    pos += 2;

    if (a_len + b_len > total_len - pos)
      break; /* payload truncated */

    const char *A = pairs + pos;
    const char *B = A + a_len;
    int d = banded_lev(A, a_len, B, b_len);
    acc ^= ((uint64_t)d + 0x9e3779b97f4a7c15ULL * (uint64_t)a_len);
    pos += a_len + b_len;
  }

  return (double)acc;
}
/*
 * Benchmark entry point, generated by the BENCH_MAIN_SCALAR3 macro.
 *
 * NOTE(review): the macro is defined outside this file (presumably in
 * bench_harness.h), so the role of each argument is inferred from its shape:
 * identifiers, three numeric constants (presumably problem sizes), a
 * declaration/setup fragment, an input-generation fragment, the kernel call,
 * the result expression, and a teardown fragment. `n` and `seed` are not
 * declared here and are presumably supplied by the macro. Confirm against the
 * harness.
 *
 * Input generation packs records of the form
 *   [len A][len B][A bytes][B bytes]
 * into `pairs`, which is the layout pipeline_run parses.
 */
BENCH_MAIN_SCALAR3(T004_Module_028, LEVBND, 4096, 16384, 65536,
                   /* NOTE(review): the malloc result is not checked here. */
                   char *pairs = (char *)malloc((size_t)n * 8 + 16);
                   int total_len = 0; double ans_scalar = 0.0;
                   , ({
                     bench_rng64_t rng = bench_rng_init(seed);
                     total_len = 0;
                     for (int i = 0; i < n; i++) {
                       /* One random word per string: it picks both the length
                        * and the characters of that string. */
                       unsigned long long r1 = bench_rng_next(&rng);
                       unsigned long long r2 = bench_rng_next(&rng);
                       char A[64], B[64];
                       /* Lengths are in 1..32, so at most 32 bytes of each
                        * 64-byte scratch array are used. */
                       int la = 1 + (int)(r1 % 32ULL);
                       int lb = 1 + (int)(r2 % 32ULL);
                       /* Character k is 'a'..'d', taken from bits 2k and 2k+1
                        * of the word (largest shift is 62). */
                       for (int k = 0; k < la; k++) {
                         A[k] = (char)('a' + (int)((r1 >> (2 * k)) & 3ULL));
                       }
                       for (int k = 0; k < lb; k++) {
                         B[k] = (char)('a' + (int)((r2 >> (2 * k)) & 3ULL));
                       }
                       /* Append the record only if it fits in the n*8+16 byte
                        * buffer; otherwise stop generating, so fewer than n
                        * pairs may be produced. */
                       if (total_len + 2 + la + lb < (int)(n * 8 + 16)) {
                         pairs[total_len++] = (char)la;
                         pairs[total_len++] = (char)lb;
                         memcpy(pairs + total_len, A, la);
                         total_len += la;
                         memcpy(pairs + total_len, B, lb);
                         total_len += lb;
                       } else
                         break;
                     }
                   }),
                   ans_scalar = pipeline_run(pairs, total_len), ans_scalar,
                   free(pairs);)
