#include "bench_harness.h"
#include "bench_utils.h"
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * One stable counting-sort pass over suffix start positions.
 *
 * Copies the positions in src[0..len) into dst[0..len), ordered by the key
 * rank[p + offset] + 1, where a position whose p + offset falls outside the
 * text gets key 0 (so shorter suffixes sort first). cnt must have room for
 * `buckets` ints and every key must be smaller than `buckets`.
 */
static void sa_lcs_counting_pass(const int *src, int *dst, const int *rank,
                                 int *cnt, int len, int offset, int buckets) {
  memset(cnt, 0, (size_t)buckets * sizeof *cnt);
  for (int i = 0; i < len; i++) {
    const int p = src[i];
    const int key = (p + offset < len) ? rank[p + offset] + 1 : 0;
    cnt[key]++;
  }

  /* Turn the histogram into exclusive prefix sums (first slot per key). */
  int running = 0;
  for (int v = 0; v < buckets; v++) {
    const int occurrences = cnt[v];
    cnt[v] = running;
    running += occurrences;
  }

  for (int i = 0; i < len; i++) {
    const int p = src[i];
    const int key = (p + offset < len) ? rank[p + offset] + 1 : 0;
    dst[cnt[key]++] = p;
  }
}

/*
 * Sorts all suffixes of a len-symbol text by prefix doubling.
 *
 * On entry rank[i] holds the symbol class of position i, with every value in
 * [0, classes). On return sa[0..len) lists the suffix start positions in
 * ascending lexicographic order. rank, tmp_sa, tmp_rank and cnt are scratch
 * and are overwritten; cnt needs at least max(classes, len) + 1 ints.
 */
static void sa_lcs_sort_suffixes(int *sa, int *tmp_sa, int *rank,
                                 int *tmp_rank, int *cnt, int len,
                                 int classes) {
  for (int i = 0; i < len; i++) {
    sa[i] = i;
  }

  for (int k = 1; k < len; k <<= 1) {
    /* Keys are 0 (past the end) or rank + 1, i.e. 0..classes inclusive. */
    const int buckets = classes + 1;

    /* LSD radix sort on the pair (rank[p], rank[p + k]): low key first. */
    sa_lcs_counting_pass(sa, tmp_sa, rank, cnt, len, k, buckets);
    sa_lcs_counting_pass(tmp_sa, sa, rank, cnt, len, 0, buckets);

    /* Re-rank: neighbours with an identical key pair share a class. */
    tmp_rank[sa[0]] = 0;
    int next_classes = 1;
    for (int i = 1; i < len; i++) {
      const int cur = sa[i];
      const int prev = sa[i - 1];
      const int cur_second = (cur + k < len) ? rank[cur + k] : -1;
      const int prev_second = (prev + k < len) ? rank[prev + k] : -1;
      if (rank[cur] != rank[prev] || cur_second != prev_second) {
        next_classes++;
      }
      tmp_rank[cur] = next_classes - 1;
    }
    memcpy(rank, tmp_rank, (size_t)len * sizeof *rank);

    classes = next_classes;
    if (classes == len) {
      break; /* every suffix already has a distinct rank */
    }
  }
}

/*
 * Kasai-style LCP construction.
 *
 * Fills inv[] with the inverse of sa[] and sets lcp[r] to the length of the
 * common prefix of the suffixes starting at sa[r] and sa[r - 1] (lcp[0] = 0).
 * The position `sep` is a separator that equals no other symbol, so a match
 * is never extended onto or across it; text[sep] itself is never read.
 */
static void sa_lcs_build_lcp(const unsigned char *text, const int *sa,
                             int *inv, int *lcp, int len, int sep) {
  for (int r = 0; r < len; r++) {
    inv[sa[r]] = r;
  }

  int h = 0;
  for (int i = 0; i < len; i++) {
    const int r = inv[i];
    if (r == 0) {
      lcp[r] = 0;
      continue;
    }
    const int j = sa[r - 1];
    /* Symbols each suffix has before it reaches the separator or the end. */
    const int room_i = ((i <= sep) ? sep : len) - i;
    const int room_j = ((j <= sep) ? sep : len) - j;
    const int limit = (room_i < room_j) ? room_i : room_j;
    while (h < limit && text[i + h] == text[j + h]) {
      h++;
    }
    lcp[r] = h;
    if (h > 0) {
      h--; /* the next suffix shares at least h - 1 symbols with its neighbour */
    }
  }
}

/*
 * Returns the largest lcp[r] over suffix-array neighbours that start on
 * opposite sides of the separator position `sep`.
 */
static int sa_lcs_best_cross_pair(const int *sa, const int *lcp, int len,
                                  int sep) {
  int best = 0;
  for (int r = 1; r < len; r++) {
    const int cur = sa[r];
    const int prev = sa[r - 1];
    if (cur == sep || prev == sep) {
      continue;
    }
    if ((cur < sep) != (prev < sep) && lcp[r] > best) {
      best = lcp[r];
    }
  }
  return best;
}

/*
 * Computes the length of the longest common substring of two byte strings.
 *
 * The first n / 2 bytes of A and the first n - n / 2 bytes of B are joined
 * around a separator; a suffix array and LCP array are built over the joined
 * text and the best LCP between neighbouring suffixes from different inputs
 * is taken.
 *
 * The separator is given a symbol class (256) outside the byte range, so the
 * result is correct for arbitrary byte values, including '{'.
 *
 * n        combined length; A supplies n / 2 bytes, B supplies the rest.
 * A, B     input bytes; not modified.
 * ans_out  receives the length as a double. Receives 0.0 when n is negative,
 *          too large for int index arithmetic, or when allocation fails.
 */
void kernel_run(int n, const unsigned char *A, const unsigned char *B,
                double *ans_out) {
  enum {
    SEPARATOR_CLASS = 256, /* sorts after, and differs from, every byte */
    COUNT_SLACK = 300      /* counting array must cover 258 initial buckets */
  };
  double result = 0.0;

  /* Keep n + 1, position + stride, and stride doubling within int range, and
   * keep the allocation sizes within size_t. */
  if (n < 0 || n >= INT_MAX / 2 ||
      (size_t)n + 1u + COUNT_SLACK > SIZE_MAX / sizeof(int)) {
    *ans_out = result;
    return;
  }

  const int nA = n / 2;
  const int nB = n - nA;
  const int N = nA + 1 + nB;
  const size_t count = (size_t)N;

  unsigned char *S = (unsigned char *)malloc(count * sizeof *S);
  int *sa = (int *)malloc(count * sizeof *sa);
  int *tmp_sa = (int *)malloc(count * sizeof *tmp_sa);
  int *rankv = (int *)malloc(count * sizeof *rankv);
  int *tmp_rankv = (int *)malloc(count * sizeof *tmp_rankv);
  int *cnt = (int *)malloc((count + COUNT_SLACK) * sizeof *cnt);
  int *inv = (int *)malloc(count * sizeof *inv);
  int *lcp = (int *)malloc(count * sizeof *lcp);

  if (S && sa && tmp_sa && rankv && tmp_rankv && cnt && inv && lcp) {
    /* Joined text: A, one separator slot, then B. */
    if (nA > 0) {
      memcpy(S, A, (size_t)nA);
    }
    S[nA] = 0; /* placeholder only; the separator is identified by index */
    if (nB > 0) {
      memcpy(S + nA + 1, B, (size_t)nB);
    }

    for (int i = 0; i < N; i++) {
      rankv[i] = (int)S[i];
    }
    rankv[nA] = SEPARATOR_CLASS;

    sa_lcs_sort_suffixes(sa, tmp_sa, rankv, tmp_rankv, cnt, N,
                         SEPARATOR_CLASS + 1);
    sa_lcs_build_lcp(S, sa, inv, lcp, N, nA);
    result = (double)sa_lcs_best_cross_pair(sa, lcp, N, nA);
  }

  /* free(NULL) is a no-op, so partial allocation failure needs no guards. */
  free(S);
  free(sa);
  free(tmp_sa);
  free(rankv);
  free(tmp_rankv);
  free(cnt);
  free(inv);
  free(lcp);

  *ans_out = result;
}
/*
 * Benchmark entry point for kernel_run, produced by expanding the
 * BENCH_MAIN_SCALAR3 macro (defined outside this file, presumably in
 * bench_harness.h).
 *
 * NOTE(review): the roles of the macro arguments are inferred from how the
 * fragments read and should be confirmed against the macro definition:
 * two identifiers, three integer constants (presumably the problem sizes
 * bound to `n`), a declaration/allocation fragment, an input-fill block, the
 * kernel call, the result expression, and a cleanup fragment. `n` and `seed`
 * are not declared here and are presumably provided by the macro.
 *
 * What the fragments themselves do: A receives n / 2 bytes and B receives
 * n - n / 2 bytes, each filled with 'a' plus the generator output modulo 26;
 * kernel_run writes its result into ans_scalar; A and B are freed afterwards.
 * The malloc results are not checked in these fragments.
 */
BENCH_MAIN_SCALAR3(
    T003_Code_012, SA_LCP, 4096, 16384, 65536, int nA = n / 2; int nB = n - nA;
    /* Input buffers sized to match the split kernel_run performs on n. */
    unsigned char *A = (unsigned char *)malloc((size_t)nA *
                                               sizeof(unsigned char));
    unsigned char *B = (unsigned char *)malloc((size_t)nB *
                                               sizeof(unsigned char));
    double ans_scalar = 0.0;
    ,
    {
      /* Fill A first, then B, from a single generator seeded with `seed`. */
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < nA; i++) {
        A[i] = (unsigned char)('a' + (bench_rng_next(&rng) % 26ULL));
      }
      for (int i = 0; i < nB; i++) {
        B[i] = (unsigned char)('a' + (bench_rng_next(&rng) % 26ULL));
      }
    },
    kernel_run(n, A, B, &ans_scalar), ans_scalar, free(A);
    free(B);)
