
#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Clamp idx into the valid index range [0, n - 1]. Requires n > 0. */
static int kernel_clamp_index(int idx, int n) {
  if (idx < 0) {
    return 0;
  }
  if (idx >= n) {
    return n - 1;
  }
  return idx;
}

/**
 * Answer n offline range queries "how many distinct values occur in
 * a[L..R]" and store the sum of all answers in *ans_out.
 *
 * Values are compared after reduction with `& 63`, so at most 64 distinct
 * values are tracked. The number of queries equals n (query i is
 * [ql[i], qr[i]], inclusive).
 *
 * Queries are visited in a bucketed order: they are grouped by the block
 * (of 256 indices) containing their left endpoint, and inside a block they
 * are ordered by right endpoint, ascending in even blocks and descending in
 * odd blocks. A sliding window [curL, curR] is then moved from one query to
 * the next while a frequency table keeps the distinct count up to date.
 *
 * Both endpoints of every query are clamped into [0, n - 1] and swapped if
 * reversed, so out-of-range or reversed queries never index outside a[].
 *
 * @param n        number of elements in a[] and number of queries
 * @param a        input values, n elements
 * @param ql       left query endpoints, n elements
 * @param qr       right query endpoints, n elements
 * @param ans_out  receives the sum of all query answers; set to 0.0 when
 *                 n <= 0 or when a scratch allocation fails
 */
void kernel_run(int n, const int *a, const int *ql, const int *qr,
                double *ans_out) {
  enum { BLOCK_SIZE = 256, VALUE_SLOTS = 64 };

  /* Nothing to answer; also keeps the size computations below well defined. */
  if (n <= 0) {
    *ans_out = 0.0;
    return;
  }

  const int Q = n;
  /* Ceil-divide without forming n + BLOCK_SIZE - 1, which could overflow. */
  const int numBlocks = n / BLOCK_SIZE + (n % BLOCK_SIZE != 0);

  int *ord = malloc((size_t)Q * sizeof *ord);       /* visiting order */
  int *tmpIdx = malloc((size_t)Q * sizeof *tmpIdx); /* queries of one block */
  int *res = malloc((size_t)Q * sizeof *res);       /* answer per query */
  if (!ord || !tmpIdx || !res) {
    /* free(NULL) is a no-op, so no per-pointer guards are needed. */
    free(ord);
    free(tmpIdx);
    free(res);
    *ans_out = 0.0;
    return;
  }

  /* Phase 1: build the visiting order, one block of left endpoints at a
   * time. Every query lands in exactly one block because its left endpoint
   * is clamped into [0, n - 1]. */
  int ord_sz = 0;
  for (int b = 0; b < numBlocks; b++) {
    int cntBlock = 0;
    for (int i = 0; i < Q; i++) {
      if (kernel_clamp_index(ql[i], n) / BLOCK_SIZE == b) {
        tmpIdx[cntBlock++] = i;
      }
    }

    /* Selection sort by right endpoint; the direction alternates per block
     * so the right pointer sweeps back and forth instead of resetting. */
    const int ascending = (b % 2) == 0;
    for (int ii = 0; ii < cntBlock; ii++) {
      int best = ii;
      for (int jj = ii + 1; jj < cntBlock; jj++) {
        const int Rb = qr[tmpIdx[best]];
        const int Rj = qr[tmpIdx[jj]];
        if (ascending ? (Rj < Rb) : (Rj > Rb)) {
          best = jj;
        }
      }
      const int swp = tmpIdx[ii];
      tmpIdx[ii] = tmpIdx[best];
      tmpIdx[best] = swp;
      ord[ord_sz++] = tmpIdx[ii];
    }
  }

  /* Phase 2: slide the window [curL, curR] across the ordered queries. */
  int freq[VALUE_SLOTS] = {0};
  int distinct = 0;
  int curL = 0;
  int curR = -1; /* empty window */
  for (int k = 0; k < ord_sz; k++) {
    const int id = ord[k];

    /* Clamp BOTH endpoints before ordering them. Clamping only one side of
     * each endpoint lets a negative qr[] or a ql[] >= n survive the swap
     * and drive the window outside a[]. */
    int L = kernel_clamp_index(ql[id], n);
    int R = kernel_clamp_index(qr[id], n);
    if (L > R) {
      const int tmp = L;
      L = R;
      R = tmp;
    }

    /* Grow the window first, then shrink it, so that no counter is
     * decremented before the matching element has been added. */
    while (curL > L) {
      curL--;
      const int v = a[curL] & 63;
      if (++freq[v] == 1) {
        distinct++;
      }
    }
    while (curR < R) {
      curR++;
      const int v = a[curR] & 63;
      if (++freq[v] == 1) {
        distinct++;
      }
    }
    while (curL < L) {
      const int v = a[curL] & 63;
      if (--freq[v] == 0) {
        distinct--;
      }
      curL++;
    }
    while (curR > R) {
      const int v = a[curR] & 63;
      if (--freq[v] == 0) {
        distinct--;
      }
      curR--;
    }
    res[id] = distinct;
  }

  /* Reduce all answers to a single checksum-like value. */
  long long total = 0;
  for (int i = 0; i < Q; i++) {
    total += (long long)res[i];
  }
  *ans_out = (double)total;

  free(ord);
  free(tmpIdx);
  free(res);
}
/*
 * Benchmark driver for kernel_run, generated by the BENCH_MAIN_SCALAR3 macro
 * (not defined in this file; presumably in bench_harness.h).
 *
 * NOTE(review): the roles of the macro arguments are inferred from their
 * contents only -- identifiers and three numeric values, a declaration
 * section, an initialisation block, the kernel call, the result expression
 * and a cleanup section. Confirm against the macro definition.
 *
 * `n` and `seed` are not declared here; presumably the macro expansion
 * provides them.
 *
 * Arguments are split on top-level commas, so avoid introducing commas
 * outside parentheses/braces when editing the statements below.
 */
BENCH_MAIN_SCALAR3(
    T003_Code_056, MO1, 4096, 16384, 65536,
    /* Input array, query endpoints and the scalar result slot. */
    /* NOTE(review): the malloc results are used without a NULL check. */
    int *a = (int *)malloc((size_t)n * sizeof(int));
    int *ql = (int *)malloc((size_t)n * sizeof(int));
    int *qr = (int *)malloc((size_t)n * sizeof(int)); double ans_scalar = 0.0;
    ,
    {
      bench_rng64_t rng = bench_rng_init(seed);
      /* Values are restricted to 0..63 by the mask. */
      for (int i = 0; i < n; i++) {
        a[i] = (int)(bench_rng_next(&rng) & 63ULL);
      }
      /* n queries with both endpoints drawn modulo n, stored as L <= R. */
      for (int i = 0; i < n; i++) {
        int L = (int)(bench_rng_next(&rng) % (unsigned long long)n);
        int R = (int)(bench_rng_next(&rng) % (unsigned long long)n);
        if (L > R) {
          int tmp = L;
          L = R;
          R = tmp;
        }
        ql[i] = L;
        qr[i] = R;
      }
    },
    /* Kernel call, the value it produced, then release of the buffers. */
    kernel_run(n, a, ql, qr, &ans_scalar), ans_scalar, free(a);
    free(ql); free(qr);)
