
#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Clamp v into the inclusive range [lo, hi]. */
static int mo_clamp(int v, int lo, int hi) {
  if (v < lo)
    return lo;
  if (v > hi)
    return hi;
  return v;
}

/*
 * One stable counting-sort pass over query ids.
 *
 * Reads `count` ids from `src` and writes them to `dst` ordered by
 * key = vals[id] / divisor. Ids with equal keys keep their relative order, so
 * chaining passes from least- to most-significant key yields a lexicographic
 * sort. `cnt` is scratch space with at least max_key + 1 entries; every key
 * must lie in [0, max_key].
 */
static void mo_counting_pass(const int *src, int *dst, int count,
                             const int *vals, int divisor, int *cnt,
                             int max_key) {
  memset(cnt, 0, ((size_t)max_key + 1) * sizeof(int));
  for (int i = 0; i < count; i++)
    cnt[vals[src[i]] / divisor]++;

  /* Turn the histogram into exclusive prefix sums (first output slot). */
  int running = 0;
  for (int k = 0; k <= max_key; k++) {
    int c = cnt[k];
    cnt[k] = running;
    running += c;
  }

  for (int i = 0; i < count; i++) {
    int id = src[i];
    dst[cnt[vals[id] / divisor]++] = id;
  }
}

/* Count value v into the window; bump *distinct if it is the first copy. */
static void mo_include(int *freq, int *distinct, int v) {
  if (++freq[v] == 1)
    (*distinct)++;
}

/* Remove value v from the window; drop *distinct if it was the last copy. */
static void mo_exclude(int *freq, int *distinct, int v) {
  if (--freq[v] == 0)
    (*distinct)--;
}

/*
 * Store v at cur[p]. When p lies inside the current window [winL, winR] the
 * frequency table is adjusted for the value being replaced and the new one.
 */
static void mo_assign(int *cur, int p, int v, int winL, int winR, int *freq,
                      int *distinct) {
  if (p >= winL && p <= winR) {
    mo_exclude(freq, distinct, cur[p]);
    mo_include(freq, distinct, v);
  }
  cur[p] = v;
}

/*
 * Answers Q = n / 2 offline "number of distinct values in a[L..R] after the
 * first t updates" queries using Mo's algorithm with a time dimension, and
 * writes the sum of all answers to *ans_out.
 *
 *   n        number of array elements; also fixes Q = n / 2 queries and
 *            U = n - Q updates
 *   a        initial array (n entries); only the low 6 bits of each value
 *            are used
 *   qL, qR   query bounds (Q entries each); clamped to [0, n-1] and swapped
 *            if reversed
 *   qTime    number of updates applied before each query (Q entries);
 *            clamped to [0, U]
 *   uPos     position written by each update (U entries); clamped to
 *            [0, n-1]
 *   uNew     value written by each update (U entries, low 6 bits used)
 *   uOld     value present before each update (U entries, low 6 bits used);
 *            used to roll an update back
 *   ans_out  receives the sum of the answers; 0.0 when there are no queries
 *            or an allocation fails
 */
void kernel_run(int n, const int *a, const int *qL, const int *qR,
                const int *qTime, const int *uPos, const int *uNew,
                const int *uOld, double *ans_out) {
  enum { BLOCK_SIZE = 128, VALUE_MASK = 63, VALUE_COUNT = 64 };

  *ans_out = 0.0;
  if (n < 2)
    return; /* n / 2 == 0: there are no queries to answer */

  const int Q = n / 2;
  const int U = n - Q;

  int *lo = (int *)malloc((size_t)Q * sizeof(int));
  int *hi = (int *)malloc((size_t)Q * sizeof(int));
  int *tm = (int *)malloc((size_t)Q * sizeof(int));
  int *ordA = (int *)malloc((size_t)Q * sizeof(int));
  int *ordB = (int *)malloc((size_t)Q * sizeof(int));
  int *curA = (int *)malloc((size_t)n * sizeof(int));
  /* Every sort key is at most U: timestamps are clamped to [0, U] and block
   * indices are at most (n - 1) / BLOCK_SIZE, which never exceeds U. */
  int *cnt = (int *)malloc(((size_t)U + 1) * sizeof(int));
  if (!lo || !hi || !tm || !ordA || !ordB || !curA || !cnt) {
    free(lo);
    free(hi);
    free(tm);
    free(ordA);
    free(ordB);
    free(curA);
    free(cnt);
    return;
  }

  /* Normalise every query once so sorting and sweeping see the same values. */
  int maxBL = 0, maxBR = 0, maxT = 0;
  for (int i = 0; i < Q; i++) {
    int L = mo_clamp(qL[i], 0, n - 1);
    int R = mo_clamp(qR[i], 0, n - 1);
    if (L > R) {
      int tmp = L;
      L = R;
      R = tmp;
    }
    lo[i] = L;
    hi[i] = R;
    tm[i] = mo_clamp(qTime[i], 0, U);
    ordA[i] = i;
    if (L / BLOCK_SIZE > maxBL)
      maxBL = L / BLOCK_SIZE;
    if (R / BLOCK_SIZE > maxBR)
      maxBR = R / BLOCK_SIZE;
    if (tm[i] > maxT)
      maxT = tm[i];
  }

  /* LSD radix sort: time, then right block, then left block, giving the
   * order (left block, right block, time). */
  mo_counting_pass(ordA, ordB, Q, tm, 1, cnt, maxT);
  mo_counting_pass(ordB, ordA, Q, hi, BLOCK_SIZE, cnt, maxBR);
  mo_counting_pass(ordA, ordB, Q, lo, BLOCK_SIZE, cnt, maxBL);
  const int *order = ordB;

  for (int i = 0; i < n; i++)
    curA[i] = a[i] & VALUE_MASK;

  int freq[VALUE_COUNT] = {0};
  int distinct = 0;
  int curL = 0;
  int curR = -1; /* empty window */
  int curT = 0;  /* number of updates currently applied to curA */
  long long total = 0;

  for (int qi = 0; qi < Q; qi++) {
    const int qid = order[qi];
    const int L = lo[qid];
    const int R = hi[qid];
    const int targetT = tm[qid];

    /* Move through time first: apply or roll back updates. */
    while (curT < targetT) {
      int p = mo_clamp(uPos[curT], 0, n - 1);
      mo_assign(curA, p, uNew[curT] & VALUE_MASK, curL, curR, freq, &distinct);
      curT++;
    }
    while (curT > targetT) {
      curT--;
      int p = mo_clamp(uPos[curT], 0, n - 1);
      mo_assign(curA, p, uOld[curT] & VALUE_MASK, curL, curR, freq, &distinct);
    }

    /* Grow the window before shrinking it so it never becomes inverted. */
    while (curL > L)
      mo_include(freq, &distinct, curA[--curL]);
    while (curR < R)
      mo_include(freq, &distinct, curA[++curR]);
    while (curL < L)
      mo_exclude(freq, &distinct, curA[curL++]);
    while (curR > R)
      mo_exclude(freq, &distinct, curA[curR--]);

    total += distinct;
  }

  *ans_out = (double)total;

  free(lo);
  free(hi);
  free(tm);
  free(ordA);
  free(ordB);
  free(curA);
  free(cnt);
}
/*
 * Benchmark driver for kernel_run, instantiated through BENCH_MAIN_SCALAR3.
 * The macro is defined outside this file, so the roles of its arguments are
 * inferred from their shape — TODO confirm against bench_harness.h:
 *   identifiers, three numeric values (presumably problem sizes), buffer
 *   declarations, an input-generation block, the kernel call, the result
 *   expression, and cleanup statements.
 * `n` and `seed` are not declared here; presumably the macro expansion
 * provides them.
 */
BENCH_MAIN_SCALAR3(
    T003_Code_057, MOUPD, 4096, 16384, 65536,
    /* Buffers: `a` and `tmpA` hold n ints; the query and update arrays hold
     * n / 2 + 5 ints each.
     * NOTE(review): none of these malloc results are checked before use. */
    int *a = (int *)malloc((size_t)n * sizeof(int));
    int half = n / 2 + 5; int *qL = (int *)malloc((size_t)half * sizeof(int));
    int *qR = (int *)malloc((size_t)half * sizeof(int));
    int *qTime = (int *)malloc((size_t)half * sizeof(int));
    int *uPos = (int *)malloc((size_t)half * sizeof(int));
    int *uNew = (int *)malloc((size_t)half * sizeof(int));
    int *uOld = (int *)malloc((size_t)half * sizeof(int));
    int *tmpA = (int *)malloc((size_t)n * sizeof(int)); double ans_scalar = 0.0;
    ,
    {
      /* Fill `a` with pseudo-random values in 0..63. */
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < n; i++) {
        a[i] = (int)(bench_rng_next(&rng) & 63ULL);
      }
      /* tmpA is a working copy that tracks the array as updates are
       * generated, so each update can record the value it overwrites. */
      for (int i = 0; i < n; i++) {
        tmpA[i] = a[i];
      }
      int updi = 0;
      int qi = 0;
      int updCount = 0;
      /* Even steps emit an update, odd steps emit a query. */
      for (int step = 0; step < n; step++) {
        if ((step & 1) == 0) {
          int p = (int)(bench_rng_next(&rng) % (unsigned long long)n);
          int nv = (int)(bench_rng_next(&rng) & 63ULL);
          uPos[updi] = p;
          uOld[updi] = tmpA[p]; /* value at p before this update */
          uNew[updi] = nv;
          tmpA[p] = nv;
          updi++;
          updCount++;
        } else {
          int L = (int)(bench_rng_next(&rng) % (unsigned long long)n);
          int R = (int)(bench_rng_next(&rng) % (unsigned long long)n);
          /* Order the bounds so that L <= R. */
          if (L > R) {
            int t = L;
            L = R;
            R = t;
          }
          qL[qi] = L;
          qR[qi] = R;
          /* The query sees the array after all updates generated so far. */
          qTime[qi] = updCount;
          qi++;
        }
      }
      /* NOTE(review): tmpA is released inside this block. If the harness
       * runs the block more than once per allocation, the next run would use
       * and free a dangling pointer — confirm against the macro. */
      free(tmpA);
    },
    kernel_run(n, a, qL, qR, qTime, uPos, uNew, uOld, &ans_scalar), ans_scalar,
    free(a);
    free(qL); free(qR); free(qTime); free(uPos); free(uNew); free(uOld);)
