
#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Computes the size of a maximum matching in a bipartite graph and stores it,
 * converted to double, in *ans_out.
 *
 * Parameters:
 *   n        - unused by the kernel; kept for interface compatibility.
 *   nL, nR   - number of left / right vertices.
 *   E        - number of edges.
 *   left_u   - E left endpoints.  Negative values are clamped to 0, values
 *              >= nL are reduced modulo nL.
 *   left_v   - E right endpoints. Negative values are clamped to 0, values
 *              >= nR are reduced modulo nR.
 *   ans_out  - receives the matching size.
 *
 * Result is 0.0 when the graph is degenerate (nL, nR or E not positive) or
 * when any working buffer cannot be allocated.
 *
 * Method: repeated breadth-first search from every unmatched left vertex;
 * each round flips one augmenting path, so there are at most min(nL, nR)
 * successful rounds, each costing O(nL + nR + E).
 */
void kernel_run(int n, int nL, int nR, int E, const int *left_u,
                const int *left_v, double *ans_out) {
  (void)n;

  double ans_double = 0.0;
  long long matching = 0;

  /* Edge lists in "forward star" form: headL[u] -> edge -> nxt[edge] ... */
  int *headL = NULL;
  int *to = NULL;
  int *nxt = NULL;
  /* Current matching: pairU[u] = matched right vertex or -1, and vice versa. */
  int *pairU = NULL;
  int *pairV = NULL;
  /* Per-round BFS scratch space, allocated once and reset every round. */
  char *visL = NULL;
  char *visR = NULL;
  int *prevR = NULL;
  int *q = NULL;

  /*
   * An empty side or an empty edge set can only yield an empty matching.
   * Returning early also keeps the "% nL" / "% nR" normalisation below from
   * dividing by zero and avoids relying on what malloc(0) returns.
   */
  if (nL <= 0 || nR <= 0 || E <= 0) {
    *ans_out = 0.0;
    return;
  }

  headL = (int *)malloc((size_t)nL * sizeof *headL);
  to = (int *)malloc((size_t)E * sizeof *to);
  nxt = (int *)malloc((size_t)E * sizeof *nxt);
  pairU = (int *)malloc((size_t)nL * sizeof *pairU);
  pairV = (int *)malloc((size_t)nR * sizeof *pairV);
  visL = (char *)malloc((size_t)nL);
  visR = (char *)malloc((size_t)nR);
  prevR = (int *)malloc((size_t)nR * sizeof *prevR);
  q = (int *)malloc((size_t)nL * sizeof *q);
  if (!headL || !to || !nxt || !pairU || !pairV || !visL || !visR || !prevR ||
      !q) {
    goto cleanup; /* ans_double is still 0.0 */
  }

  for (int i = 0; i < nL; i++) {
    headL[i] = -1;
    pairU[i] = -1;
  }
  for (int j = 0; j < nR; j++) {
    pairV[j] = -1;
  }

  /* Build adjacency lists, forcing every endpoint into its valid range. */
  for (int e = 0; e < E; e++) {
    int u = left_u[e];
    int v = left_v[e];
    if (u < 0) {
      u = 0;
    }
    if (u >= nL) {
      u %= nL;
    }
    if (v < 0) {
      v = 0;
    }
    if (v >= nR) {
      v %= nR;
    }
    to[e] = v;
    nxt[e] = headL[u];
    headL[u] = e;
  }

  for (;;) {
    memset(visL, 0, (size_t)nL);
    memset(visR, 0, (size_t)nR);
    for (int j = 0; j < nR; j++) {
      prevR[j] = -1;
    }

    /* Seed the queue with every unmatched left vertex. */
    int qh = 0, qt = 0;
    for (int u = 0; u < nL; u++) {
      if (pairU[u] == -1) {
        visL[u] = 1;
        q[qt++] = u;
      }
    }

    /* BFS along alternating paths until a free right vertex is reached.
     * Each left vertex is enqueued at most once, so q never overflows. */
    int found_v = -1;
    while (qh < qt && found_v == -1) {
      int u = q[qh++];
      for (int e = headL[u]; e != -1; e = nxt[e]) {
        int v = to[e];
        if (visR[v]) {
          continue;
        }
        visR[v] = 1;
        prevR[v] = u; /* remember how v was reached for the path flip */
        if (pairV[v] == -1) {
          found_v = v;
          break;
        }
        int u2 = pairV[v];
        if (!visL[u2]) {
          visL[u2] = 1;
          q[qt++] = u2;
        }
      }
    }

    if (found_v == -1) {
      break; /* no augmenting path left: the matching is maximum */
    }

    /* Walk the path backwards, flipping matched/unmatched edges. The walk
     * ends at the free left vertex the search started from (pairU == -1). */
    int vcur = found_v;
    while (vcur != -1) {
      int u_prev = prevR[vcur];
      int next_v = pairU[u_prev];
      pairU[u_prev] = vcur;
      pairV[vcur] = u_prev;
      vcur = next_v;
    }
    matching++;
  }
  ans_double = (double)matching;

cleanup:
  /* free(NULL) is a no-op, so one exit path serves success and failure. */
  free(q);
  free(prevR);
  free(visR);
  free(visL);
  free(pairV);
  free(pairU);
  free(nxt);
  free(to);
  free(headL);

  *ans_out = ans_double;
}
/*
 * Benchmark entry point, produced by the BENCH_MAIN_SCALAR3 macro. The macro
 * is not defined in this file (presumably it comes from bench_harness.h), so
 * the role of each argument below is read off the argument text only --
 * TODO confirm against the macro definition.
 *
 *   T003_Code_017, HKARP   identifiers passed through to the harness.
 *   4096, 16384, 65536     three integer constants (presumably the problem
 *                          sizes that become `n` -- confirm).
 *   setup fragment         nL = nR = n / 256 + 4, each capped at 256;
 *                          E = nL * 8 (never below nL); allocates the edge
 *                          endpoint arrays elu / elv and declares ans_scalar.
 *   init block             fills elu / elv with pseudo-random endpoints in
 *                          [0, nL) and [0, nR) from a bench_rng64_t generator
 *                          initialised with `seed`.
 *   kernel expression      kernel_run(n, nL, nR, E, elu, elv, &ans_scalar).
 *   result expression      ans_scalar.
 *   cleanup fragment       frees elu and elv.
 *
 * `n` and `seed` are not declared here; presumably the macro expansion
 * provides them.
 *
 * NOTE(review): the results of the two malloc calls are never checked before
 * the init block writes through elu / elv.
 */
BENCH_MAIN_SCALAR3(
    T003_Code_017, HKARP, 4096, 16384, 65536, int nL = n / 256 + 4;
    if (nL > 256) nL = 256; int nR = n / 256 + 4; if (nR > 256) nR = 256;
    int E = nL * 8; if (E < nL) E = nL;
    int *elu = (int *)malloc((size_t)E * sizeof(int));
    int *elv = (int *)malloc((size_t)E * sizeof(int)); double ans_scalar = 0.0;
    ,
    {
      bench_rng64_t rng = bench_rng_init(seed);
      for (int i = 0; i < E; i++) {
        elu[i] = (int)(bench_rng_next(&rng) % (uint64_t)nL);
        elv[i] = (int)(bench_rng_next(&rng) % (uint64_t)nR);
      }
    },
    kernel_run(n, nL, nR, E, elu, elv, &ans_scalar), ans_scalar, free(elu);
    free(elv);)
