
#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Allocate storage for `count` ints.
 *
 * At least one element is always requested so that a NULL return means a
 * real allocation failure rather than an implementation-defined malloc(0)
 * result. The byte count is checked against SIZE_MAX before multiplying.
 */
static int *scc_alloc_ints(size_t count) {
  if (count == 0)
    count = 1;
  if (count > SIZE_MAX / sizeof(int))
    return NULL;
  return malloc(count * sizeof(int));
}

/*
 * kernel_run - longest path in the SCC condensation of a directed graph.
 *
 * The graph has V vertices and E edges; edge i goes from eu[i] to ev[i].
 * Endpoints are normalised into [0, V): negative values clamp to 0 and
 * values >= V wrap modulo V.
 *
 * Steps:
 *   1. Build forward and reverse adjacency lists (linked through edge ids).
 *   2. Two iterative DFS passes label each vertex with a component id.
 *   3. Inter-component edges are de-duplicated through a C x C byte matrix
 *      and turned into adjacency lists for the condensation DAG.
 *   4. A queue-based topological sweep computes, for every component, the
 *      longest path (counted in edges) starting at the component that
 *      contains vertex 0.
 *
 * Parameters:
 *   n       - not used by the computation.
 *   V       - number of vertices.
 *   E       - number of edges.
 *   eu, ev  - arrays of E edge sources / targets (not modified).
 *   ans_out - receives the longest path length as a double. It is set to
 *             0.0 when V <= 0, E < 0, or any allocation fails.
 */
void kernel_run(int n, int V, int E, const int *eu, const int *ev,
                double *ans_out) {
  double ans_double = 0.0;

  /* Every buffer starts NULL so the single cleanup path can free them all. */
  int *head = NULL, *to = NULL, *nxt = NULL;
  int *rhead = NULL, *rto = NULL, *rnxt = NULL;
  int *vis = NULL, *order = NULL, *stack = NULL, *edge_it = NULL;
  int *comp = NULL;
  unsigned char *mat = NULL;
  int *dag_head = NULL, *dag_to = NULL, *dag_nxt = NULL, *indeg = NULL;
  int *dp = NULL, *q = NULL;

  int order_sz = 0;
  int C = 0;
  int dagE = 0;
  int didx = 0;
  int qh = 0, qt = 0;
  int best = 0;

  (void)n;

  /* Without vertices there is nothing to index (and `% V` would divide by
   * zero); a negative edge count is meaningless. */
  if (V <= 0 || E < 0)
    goto cleanup;

  head = scc_alloc_ints((size_t)V);
  rhead = scc_alloc_ints((size_t)V);
  to = scc_alloc_ints((size_t)E);
  nxt = scc_alloc_ints((size_t)E);
  rto = scc_alloc_ints((size_t)E);
  rnxt = scc_alloc_ints((size_t)E);
  vis = calloc((size_t)V, sizeof *vis);
  order = scc_alloc_ints((size_t)V);
  stack = scc_alloc_ints((size_t)V);
  edge_it = scc_alloc_ints((size_t)V);
  comp = scc_alloc_ints((size_t)V);
  if (!head || !rhead || !to || !nxt || !rto || !rnxt || !vis || !order ||
      !stack || !edge_it || !comp)
    goto cleanup;

  for (int i = 0; i < V; i++) {
    head[i] = -1;
    rhead[i] = -1;
    comp[i] = -1;
  }

  /* Forward and reverse adjacency lists. After this loop rto[i] / to[i]
   * hold the normalised source / target of edge i. */
  for (int i = 0; i < E; i++) {
    int u = eu[i];
    int v = ev[i];
    if (u < 0)
      u = 0;
    else if (u >= V)
      u %= V;
    if (v < 0)
      v = 0;
    else if (v >= V)
      v %= V;
    to[i] = v;
    nxt[i] = head[u];
    head[u] = i;
    rto[i] = u;
    rnxt[i] = rhead[v];
    rhead[v] = i;
  }

  /* Pass 1: iterative DFS on the forward graph, recording finish order.
   * Each vertex is pushed at most once, so the stack never exceeds V. */
  for (int st = 0; st < V; st++) {
    if (vis[st])
      continue;
    int sp = 0;
    stack[sp] = st;
    edge_it[sp] = head[st];
    vis[st] = 1;
    while (sp >= 0) {
      int u = stack[sp];
      int e = edge_it[sp];
      if (e != -1) {
        int v = to[e];
        edge_it[sp] = nxt[e];
        if (!vis[v]) {
          vis[v] = 1;
          sp++;
          stack[sp] = v;
          edge_it[sp] = head[v];
        }
      } else {
        order[order_sz++] = u;
        sp--;
      }
    }
  }

  /* Pass 2: DFS on the reverse graph in decreasing finish order; every
   * tree found is one strongly connected component. */
  for (int idx = order_sz - 1; idx >= 0; idx--) {
    int v0 = order[idx];
    if (comp[v0] != -1)
      continue;
    int sp = 0;
    stack[sp] = v0;
    edge_it[sp] = rhead[v0];
    comp[v0] = C;
    while (sp >= 0) {
      int e = edge_it[sp];
      if (e != -1) {
        int w = rto[e];
        edge_it[sp] = rnxt[e];
        if (comp[w] == -1) {
          comp[w] = C;
          sp++;
          stack[sp] = w;
          edge_it[sp] = rhead[w];
        }
      } else {
        sp--;
      }
    }
    C++;
  }

  /* C x C presence matrix; calloc rejects a C * C product that overflows. */
  mat = calloc((size_t)C, (size_t)C);
  if (!mat)
    goto cleanup;

  /* Mark distinct inter-component edges. The normalised endpoints stored
   * in rto/to are used so the lookup agrees with the adjacency build even
   * for negative or out-of-range raw endpoints. */
  for (int i = 0; i < E; i++) {
    int cu = comp[rto[i]];
    int cv = comp[to[i]];
    if (cu != cv) {
      unsigned char *cell = &mat[(size_t)cu * (size_t)C + (size_t)cv];
      if (!*cell) {
        *cell = 1;
        dagE++; /* bounded by E, so it cannot overflow int */
      }
    }
  }

  dag_head = scc_alloc_ints((size_t)C);
  indeg = scc_alloc_ints((size_t)C);
  dp = scc_alloc_ints((size_t)C);
  q = scc_alloc_ints((size_t)C);
  dag_to = scc_alloc_ints((size_t)dagE);
  dag_nxt = scc_alloc_ints((size_t)dagE);
  if (!dag_head || !indeg || !dp || !q || !dag_to || !dag_nxt)
    goto cleanup;

  for (int i = 0; i < C; i++) {
    dag_head[i] = -1;
    indeg[i] = 0;
    dp[i] = -1; /* -1 marks "not reachable from the start component" */
  }

  /* Condensation DAG adjacency lists and in-degrees. */
  for (int i = 0; i < C; i++) {
    for (int j = 0; j < C; j++) {
      if (mat[(size_t)i * (size_t)C + (size_t)j]) {
        dag_to[didx] = j;
        dag_nxt[didx] = dag_head[i];
        dag_head[i] = didx;
        indeg[j]++;
        didx++;
      }
    }
  }

  /* Longest path from the component of vertex 0, relaxed in topological
   * order (queue of zero in-degree components). */
  dp[comp[0]] = 0;
  for (int i = 0; i < C; i++) {
    if (indeg[i] == 0)
      q[qt++] = i;
  }
  while (qh < qt) {
    int u = q[qh++];
    for (int e = dag_head[u]; e != -1; e = dag_nxt[e]) {
      int v = dag_to[e];
      /* Only components reachable from the start extend a path. */
      if (dp[u] >= 0 && dp[u] + 1 > dp[v])
        dp[v] = dp[u] + 1;
      if (--indeg[v] == 0)
        q[qt++] = v;
    }
  }

  for (int i = 0; i < C; i++) {
    if (dp[i] > best)
      best = dp[i];
  }
  ans_double = (double)best;

cleanup:
  /* free(NULL) is a no-op, so no per-pointer guards are needed. */
  free(q);
  free(dp);
  free(dag_nxt);
  free(dag_to);
  free(indeg);
  free(dag_head);
  free(mat);
  free(comp);
  free(edge_it);
  free(stack);
  free(order);
  free(vis);
  free(rnxt);
  free(rto);
  free(rhead);
  free(nxt);
  free(to);
  free(head);

  *ans_out = ans_double;
}
/*
 * Benchmark entry point, generated by BENCH_MAIN_SCALAR3 (defined in a
 * project header not shown here; the role of each macro argument below is
 * inferred from its contents -- TODO confirm against bench_harness.h).
 *
 * The code passed to the macro:
 *   - derives the vertex count V from `n` (n / 512 + 4, capped at 256) and
 *     the edge count E = (V - 1) + 3 * V, then allocates the eu/ev arrays;
 *   - fills the first V - 1 edges with the chain i -> i + 1 and the rest
 *     with pseudo-random endpoint pairs, bumping v when it equals u;
 *   - calls kernel_run and exposes ans_scalar;
 *   - frees eu and ev.
 *
 * `n` and `seed` are not declared in this file; presumably the macro
 * supplies them, with 4096 / 16384 / 65536 being the values used for `n`.
 *
 * NOTE(review): the malloc results for eu/ev are not checked before the
 * fill block writes through them -- confirm whether the harness is expected
 * to tolerate allocation failure.
 */
BENCH_MAIN_SCALAR3(
    T003_Code_019, SCCDAG, 4096, 16384, 65536, int V = n / 512 + 4;
    if (V > 256) V = 256; int base_chain = (V > 1 ? (V - 1) : 0);
    int extraE = V * 3; int E = base_chain + extraE;
    int *eu = (int *)malloc((size_t)E * sizeof(int));
    int *ev = (int *)malloc((size_t)E * sizeof(int)); double ans_scalar = 0.0;
    ,
    {
      bench_rng64_t rng = bench_rng_init(seed);
      int idx = 0;
      /* Backbone chain 0 -> 1 -> ... -> V - 1. */
      for (int i = 0; i < V - 1 && idx < E; i++) {
        eu[idx] = i;
        ev[idx] = i + 1;
        idx++;
      }
      /* Remaining edges: random endpoints in [0, V), avoiding u == v. */
      while (idx < E) {
        int u = (int)(bench_rng_next(&rng) % (uint64_t)V);
        int v = (int)(bench_rng_next(&rng) % (uint64_t)V);
        if (u == v)
          v = (v + 1) % V;
        eu[idx] = u;
        ev[idx] = v;
        idx++;
      }
    },
    kernel_run(n, V, E, eu, ev, &ans_scalar), ans_scalar, free(eu);
    free(ev);)
