#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdlib.h>

/**
 * Min-cost max-flow between vertex 0 (source) and vertex V-1 (sink), computed
 * with successive shortest augmenting paths; each shortest path is found by a
 * queue-based Bellman-Ford (SPFA) search over the residual graph.
 *
 * @param n         Unused by the solver; kept for the harness call signature.
 * @param V         Number of vertices. If V <= 0 there is no source or sink
 *                  and the result is 0.0.
 * @param E         Number of input edges; a negative value is treated as 0.
 * @param edge_u    Tail vertex of each edge (E entries). Negative values are
 *                  mapped to 0, values >= V are reduced modulo V.
 * @param edge_v    Head vertex of each edge, normalised like edge_u.
 * @param edge_cap  Capacity of each edge; values <= 0 are replaced by 1.
 * @param edge_cost Per-unit cost of each edge.
 * @param ans_out   Receives the total cost of the flow as a double, or 0.0
 *                  when V <= 0 or a working array cannot be allocated.
 *
 * Note: the shortest-path search only terminates when the residual graph has
 * no negative-cost cycle reachable from the source.
 */
void kernel_run(int n, int V, int E, const int *edge_u, const int *edge_v,
                const int *edge_cap, const int *edge_cost, double *ans_out) {
  (void)n;

  /* Default result; overwritten only when the solver runs to completion. */
  *ans_out = 0.0;

  /* Without this guard an empty vertex set would write dist[0] out of bounds
   * and evaluate `% V` with V == 0. */
  if (V <= 0) {
    return;
  }
  if (E < 0) {
    E = 0;
  }

  const int INF = 2147483647;
  const int s = 0;
  const int t = V - 1;

  /* Sizes are computed in size_t so that 2 * E cannot overflow int. The two
   * spare slots keep the arc arrays non-empty when E == 0, so a NULL return
   * always means a real allocation failure. */
  const size_t nv = (size_t)V;
  const size_t max_arcs = (size_t)E * 2 + 2;

  int ecnt = 0;
  long long total_cost = 0;

  /* Residual graph as forward-star adjacency lists: head[u] is the first arc
   * leaving u, nxt[e] the next arc in the same list, rev[e] the paired
   * reverse arc. */
  int *head = malloc(nv * sizeof *head);
  int *to = malloc(max_arcs * sizeof *to);
  int *cap = malloc(max_arcs * sizeof *cap);
  int *cost = malloc(max_arcs * sizeof *cost);
  int *nxt = malloc(max_arcs * sizeof *nxt);
  int *rev = malloc(max_arcs * sizeof *rev);

  /* Per-vertex scratch space for the shortest-path search. */
  int *dist = malloc(nv * sizeof *dist);
  int *inqueue = malloc(nv * sizeof *inqueue);
  int *prev_edge = malloc(nv * sizeof *prev_edge);
  int *q = malloc(nv * sizeof *q);

  if (!head || !to || !cap || !cost || !nxt || !rev || !dist || !inqueue ||
      !prev_edge || !q) {
    goto cleanup;
  }

  for (int i = 0; i < V; i++) {
    head[i] = -1;
  }

  /* Insert every input edge as a forward arc plus a zero-capacity reverse
   * arc carrying the negated cost. */
  for (int i = 0; i < E; i++) {
    int u = edge_u[i];
    int v = edge_v[i];
    int c = edge_cap[i];
    int w = edge_cost[i];

    if (u < 0) {
      u = 0;
    }
    if (u >= V) {
      u %= V;
    }
    if (v < 0) {
      v = 0;
    }
    if (v >= V) {
      v %= V;
    }
    if (c <= 0) {
      c = 1;
    }

    to[ecnt] = v;
    cap[ecnt] = c;
    cost[ecnt] = w;
    nxt[ecnt] = head[u];
    rev[ecnt] = ecnt + 1;
    head[u] = ecnt;

    to[ecnt + 1] = u;
    cap[ecnt + 1] = 0;
    cost[ecnt + 1] = -w;
    nxt[ecnt + 1] = head[v];
    rev[ecnt + 1] = ecnt;
    head[v] = ecnt + 1;

    ecnt += 2;
  }

  for (;;) {
    for (int i = 0; i < V; i++) {
      dist[i] = INF;
      inqueue[i] = 0;
      prev_edge[i] = -1;
    }
    dist[s] = 0;

    /* Circular queue of capacity V; the inqueue flags ensure that each
     * vertex is queued at most once at any time, so it cannot overflow. */
    int qh = 0;
    int qt = 0;
    int qlen = 0;

    q[qt] = s;
    qt = (qt + 1) % V;
    qlen++;
    inqueue[s] = 1;

    while (qlen > 0) {
      int u = q[qh];
      qh = (qh + 1) % V;
      qlen--;
      inqueue[u] = 0;

      for (int e = head[u]; e != -1; e = nxt[e]) {
        if (cap[e] <= 0) {
          continue;
        }
        int v = to[e];
        /* Only vertices with a finite distance are ever queued, so this
         * addition never starts from INF. */
        int nd = dist[u] + cost[e];
        if (nd < dist[v]) {
          dist[v] = nd;
          prev_edge[v] = e;
          if (!inqueue[v]) {
            q[qt] = v;
            qt = (qt + 1) % V;
            qlen++;
            inqueue[v] = 1;
          }
        }
      }
    }

    /* The sink was not reached: no augmenting path is left. */
    if (prev_edge[t] == -1) {
      break;
    }

    /* Bottleneck capacity along the path, walking sink -> source through
     * the tail vertex of each predecessor arc. */
    int aug = INF;
    for (int v = t; v != s; v = to[rev[prev_edge[v]]]) {
      int e = prev_edge[v];
      if (cap[e] < aug) {
        aug = cap[e];
      }
    }

    /* Push the bottleneck amount and update the residual capacities. */
    for (int v = t; v != s; v = to[rev[prev_edge[v]]]) {
      int e = prev_edge[v];
      cap[e] -= aug;
      cap[rev[e]] += aug;
    }

    total_cost += (long long)aug * (long long)dist[t];
  }

  *ans_out = (double)total_cost;

cleanup:
  /* free(NULL) is a no-op, so one path serves both success and failure. */
  free(q);
  free(prev_edge);
  free(inqueue);
  free(dist);
  free(rev);
  free(nxt);
  free(cost);
  free(cap);
  free(to);
  free(head);
}

/*
 * Benchmark driver for kernel_run, instantiated through BENCH_MAIN_SCALAR3.
 *
 * NOTE(review): the macro is defined outside this file (presumably in
 * bench_harness.h). The roles of its arguments -- identifier, label, three
 * sizes, setup, input generation, kernel call, result expression, teardown --
 * and the origin of `n` and `seed` are inferred from how they are used here;
 * confirm against the macro definition.
 *
 * Generated graph:
 *   - V = n / 1024 + 4, clamped to the range [2, 64].
 *   - E = (V - 1) + 3 * V edges.
 *   - The first V - 1 edges form the chain i -> i + 1, so vertex V - 1 is
 *     reachable from vertex 0 (the sink and source used by kernel_run).
 *   - The remaining edges join random vertex pairs with u != v.
 *   - Every capacity is in 1..10 and every cost is in 0..10.
 *
 * NOTE(review): the four malloc results are not checked before the input
 * generation code writes through them.
 */
BENCH_MAIN_SCALAR3(
    T003_Code_016, MCMF, 4096, 16384, 65536, int V = n / 1024 + 4;
    if (V > 64) V = 64; if (V < 2) V = 2; int baseE = (V > 1 ? (V - 1) : 0);
    int extraE = V * 3; int E = baseE + extraE;
    /* Edge-list buffers; released by the free() calls in the last argument. */
    int *eu = (int *)malloc((size_t)E * sizeof(int));
    int *ev = (int *)malloc((size_t)E * sizeof(int));
    int *ecap = (int *)malloc((size_t)E * sizeof(int));
    int *ecost = (int *)malloc((size_t)E * sizeof(int));
    double ans_scalar = 0.0;
    ,
    {
      bench_rng64_t rng = bench_rng_init(seed);
      int idx = 0;

      /* Chain 0 -> 1 -> ... -> V-1 with random capacity and cost. */
      for (int i = 0; i < V - 1 && idx < E; i++) {
        eu[idx] = i;
        ev[idx] = i + 1;
        ecap[idx] = (int)((bench_rng_next(&rng) % 10ULL) + 1ULL);
        ecost[idx] = (int)(bench_rng_next(&rng) % 11ULL);
        idx++;
      }

      /* Fill the remaining slots with random edges; a self-loop is avoided
       * by moving the head to the next vertex (mod V). */
      while (idx < E) {
        int u = (int)(bench_rng_next(&rng) % (uint64_t)V);
        int v = (int)(bench_rng_next(&rng) % (uint64_t)V);
        if (u == v) {
          v = (v + 1) % V;
        }
        eu[idx] = u;
        ev[idx] = v;
        ecap[idx] = (int)((bench_rng_next(&rng) % 10ULL) + 1ULL);
        ecost[idx] = (int)(bench_rng_next(&rng) % 11ULL);
        idx++;
      }
    },
    kernel_run(n, V, E, eu, ev, ecap, ecost, &ans_scalar), ans_scalar, free(eu);
    free(ev); free(ecap); free(ecost);)