
#include "bench_harness.h"
#include "bench_utils.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Adds `delta` at 1-based index `idx` of a Fenwick tree covering 1..n.
 * `idx` must be >= 1 (index 0 has no low bit and would never advance). */
static void hld_bit_add(long long *bit, int n, int idx, long long delta) {
  /* Unsigned arithmetic: `p += lowbit(p)` cannot overflow even when n is
   * close to INT_MAX, unlike the same step done in int. */
  for (size_t p = (size_t)idx; p <= (size_t)n; p += p & (~p + 1)) {
    bit[p] += delta;
  }
}

/* Returns the sum of Fenwick entries 1..idx; 0 when idx == 0. */
static long long hld_bit_prefix(const long long *bit, int idx) {
  long long sum = 0;
  for (int p = idx; p > 0; p -= p & -p) {
    sum += bit[p];
  }
  return sum;
}

/* Returns the sum over the inclusive 0-based position range [lo, hi]. */
static long long hld_bit_range(const long long *bit, int lo, int hi) {
  return hld_bit_prefix(bit, hi + 1) - hld_bit_prefix(bit, lo);
}

/*
 * Runs a fixed update/query workload on a tree using heavy-light
 * decomposition backed by a Fenwick tree.
 *
 * The tree is rooted at node 0. For every t in [0, n) the workload adds 1 to
 * the value of node t and then accumulates the sum of node values on the path
 * between node t and node (7 * t + 13) % n. The grand total is stored in
 * *ans_out as a double.
 *
 * Parameters:
 *   n       number of nodes.
 *   eu, ev  endpoint arrays of the n - 1 undirected edges; not read when
 *           n <= 1.
 *   val     initial value of each node (n entries).
 *   ans_out receives the result; must not be NULL.
 *
 * *ans_out is set to 0.0 when n <= 0 or when a scratch allocation fails.
 *
 * Precondition: the edges must form a tree over nodes 0..n-1. Endpoints are
 * not validated; out-of-range endpoints, cycles or a disconnected graph lead
 * to out-of-bounds or uninitialised accesses.
 */
void kernel_run(int n, const int *eu, const int *ev, const int *val,
                double *ans_out) {
  *ans_out = 0.0;
  if (n <= 0) {
    return; /* nothing to do; also avoids sizing buffers from a negative n */
  }

  const int m = n - 1;
  const size_t nodes = (size_t)n;
  /* Sizes are computed in size_t so 2 * m and n + 2 cannot overflow int.
   * At least one element is always requested: malloc(0) may return NULL,
   * which would otherwise be mistaken for an allocation failure when
   * n == 1. */
  const size_t arcs = m > 0 ? (size_t)2 * (size_t)m : (size_t)1;

  int *head = (int *)malloc(nodes * sizeof *head);
  int *to = (int *)malloc(arcs * sizeof *to);
  int *nxt = (int *)malloc(arcs * sizeof *nxt);
  int *parent = (int *)malloc(nodes * sizeof *parent);
  int *depth = (int *)malloc(nodes * sizeof *depth);
  int *q = (int *)malloc(nodes * sizeof *q);
  int *sz = (int *)malloc(nodes * sizeof *sz);
  int *heavy = (int *)malloc(nodes * sizeof *heavy);
  int *top = (int *)malloc(nodes * sizeof *top);
  int *pos = (int *)malloc(nodes * sizeof *pos);
  int *stack_node = (int *)malloc(nodes * sizeof *stack_node);
  /* calloc hands back the Fenwick array already zeroed. */
  long long *bit = (long long *)calloc(nodes + 2, sizeof *bit);

  if (!head || !to || !nxt || !parent || !depth || !q || !sz || !heavy ||
      !top || !pos || !stack_node || !bit) {
    goto cleanup;
  }

  /* Adjacency lists as linked arcs: head[v] is the first arc leaving v,
   * nxt[] chains the remaining ones, -1 terminates a list. */
  for (int i = 0; i < n; i++) {
    head[i] = -1;
  }
  int arc = 0;
  for (int i = 0; i < m; i++) {
    const int a = eu[i];
    const int b = ev[i];
    to[arc] = b;
    nxt[arc] = head[a];
    head[a] = arc;
    arc++;
    to[arc] = a;
    nxt[arc] = head[b];
    head[b] = arc;
    arc++;
  }

  /* Breadth-first search from the root. Once the search finishes, q[0..qt)
   * holds the nodes in visit order, so no separate order array is needed. */
  parent[0] = 0;
  depth[0] = 0;
  int qh = 0;
  int qt = 0;
  q[qt++] = 0;
  while (qh < qt) {
    const int v = q[qh++];
    for (int e = head[v]; e != -1; e = nxt[e]) {
      const int u = to[e];
      if (u == parent[v]) {
        continue;
      }
      parent[u] = v;
      depth[u] = depth[v] + 1;
      q[qt++] = u;
    }
  }

  /* Reverse BFS order visits children before parents: accumulate subtree
   * sizes and pick, for each node, the child with the largest subtree
   * (the first such child in adjacency order wins ties). */
  for (int idx = qt - 1; idx >= 0; idx--) {
    const int v = q[idx];
    int best = 0;
    sz[v] = 1;
    heavy[v] = -1;
    for (int e = head[v]; e != -1; e = nxt[e]) {
      const int u = to[e];
      if (parent[u] != v) {
        continue;
      }
      sz[v] += sz[u];
      if (sz[u] > best) {
        best = sz[u];
        heavy[v] = u;
      }
    }
  }

  /* Assign chain tops and positions. Every node taken off the stack starts a
   * new chain headed by itself; the chain is followed through heavy children
   * while light children are stacked to start their own chains later. */
  int cur_pos = 0;
  int sp = 0;
  stack_node[sp++] = 0;
  while (sp > 0) {
    const int chain_top = stack_node[--sp];
    for (int cur = chain_top; cur != -1; cur = heavy[cur]) {
      top[cur] = chain_top;
      pos[cur] = cur_pos++;
      for (int e = head[cur]; e != -1; e = nxt[e]) {
        const int u = to[e];
        if (parent[u] == cur && u != heavy[cur]) {
          stack_node[sp++] = u;
        }
      }
    }
  }

  /* Load the initial node values at their chain positions. */
  for (int v = 0; v < n; v++) {
    hld_bit_add(bit, n, pos[v] + 1, (long long)val[v]);
  }

  long long acc = 0;
  for (int t = 0; t < n; t++) {
    int a = t;
    /* Widened so 7 * t + 13 cannot overflow int for large n. */
    int b = (int)(((long long)t * 7 + 13) % n);

    hld_bit_add(bit, n, pos[a] + 1, 1);

    long long res = 0;
    while (top[a] != top[b]) {
      /* Always climb from the endpoint whose chain top is deeper. */
      if (depth[top[a]] < depth[top[b]]) {
        const int tmp = a;
        a = b;
        b = tmp;
      }
      res += hld_bit_range(bit, pos[top[a]], pos[a]);
      a = parent[top[a]];
    }
    /* Both endpoints now share a chain; b becomes the shallower one, which
     * also has the smaller position. */
    if (depth[a] < depth[b]) {
      const int tmp = a;
      a = b;
      b = tmp;
    }
    res += hld_bit_range(bit, pos[b], pos[a]);
    acc += res;
  }
  *ans_out = (double)acc;

cleanup:
  /* free(NULL) is a no-op, so one exit path serves success and failure. */
  free(head);
  free(to);
  free(nxt);
  free(parent);
  free(depth);
  free(q);
  free(sz);
  free(heavy);
  free(top);
  free(pos);
  free(stack_node);
  free(bit);
}
/*
 * Benchmark entry point, generated by the BENCH_MAIN_SCALAR3 macro (not
 * defined in this file; presumably provided by bench_harness.h).
 *
 * Arguments as written here: two identifiers (T003_Code_023, HLD), three
 * integer constants (presumably the problem sizes to run; confirm against
 * the macro definition), the buffer declarations, an input-initialisation
 * block, the kernel call, the result expression, and the cleanup statements.
 * `n` and `seed` are not declared here and are presumably supplied by the
 * macro expansion.
 *
 * NOTE(review): the three malloc results below are never checked before the
 * initialisation block writes through them.
 */
BENCH_MAIN_SCALAR3(
    T003_Code_023, HLD, 4096, 16384, 65536,
    /* Edge endpoint arrays (n - 1 entries each) and per-node values. */
    int *eu = (int *)malloc((size_t)(n - 1) * sizeof(int));
    int *ev = (int *)malloc((size_t)(n - 1) * sizeof(int));
    int *val = (int *)malloc((size_t)n * sizeof(int)); double ans_scalar = 0.0;
    ,
    {
      bench_rng64_t rng = bench_rng_init(seed);
      /* Node i (i >= 1) is linked to an earlier node p in [0, i), so the
       * edges always form a tree that includes node 0. */
      for (int i = 1; i < n; i++) {
        int p = (int)(bench_rng_next(&rng) % (unsigned long long)i);
        eu[i - 1] = i;
        ev[i - 1] = p;
      }
      /* Node values are drawn from 0..9. */
      for (int i = 0; i < n; i++) {
        val[i] = (int)(bench_rng_next(&rng) % 10ULL);
      }
    },
    /* Kernel invocation, the value reported as the result, then cleanup. */
    kernel_run(n, eu, ev, val, &ans_scalar), ans_scalar, free(eu);
    free(ev); free(val);)
