#include <cstdio>
#include <set>
#include <algorithm>
#include <string>
#include <vector>
#include <map>
using namespace std;

// Edge set. Each entry is a (first, second) pair of labels built from two
// consecutive tokens of the input file, with every '/' replaced by '_'.
// Pairs whose two labels are equal (self-loops) are never stored. After
// reading, main() also inserts the reverse of every stored pair.
set<pair<string, string> > es;

// Every label that occurs as an endpoint of at least one stored edge.
set<string> labels;

// Adjacency lists filled by main(): edges_of[x] collects the second label of
// every pair in `es` whose first label is x.
map<string, vector<string>> edges_of;

/// Reads the next whitespace-delimited token from `in` into `token`.
///
/// Leading whitespace is skipped, exactly as the scanf "%s" conversion does,
/// but the token may be arbitrarily long because it is accumulated in a
/// std::string instead of a fixed-size buffer.
///
/// @return true if a non-empty token was read, false if end of input (or a
///         read error) was hit before any non-whitespace character.
static bool read_token(std::FILE* in, std::string& token) {
  const auto is_space = [](int c) {
    return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
  };
  token.clear();
  int c = std::fgetc(in);
  while(c != EOF && is_space(c)) c = std::fgetc(in);
  while(c != EOF && !is_space(c)) {
    token.push_back(static_cast<char>(c));
    c = std::fgetc(in);
  }
  return !token.empty();
}

/// Extracts the connected component of the highest-degree node from an edge list.
///
/// Usage: <program> <edge-list> <out.txt> <out.csv>
///
///   argv[1]  input: whitespace-separated label pairs, one edge per pair
///   argv[2]  output: tab-separated "from<TAB>to" lines
///   argv[3]  output: CSV with header "id1,id2,weight" and weight fixed to 1
///
/// Every '/' in a label is replaced by '_'. Self-loops are counted and dropped.
/// The edge set is then made symmetric, the label with the largest degree is
/// chosen as root (ties go to the lexicographically smallest label), and a
/// breadth-first traversal from the root writes every edge of its component in
/// both directions.
///
/// @return 0 on success; 1 on wrong argument count, unreadable input,
///         input without any usable edge, or failure to open/write an output.
int main(int argc, char ** argv) {
  if(argc != 4) {
    std::fprintf(stderr, "usage: %s <edge-list> <out.txt> <out.csv>\n",
                 argc > 0 ? argv[0] : "program");
    return 1;
  }

  int selfloops = 0;

  std::FILE *f = std::fopen(argv[1], "r");
  if(!f) { std::printf("file missing: %s\n", argv[1]); return 1; }

  std::string sa, sb;
  while(read_token(f, sa)) {
    if(!read_token(f, sb)) {
      // An odd number of tokens: there is no partner for the last label.
      std::fprintf(stderr, "warning: ignoring unpaired trailing label: %s\n", sa.c_str());
      break;
    }
    std::replace(sa.begin(), sa.end(), '/', '_');
    std::replace(sb.begin(), sb.end(), '/', '_');
    if(sa == sb) { selfloops++; continue; }
    es.emplace(sa, sb);
    labels.insert(sa);
    labels.insert(sb);
  }
  std::fclose(f);

  // Count the stored pairs whose reverse pair is also present in the input.
  int dir = 0;
  for(const auto& e: es) if(es.count({e.second, e.first})) dir++;
  std::printf("directed: %d/%d, selfloops = %d\n", dir, int(es.size()), selfloops);

  // Without at least one edge there is no root to pick and nothing to write.
  if(labels.empty()) {
    std::fprintf(stderr, "no edges found in %s\n", argv[1]);
    return 1;
  }

  // Symmetrize the edge set. The reversed pairs are collected first so that
  // `es` is not modified while it is being iterated.
  std::vector<std::pair<std::string, std::string>> reversed;
  reversed.reserve(es.size());
  for(const auto& e: es) reversed.emplace_back(e.second, e.first);
  es.insert(reversed.begin(), reversed.end());

  for(const auto& e: es) edges_of[e.first].push_back(e.second);

  // Root = label of maximum degree; the strict '>' keeps the first (smallest)
  // label among equals.
  std::string root = *labels.begin();
  std::size_t root_degree = edges_of[root].size();
  for(const auto& label: labels) {
    const std::size_t degree = edges_of[label].size();
    if(degree > root_degree) { root = label; root_degree = degree; }
  }

  std::printf("root: %s of degree %d\n", root.c_str(), int(root_degree));

  std::FILE *gtxt = std::fopen(argv[2], "w");
  if(!gtxt) {
    std::fprintf(stderr, "cannot open for writing: %s\n", argv[2]);
    return 1;
  }
  std::FILE *gcsv = std::fopen(argv[3], "w");
  if(!gcsv) {
    std::fprintf(stderr, "cannot open for writing: %s\n", argv[3]);
    std::fclose(gtxt);
    return 1;
  }
  std::fprintf(gcsv, "id1,id2,weight\n");

  int qty = 0;
  int nodes = 0;

  // Breadth-first traversal. `seen` guarantees each node is enqueued once, so
  // the queue holds at most one entry per node rather than one per edge.
  std::set<std::string> seen = {root};
  std::vector<std::string> queue = {root};
  for(std::size_t i = 0; i < queue.size(); i++) {
    // Copy the label: push_back below may reallocate `queue`.
    const std::string at = queue[i];
    const std::vector<std::string>& neighbours = edges_of[at];
    for(const std::string& v: neighbours) {
      std::fprintf(gcsv, "%s,%s,1\n", at.c_str(), v.c_str());
      std::fprintf(gtxt, "%s\t%s\n", at.c_str(), v.c_str());
      if(seen.insert(v).second) queue.push_back(v);
    }
    if(!neighbours.empty()) {
      nodes++;
      qty += int(neighbours.size());
    }
  }

  std::printf("giant: %d/%d with %d nodes\n", qty, int(es.size()), nodes);

  // Detect write failures (e.g. full disk), including those surfacing on flush.
  bool ok = !std::ferror(gtxt) && !std::ferror(gcsv);
  if(std::fclose(gtxt) != 0) ok = false;
  if(std::fclose(gcsv) != 0) ok = false;
  if(!ok) {
    std::fprintf(stderr, "error while writing output files\n");
    return 1;
  }
  return 0;
}
