import pprint
import re
import sys
from collections import defaultdict
from typing import Dict, Tuple

import flutes

# Per-repository difference: maps a tag name to its ``(old_value, new_value)`` pair (filled in by ``compare_logs``).
DiffDict = Dict[str, Tuple[int, int]]

# Names of the per-repository counts that ``parse_logs`` records and ``compare_logs`` compares. Each name is also
# read as a named group of the compilation summary regex in ``parse_logs``.
TAGS = ["n_success", "n_total"]


def parse_logs(path: str) -> Dict[str, Dict[str, int]]:
    r"""Read and parse the compilation log generated by ``main.py``, and return information for each repository.

    Two kinds of log lines are recognized; every other line is ignored:

    - A compilation summary line of the form ``<n_success> (<n_partial>) out of <n_total> Makefile(s) in
      <owner>/<name> compiled (partially), yielding <n_binaries> binaries``. The counts named in ``TAGS`` are
      recorded for the repository.
    - A line of the form ``No Makefiles found in <owner>/<name>, repository deleted``. Every count named in
      ``TAGS`` is recorded as zero for the repository.

    If a repository matches on several lines, the values from the last matching line win.

    :param path: Path to the log file.
    :return: A mapping from the repository's full name (``owner/name``) to a dictionary mapping each tag name to
        its integer value.
    """
    # Both kinds of lines share the same timestamp / log level / worker ID prefix.
    line_prefix = r"(?P<date_time>[0-9-]{10} [0-9:]{8}),\d{3} \w+: \(Worker \s*\d+\) "
    regex_success = re.compile(line_prefix +
                               r"(?P<n_success>\d+) \((?P<n_partial>\d+)\) out of (?P<n_total>\d+) Makefile\(s\) in "
                               r"(?P<repo_owner>\S+?)/(?P<repo_name>\S+?) compiled \(partially\), "
                               r"yielding (?P<n_binaries>\d+) binaries")
    regex_no_mkfile = re.compile(line_prefix +
                                 r"No Makefiles found in (?P<repo_owner>\S+?)/(?P<repo_name>\S+?), "
                                 r"repository deleted")

    repo_info: Dict[str, Dict[str, int]] = defaultdict(dict)
    with open(path, "r") as f:
        # Stream the file line by line instead of loading it entirely into memory. The patterns are unanchored and
        # applied with `search`, so the trailing newline kept on each line does not affect matching.
        for line in f:
            match = regex_success.search(line)
            if match is not None:
                values = {tag: int(match.group(tag)) for tag in TAGS}
            else:
                match = regex_no_mkfile.search(line)
                if match is None:
                    continue  # not a line we care about
                values = {tag: 0 for tag in TAGS}
            repo_full_name = f"{match.group('repo_owner')}/{match.group('repo_name')}"
            repo_info[repo_full_name].update(values)
    return repo_info


def compare_logs(info_old: Dict[str, Dict[str, int]], info_new: Dict[str, Dict[str, int]]) -> Dict[str, DiffDict]:
    r"""Compare per-repository statistics from two parsed logs and collect the tags whose values changed.

    Repositories that appear in only one of the two inputs are reported through ``flutes.log`` (called with
    ``"error"`` as its second argument) and are left out of the result. For repositories present in both, every tag
    in ``TAGS`` is compared, and a one-line summary of the changed tags is logged.

    :param info_old: Per-repository statistics parsed from the old log.
    :param info_new: Per-repository statistics parsed from the new log.
    :return: A mapping from repository name to a dictionary mapping tag name to ``(old_value, new_value)``. Only
        repositories and tags whose values differ are included.
    """
    # First pass: report repositories that only show up in the new log.
    for name in info_new:
        if name in info_old:
            continue
        flutes.log(f"{name} missing in OLD log", "error")

    # Second pass: walk the old log, reporting missing repositories and recording per-tag changes.
    repo_diff: Dict[str, DiffDict] = defaultdict(dict)
    for name, old_stats in info_old.items():
        if name not in info_new:
            flutes.log(f"{name} missing in NEW", "error")
            continue
        new_stats = info_new[name]

        changed: DiffDict = {}
        for tag in TAGS:
            before, after = old_stats[tag], new_stats[tag]
            if before != after:
                changed[tag] = (before, after)

        if changed:
            repo_diff[name] = changed
            summary = ", ".join(f"{tag} {before}->{after}" for tag, (before, after) in changed.items())
            flutes.log(f"{name}: {summary}")
    return repo_diff


def main() -> None:
    r"""Compare two compilation logs given on the command line and print the repositories that regressed.

    Expects the path to the old log as the first command line argument and the path to the new log as the second;
    any further arguments are ignored. Prints the number of repositories found in each log, then for every tag in
    ``TAGS`` prints the tag name followed by a mapping from repository name to ``(old_value, new_value)``,
    restricted to repositories whose value for that tag decreased.

    Exits with a usage message if fewer than two log paths are supplied.
    """
    if len(sys.argv) < 3:
        # Fail with a readable message instead of an `IndexError` traceback.
        prog = sys.argv[0] if sys.argv else "<script>"
        sys.exit(f"Usage: {prog} OLD_LOG NEW_LOG")

    info_old = parse_logs(sys.argv[1])
    info_new = parse_logs(sys.argv[2])
    print(f"Old size: {len(info_old)}, New size: {len(info_new)}")
    repo_diff = compare_logs(info_old, info_new)

    for tag in TAGS:
        print(tag)
        # Only show regressions, i.e., repositories whose old value is strictly greater than the new one.
        regressions = {
            repo_name: diff[tag]
            for repo_name, diff in repo_diff.items()
            if tag in diff and diff[tag][0] > diff[tag][1]
        }
        pprint.pprint(regressions)


# Run the comparison only when this file is executed as a script, not when it is imported as a module.
if __name__ == '__main__':
    main()
