import sys
from collections import defaultdict
import numpy as np
import json
from pprint import pprint

# run and save event_trials
# nohup bash event_experiments_trials.sh GPU_DEVICE > logs/event_experiments_trials.log &
# pipe in the grepped output to this file
# cat logs/event_experiments_trials.log | grep -A 5 -E "testing|evaluating test data" | python parse_log_events.py

def parse_stats(lines):
    """Collect HITS@10 scores from grepped experiment log lines.

    Two kinds of lines are recognised; every other line is ignored:

    * Header lines containing ``'testing event'``, e.g.
      ``testing event complexity with 2 composite with rule length 4``.
      The two integer tokens, in order, become the current number of
      sampled events and the current maximum rule length.
    * Metric lines containing ``'HITS@10'``, e.g.
      ``HITS@10 overall: 0.2 by len: {'rule len 1': 0.76, 'rule len 2': 0.04}``.
      The third whitespace-separated token is the overall (variable
      length) score and the dict after ``'len: '`` holds one score per
      rule length.

    Args:
        lines: Iterable of log lines (e.g. ``sys.stdin``).

    Returns:
        Nested mapping ``stats[event][max_len][sub_len] -> list of float``.
        ``sub_len == 0`` holds the overall scores; ``sub_len > 0`` holds
        the scores for rules of exactly that length.  Metric lines seen
        before any header line are stored under ``event = max_len = None``.

    Raises:
        ValueError: If a header line does not contain exactly two
            integer tokens.
    """
    stats = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    event = None
    length = None
    for line in lines:
        if 'testing event' in line:
            numbers = [int(w) for w in line.split() if w.isdigit()]
            if len(numbers) != 2:
                # Fail with the offending line instead of an opaque
                # tuple-unpacking error.
                raise ValueError(
                    'expected exactly two integers (events, rule length) '
                    f'in header line, got {numbers}: {line.rstrip()!r}'
                )
            event, length = numbers
        if 'HITS@10' in line:
            bucket = stats[event][length]
            bucket[0].append(float(line.split()[2]))

            # The per-length dict is logged as a Python repr; swap the
            # quotes so that it can be parsed as JSON.
            str_dict = line.split('len: ')[1].replace("'", '"')
            for len_str, value in json.loads(str_dict).items():
                # Keys look like 'rule len 3'; the last token is the length.
                bucket[int(len_str.split()[-1])].append(float(value))
    return stats


def format_report(stats):
    """Render the collected scores as a list of report lines.

    Args:
        stats: Mapping shaped like the return value of ``parse_stats``.

    Returns:
        List of strings, one per output line.  Each event section ends
        with an empty string, which prints as a blank separator line.
    """
    report = []
    for event, event_stats in stats.items():
        report.append(f'Sampled j={event} events')
        for max_len, len_stats in event_stats.items():
            report.append(f'Max rule length n={max_len}')
            for sub_len, scores in len_stats.items():
                if sub_len == 0:
                    msg = 'Var len accuracy'
                else:
                    msg = f'Len {sub_len} accuracy'
                mean = np.mean(scores)
                std = np.std(scores)
                report.append(f'{msg}: {mean:.3f} ± {std:.3f}')
        report.append('')
    return report


def main():
    """Read grepped log lines from stdin and print the summary report."""
    for out_line in format_report(parse_stats(sys.stdin)):
        print(out_line)


if __name__ == '__main__':
    main()
