import sys
from collections import defaultdict
import numpy as np
import json

# Usage:
# 1. Run the event_trials experiments and save their output to a log file:
#    nohup bash sample_experiments_trials.sh GPU_DEVICE > logs/sample_experiments_trials.log &
# 2. Pipe the grepped log into this script on stdin:
#    cat logs/sample_experiments_trials.log | grep -A 5 -E "testing|evaluating test data" | python parse_sample_logs.py

# nested dict, with outer dict being the samples and inner being the len
# stats[j][1][0] for variable length accuracy
# stats[j][1][ i > 0] for i length sub accuracy
stats = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
length = None
samples = None
for line in sys.stdin:
    # "testing max rule length $max_rule_len with $samples samples"
    if 'testing max' in line:
        length, samples = [int(w) for w in line.split() if w.isdigit()]
    # HITS@1 overall: 0.2 by len: {'rule len 1': 0.76, 'rule len 2': 0.04, 'rule len 3': 0.0, 'rule len 4': 0.0}
    if 'HITS@10' in line:
        var_len_acc = float(line.split()[2])
        stats[samples][length][0].append(var_len_acc)

        str_dict = line.split('len: ')[1].replace("\'", "\"")
        j_dict = json.loads(str_dict)
        for len_str, value in j_dict.items(): 
            len_val = int(len_str.split()[-1]) 
            val_float = float(value)
            stats[samples][length][len_val].append(val_float)

for samples, event_stats in stats.items():
    print(f'm={samples} samples')
    for max_len, len_stats in event_stats.items():
        print(f'Max rule length n={max_len}')
        for sub_len, scores in len_stats.items():
            if sub_len == 0:
                msg = 'Var len accuracy'
            else:
                msg = f'Len {sub_len} accuracy'
            mean = np.mean(scores)
            std = np.std(scores)
            print(f'{msg}: {mean:.3f} ± {std:.3f}')
    print('')
