
import jsonlines
import argparse
from collections import defaultdict


def level_map(level):
    """Collapse a raw level value into one of five integer buckets.

    Mapping: 1 and 1.5 -> 1; 2 and 2.25 -> 2; 2.5 -> 3; 3 and 3.5 -> 4;
    4 -> 5.

    Raises:
        ValueError: if ``level`` is not one of the recognised raw values.
    """
    # Each entry pairs the raw values with the bucket they collapse into.
    buckets = (
        ((1, 1.5), 1),
        ((2, 2.25), 2),
        ((2.5,), 3),
        ((3, 3.5), 4),
        ((4,), 5),
    )
    for raw_values, bucket in buckets:
        if level in raw_values:
            return bucket
    raise ValueError('Unknown level')


def main():
    """Print the distribution of ``level`` values in a JSON Lines file.

    Command-line arguments:
        --input_file: path to the JSON Lines file to read (required).
        --training: keep only records whose ``answer`` contains
            "clarification" (case-insensitive) and skip printing the
            per-level breakdown.

    Output (stdout): the number of records considered; then, unless
    ``--training`` is given, the ``(level, fraction)`` pairs sorted by level
    and the same fractions formatted as percentages; finally the sum of the
    fractions.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_file', type=str, required=True)
    parser.add_argument('--training', action='store_true')
    args = parser.parse_args()

    # Read through a context manager so the underlying file handle is closed
    # as soon as the records are loaded, instead of being left open.
    with jsonlines.open(args.input_file) as reader:
        data = list(reader)

    if args.training:
        data = [item for item in data if 'clarification' in item['answer'].lower()]

    # Raw levels are counted as-is; they are not bucketed through level_map.
    level_counts = defaultdict(int)
    for item in data:
        level_counts[item['level']] += 1

    total = len(data)
    print(total)

    # With no records there are no counts either, so the division below is
    # never evaluated for total == 0.
    fractions = [
        (level, count / total)
        for level, count in sorted(level_counts.items(), key=lambda pair: pair[0])
    ]
    if not args.training:
        print(fractions)
        print({level: f"{share*100:.2f}%" for level, share in fractions})
    # Sum of the per-level fractions, printed as a sanity check.
    print(sum(share for _, share in fractions))
# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
