from os import path
import sys
from nltk import CFG
from nltk.parse.generate import generate
from nltk.grammar import Nonterminal

# Directory containing this script; paths handed to parse_file are joined
# onto it.
script_dir = path.dirname(__file__)

# Declaration keywords whose following token is recorded as a vocabulary name.
# Built once at import time instead of on every line of the input file.
_VOCAB_KEYWORDS = frozenset(
    ('Factor', 'Action', 'Proposition', 'Feature', 'MarkovFeature')
)


def parse_file(path_to_rlang_file):
    """Collect the names declared in an RLang file, grouped by keyword.

    Every line is split on whitespace. When the first token is one of
    ``Factor``, ``Action``, ``Proposition``, ``Feature`` or ``MarkovFeature``
    and at least one more token follows, the second token is recorded under
    that keyword. All other lines are ignored.

    The resulting mapping is printed to stdout and also returned.

    Args:
        path_to_rlang_file: Path to the file, joined onto the directory that
            contains this script (``os.path.join`` leaves an absolute path
            unchanged).

    Returns:
        A dict mapping each keyword that occurred to the list of names
        declared with it, in file order. Keywords that never occur are
        absent; an input with no matching lines yields an empty dict.

    Raises:
        OSError: If the file cannot be opened.
    """
    vocab = {}
    with open(path.join(script_dir, path_to_rlang_file), 'r') as f:
        # Iterate the handle directly; there is no need to hold every line
        # in memory at once.
        for line in f:
            tokens = line.split()
            # A keyword with nothing after it has no name to record.
            if len(tokens) > 1 and tokens[0] in _VOCAB_KEYWORDS:
                vocab.setdefault(tokens[0], []).append(tokens[1])

    print(vocab)
    return vocab

def main(argv):
    """Command-line entry point: print the vocabulary of one RLang file.

    Args:
        argv: Full ``sys.argv``-style argument vector: the program name
            followed by exactly one path to an RLang file.

    Returns:
        0 after the file has been parsed, or 2 when the number of arguments
        is wrong.
    """
    if len(argv) != 2:
        # Usage errors go to stderr with a non-zero status so that shell
        # callers can detect the failure and stdout stays clean.
        print('Invalid number of arguments', file=sys.stderr)
        print(
            'Expected input: `python3 pull_rlang_vocab.py <RELATIVE_PATH_TO_RLANG_FILE>`',
            file=sys.stderr,
        )
        return 2

    parse_file(argv[1])
    return 0

if __name__ == '__main__':
    # Propagate main's status code to the calling process.
    sys.exit(main(sys.argv))