from lime_tokenizer import LimeTokenizer

# Example: tokenize a single sentence with LimeTokenizer and print every field
# of the returned encoding. The trailing comments show the output recorded for
# this sentence with the checkpoint named in `model_id`.
model_id = "google/gemma-2b"
lime_tok = LimeTokenizer.from_pretrained(model_id)

batched_encodings = lime_tok(
    "This morning a bobcat in Japan",
    add_special_tokens=False,
)

# Sub-word view: the ids and the token strings they correspond to.
print(batched_encodings.input_ids)  # [1596, 5764, 476, 23191, 4991, 575, 5928]
print(batched_encodings.tokens())   # ['This', '▁morning', '▁a', '▁bob', 'cat', '▁in', '▁Japan']

# The remaining fields are read as attributes and printed in this fixed order.
# Note the `ling_words*` lists have six entries against seven tokens: the
# '▁bob' + 'cat' pieces show up as the single word ' bobcat'.
for _field in (
    "tokens_pos_tags",       # ['DT', 'NN', 'DT', 'NN', 'NN', 'IN', 'NNP']
    "tokens_pos_tags_ids",   # [11, 22, 11, 22, 22, 15, 23]
    "tokens_ner_tags",       # ['TIME', 'TIME', '', '', '', '', 'GPE']
    "tokens_ner_tags_ids",   # [16, 16, 18, 18, 18, 18, 4]
    "ling_words",            # ['This', ' morning', ' a', ' bobcat', ' in', ' Japan']
    "ling_words_pos_tags",   # ['DT', 'NN', 'DT', 'NN', 'IN', 'NNP']
    "ling_words_ner_tags",   # ['TIME', 'TIME', '', '', '', 'GPE']
):
    print(getattr(batched_encodings, _field))
