from unittest import TestCase
from transformers import AutoTokenizer
from reader.memory_line_reader import BatchSelfRegressionLineDataset


class ReaderUnittest(TestCase):
    """Smoke tests for ``BatchSelfRegressionLineDataset``."""

    def testRunnable(self):
        """Construct the dataset from the sample ids file.

        No assertions are made here; the test passes as long as loading the
        tokenizer and building the dataset do not raise.
        """
        tokenizer_dir = 'data/en_config'
        ids_path = 'data/wiki103/wiki_wsj.span.ids'

        tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)

        # Keyword names (including the ``seperator`` spelling) are passed
        # exactly as the dataset constructor is called with them.
        dataset_options = {
            "batch_max_len": 2048,
            "min_len": 2,
            "batch_size": 32,
            "max_line": 10000,
            "input_type": "ids",
            "random": False,
            "seperator": " ",
        }
        dataset = BatchSelfRegressionLineDataset(
            ids_path,
            tokenizer,
            **dataset_options,
        )