flydata:
    # Settings for the training split: dataset location, tokenizer files,
    # sample processing options, and dataloader batching.
    training:
        random_seed: 123
        datapath: /home/username/cloud-data/datasets/wikitext-103/wikitext-103-processed/memformer_document_all/wiki103_train.arrow
        tokenizer:
            vocab_file: /home/username/cloud-data/tokenizers/MemformerBPE/vocab.json
            merges_file: /home/username/cloud-data/tokenizers/MemformerBPE/merges.txt
        processing:
            random_max_seq_len_offset: 10
            startoftext_id: 50261
            endoftext_id: 50260
            pad_token_id: 1
            # Presumably one probability per entry in `multitasks`, in the
            # same order (the lengths match and the values sum to 1.0).
            multitasks: [TextContinuation, TextInfilling, TextRecall]
            multitasks_probs: [0.7, 0.2, 0.1]
            mask_token_prob: 0.07
            # NOTE(review): "possion" looks like a misspelling of "poisson".
            # The key is kept as is because the consumer presumably looks it
            # up by this exact name; confirm before renaming.
            mask_possion_lambda: 3.5
            max_seq_len: 64
            time_horizon: 8
        plasma: null  # disable plasma
        dataloader:
            # when using flydata, this value will overwrite the training batch size
            batch_size: 64
            in_series: 8
            timeout: null
            drop_last: true
    # Settings for the validation split: dataset location, tokenizer files,
    # sample processing options, and dataloader batching.
    validation:
        random_seed: 123
        datapath: /home/username/cloud-data/datasets/wikitext-103/wikitext-103-processed/memformer_document_all/wiki103_valid.arrow
        tokenizer:
            vocab_file: /home/username/cloud-data/tokenizers/MemformerBPE/vocab.json
            merges_file: /home/username/cloud-data/tokenizers/MemformerBPE/merges.txt
        processing:
            random_max_seq_len_offset: 0  # always 0
            startoftext_id: 50261
            endoftext_id: 50260
            pad_token_id: 1
            # Only a single task is listed here, so its probability is 1.0.
            multitasks: [TextContinuation]
            multitasks_probs: [1.0]
            max_seq_len: 64
            time_horizon: 1  # always one for validation
        plasma: null  # disable plasma
        dataloader:
            batch_size: 32
            in_series: 8
            timeout: null
            drop_last: false