# Reference configuration: one integer value per setting. Every key defined
# here also appears under `changes` with a list of alternative values.
# NOTE(review): the key names suggest a synthetic token-copy task — confirm
# against the code that loads this file.
baseline:
  # presumably the number of distinct token ids — TODO confirm
  vocab_size: 16
  # presumably the length of each example in tokens — TODO confirm
  seq_len: 256
  # presumably how many tokens per example must be reproduced — TODO confirm
  num_tokens_to_copy: 16
  # presumably the size of the training set — TODO confirm
  num_train_examples: 12800

# Alternative values for the keys defined under `baseline`. None of the listed
# values repeats the corresponding baseline value.
# NOTE(review): presumably each list is swept one setting at a time with the
# other settings held at their baseline values, rather than as a full grid —
# confirm against the consumer of this file.
changes:
  # 2x, 4x and 8x the baseline value (16).
  vocab_size: [32, 64, 128]
  # 2x and 4x the baseline value (256).
  seq_len: [512, 1024]
  # Successive halvings of the baseline value (12800).
  num_train_examples: [6400, 3200, 1600, 800]
  # 2x, 4x and 6x the baseline value (16); every entry is smaller than the
  # baseline seq_len (256).
  num_tokens_to_copy: [32, 64, 96]