# Run-level settings (names suggest batch size and epoch count for a
# training loop -- confirm against the code that reads them).
BATCH_SIZE = 16
NUM_EPOCHS = 200

# Alternative run configuration, currently disabled. Only NUM_EPOCHS
# differs from the active values above.
# BATCH_SIZE = 16
# NUM_EPOCHS = 10000

# Model-size settings (presumably consumed by a model constructor
# elsewhere -- verify against the caller).
d_model = 256
num_heads = 8
num_layers = 2

# Derived value: always four times d_model, so it follows d_model if
# that is changed.
dim_feedforward = 4 * d_model
