{"step-count": "-1", "teacher-path": "./results/checkpoints/WMT16enro_distill_CMLM_benchmark/imputer.pt", "embed-loss-factor": "0.1", "lr": "1e-3", "max-tokens": "2048", "max-tokens-valid": "4096", "dataset": "data-bin/wmt16_enro_distill", "source-lang": "en", "target-lang": "ro", "distill-loss-factor": "30", "noise": "full_mask", "teacher-iterative-steps": "2", "ctc-distill": true, "criterion": "ctc", "mask-policy": "block", "teacher-random-unmask": true, "distill-on-valid": false, "patience": 3}