{
  "step-count": "-1",
  "teacher-path": "./results/checkpoints/WMT16enro_distill_CMLM_benchmark/CMLMC.pt",
  "embed-loss-factor": "1",
  "cross-attn-loss-factor": "0",
  "correction-loss-factor": "0",
  "orig-loss-factor": "0",
  "teacher-temp": "0.05",
  "std-temp": "4",
  "lr": "1e-3",
  "max-tokens-valid": "4096",
  "dataset": "data-bin/wmt16_enro_distill",
  "insertCausalSelfAttn": true,
  "no-scale-embedding": true,
  "source-lang": "en",
  "target-lang": "ro",
  "teacher-ema": true,
  "teacher-ema-decay": "0.9995",
  "optimize-encoder": true,
  "optimize-length-predictor": true
}