{"step-count": "-1", "arch": "cmlm_distill", "embed-loss-factor": "0.4", "lr": 0.001, "max-tokens": "8196", "max-tokens-valid": "4096", "dataset": "/storage/sajad/data-bin/wmt16_enro_distill", "teacher-path": "/storage/sajad/teacher_checkpoints/CMLMC_WMTenro_distill.pt", "source-lang": "en", "target-lang": "ro", "distill-loss-factor": "1", "noise": "random_mask", "teacher-iterative-steps": "2", "criterion": "nat_loss", "mask-policy": "uniform", "patience": "50", "keep-best-checkpoints": "20", "label-smoothing": "0.", "teacher-temp": "0.5", "teacher-ema": true, "teacher-ema-decay": "0.9992", "optimize-encoder": true, "optimize-length-predictor": true, "num-workers": "40", "insertCausalSelfAttn": true, "no-scale-embedding": true}