# Root of the configuration document. Every mapping-valued top-level key below
# is a section of settings carrying its own `_name` entry; the root itself is
# unnamed (null).
# NOTE(review): layout looks like a fairseq/Hydra config dump - confirm before
# hand-editing, since a regenerated dump would discard these comments.
_name: null
# Run-wide settings. The same keys and values are repeated at the start of the
# `model` and `task` sections further down in this file.
common:
  _name: null
  # Progress / logging outputs. Only `log_interval` is set; every optional
  # logging target is null or false.
  no_progress_bar: false
  log_interval: 100
  log_format: null
  tensorboard_logdir: null
  wandb_project: null
  azureml_logging: false
  seed: 1
  # Device and reduced-precision switches; all are off in this file.
  cpu: false
  tpu: false
  bf16: false
  memory_efficient_bf16: false
  fp16: false
  memory_efficient_fp16: false
  fp16_no_flatten_grads: false
  # Loss-scaling parameters. Presumably only consulted when fp16 is enabled -
  # TODO confirm against the consumer.
  fp16_init_scale: 128
  fp16_scale_window: null
  fp16_scale_tolerance: 0.0
  min_loss_scale: 0.0001
  threshold_loss_scale: null
  user_dir: null
  empty_cache_freq: 0
  all_gather_list_size: 16384
  # Also present, with the same value, in the `checkpoint` section.
  model_parallel_size: 1
  quantization_config_path: null
  profile: false
  reset_logging: true
  suppress_crashes: false
# Evaluation-time settings. No model path or results path is set here (null).
common_eval:
  _name: null
  path: null
  post_process: null
  quiet: false
  # Quoted on purpose: this is the two-character string `{}`, not an empty
  # YAML mapping. Presumably the consumer parses the string itself - confirm.
  model_overrides: '{}'
  results_path: null
# Distributed / multi-device settings. The values describe a single process
# (world size 1, rank 0, one process per node). All keys except the trailing
# `tpu` are repeated with the same values inside `model` and `task`.
distributed_training:
  _name: null
  distributed_world_size: 1
  distributed_rank: 0
  distributed_backend: nccl
  distributed_init_method: null
  # -1 here and for `heartbeat_timeout` below looks like an "unset" sentinel -
  # TODO confirm against the consumer.
  distributed_port: -1
  device_id: 0
  distributed_no_spawn: false
  ddp_backend: c10d
  bucket_cap_mb: 25
  fix_batches_to_gpus: false
  find_unused_parameters: false
  fast_stat_sync: false
  heartbeat_timeout: -1
  broadcast_buffers: false
  distributed_wrapper: DDP
  slowmo_momentum: null
  slowmo_algorithm: LocalSGD
  localsgd_frequency: 3
  nprocs_per_node: 1
  # Pipeline-parallel options; disabled, with every balance/devices entry null.
  pipeline_model_parallel: false
  pipeline_balance: null
  pipeline_devices: null
  pipeline_chunks: 0
  pipeline_encoder_balance: null
  pipeline_encoder_devices: null
  pipeline_decoder_balance: null
  pipeline_decoder_devices: null
  pipeline_checkpoint: never
  # Plain scalar `none` is the string "none", not YAML null.
  zero_sharding: none
  # Same key and value as `common.tpu`.
  tpu: false
# Data loading, batching and validation scheduling. Repeated with the same
# values inside the `model` and `task` sections.
dataset:
  _name: null
  num_workers: 1
  skip_invalid_size_inputs_valid_test: false
  # Batches are bounded by `max_tokens`; `batch_size` is left unset (null).
  max_tokens: 4096
  batch_size: null
  required_batch_size_multiple: 8
  required_seq_len_multiple: 1
  dataset_impl: null
  data_buffer_size: 10
  # Names of the subsets used for training, validation and generation.
  train_subset: train
  valid_subset: valid
  validate_interval: 1
  validate_interval_updates: 0
  validate_after_updates: 0
  fixed_validation_seed: null
  disable_validation: false
  # Validation limits mirror the training ones above (4096 / null).
  max_tokens_valid: 4096
  batch_size_valid: null
  curriculum: 0
  gen_subset: test
  # A single shard with id 0.
  num_shards: 1
  shard_id: 0
# Optimisation schedule. The two list-valued entries are written as
# one-element flow sequences, which parse to the same lists as block form.
optimization:
  _name: null
  # Stopping limits; all are 0 / 0.0 in this file.
  max_epoch: 0
  max_update: 0
  stop_time_hours: 0.0
  clip_norm: 0.0
  sentence_avg: false
  update_freq: [1]
  lr: [0.0005]
  stop_min_lr: -1.0
  use_bmuf: false
# Checkpoint saving and restoring. All keys except the last two are repeated
# with the same values inside the `model` and `task` sections.
checkpoint:
  _name: null
  # Relative directory and file names - presumably resolved against the
  # working directory of the run; confirm.
  save_dir: checkpoints
  restore_file: checkpoint_last.pt
  finetune_from_model: null
  # No state is reset when restoring.
  reset_dataloader: false
  reset_lr_scheduler: false
  reset_meters: false
  reset_optimizer: false
  # Quoted: the string `{}`, not an empty YAML mapping.
  optimizer_overrides: '{}'
  save_interval: 1
  save_interval_updates: 0
  # -1 on the keep_* entries and on `patience` looks like a "no limit" /
  # "disabled" sentinel - TODO confirm against the consumer.
  keep_interval_updates: -1
  keep_last_epochs: -1
  keep_best_checkpoints: -1
  no_save: false
  no_epoch_checkpoints: true
  no_last_checkpoints: false
  no_save_optimizer_state: false
  # Best checkpoint is chosen by `bleu`, with higher values treated as better.
  best_checkpoint_metric: bleu
  maximize_best_checkpoint_metric: true
  patience: -1
  # Empty string, written explicitly so it is not read as null.
  checkpoint_suffix: ''
  checkpoint_shard_count: 1
  load_checkpoint_on_all_dp_ranks: false
  # These two repeat `common.model_parallel_size` and
  # `distributed_training.distributed_rank` with the same values.
  model_parallel_size: 1
  distributed_rank: 0
# BMUF settings. `optimization.use_bmuf` is false in this file, so these are
# presumably unused for this run - confirm against the consumer.
bmuf:
  _name: null
  block_lr: 1.0
  block_momentum: 0.875
  global_sync_iter: 50
  warmup_iterations: 500
  use_nbm: false
  average_sync: false
  # Same key and value as `distributed_training.distributed_world_size`.
  distributed_world_size: 1
# Generation (decoding) settings. Note that `model.eval_bleu_args` and
# `task.eval_bleu_args` carry their own beam / max_len values as a JSON
# string, which differ from the max_len values in this section.
generation:
  _name: null
  # Beam search with beam 5, returning 1 hypothesis.
  beam: 5
  nbest: 1
  max_len_a: 0.0
  max_len_b: 200
  min_len: 1
  match_source_len: false
  unnormalized: false
  no_early_stop: false
  no_beamable_mm: false
  # Length and unknown-token penalties.
  lenpen: 1.0
  unkpen: 0.0
  replace_unk: null
  sacrebleu: false
  score_reference: false
  prefix_size: 0
  no_repeat_ngram_size: 0
  # Sampling is off; the -1 / -1.0 entries look like "disabled" sentinels -
  # TODO confirm against the consumer.
  sampling: false
  sampling_topk: -1
  sampling_topp: -1.0
  constraints: null
  temperature: 1.0
  diverse_beam_groups: -1
  diverse_beam_strength: 0.5
  diversity_rate: -1.0
  print_alignment: null
  print_step: false
  # No external language model is configured (null path, zero weight).
  lm_path: null
  lm_weight: 0.0
  # Iterative-decoding options.
  iter_decode_eos_penalty: 0.0
  iter_decode_max_iter: 10
  iter_decode_force_max_iter: false
  iter_decode_with_beam: 1
  iter_decode_with_external_reranker: false
  retain_iter_history: false
  retain_dropout: false
  retain_dropout_modules: null
  decoding_format: null
  no_seed_provided: false
# Language-model evaluation settings.
eval_lm:
  _name: null
  output_word_probs: false
  output_word_stats: false
  context_window: 0
  # 9223372036854775807 is 2^63 - 1, the largest signed 64-bit integer;
  # presumably an "effectively unlimited" default - confirm. Parsers or
  # consumers that store integers as floating point may not round-trip it.
  softmax_batch: 9223372036854775807
# Interactive-mode settings.
interactive:
  _name: null
  buffer_size: 0
  # Quoted so the value is the literal string `-`; an unquoted dash is the
  # YAML sequence indicator. Presumably means "read from standard input" -
  # confirm against the consumer.
  input: '-'
# Model section. It is a flattened copy of most other sections of this file
# (common, dataset, distributed_training, optimization, checkpoint, criterion,
# optimizer, lr_scheduler, scoring) plus architecture-specific keys, and ends
# with its own `_name`.
# NOTE(review): because values are duplicated, a manual change to one section
# is not reflected here - keep the copies in sync or regenerate the file.
model:
  # --- same keys and values as the `common` section ---
  no_progress_bar: false
  log_interval: 100
  log_format: null
  tensorboard_logdir: null
  wandb_project: null
  azureml_logging: false
  seed: 1
  cpu: false
  tpu: false
  bf16: false
  memory_efficient_bf16: false
  fp16: false
  memory_efficient_fp16: false
  fp16_no_flatten_grads: false
  fp16_init_scale: 128
  fp16_scale_window: null
  fp16_scale_tolerance: 0.0
  min_loss_scale: 0.0001
  threshold_loss_scale: null
  user_dir: null
  empty_cache_freq: 0
  all_gather_list_size: 16384
  model_parallel_size: 1
  quantization_config_path: null
  profile: false
  reset_logging: true
  suppress_crashes: false
  # --- component names; they match the `_name` (or null value) of the
  # same-named top-level sections ---
  criterion: label_smoothed_cross_entropy
  tokenizer: null
  bpe: null
  optimizer: adam
  lr_scheduler: inverse_sqrt
  scoring: bleu
  task: translation
  # --- same keys and values as the `dataset` section ---
  num_workers: 1
  skip_invalid_size_inputs_valid_test: false
  max_tokens: 4096
  batch_size: null
  required_batch_size_multiple: 8
  required_seq_len_multiple: 1
  dataset_impl: null
  data_buffer_size: 10
  train_subset: train
  valid_subset: valid
  validate_interval: 1
  validate_interval_updates: 0
  validate_after_updates: 0
  fixed_validation_seed: null
  disable_validation: false
  max_tokens_valid: 4096
  batch_size_valid: null
  curriculum: 0
  gen_subset: test
  num_shards: 1
  shard_id: 0
  # --- same keys and values as `distributed_training` (minus its `tpu`) ---
  distributed_world_size: 1
  distributed_rank: 0
  distributed_backend: nccl
  distributed_init_method: null
  distributed_port: -1
  device_id: 0
  distributed_no_spawn: false
  ddp_backend: c10d
  bucket_cap_mb: 25
  fix_batches_to_gpus: false
  find_unused_parameters: false
  fast_stat_sync: false
  heartbeat_timeout: -1
  broadcast_buffers: false
  distributed_wrapper: DDP
  slowmo_momentum: null
  slowmo_algorithm: LocalSGD
  localsgd_frequency: 3
  nprocs_per_node: 1
  pipeline_model_parallel: false
  pipeline_balance: null
  pipeline_devices: null
  pipeline_chunks: 0
  pipeline_encoder_balance: null
  pipeline_encoder_devices: null
  pipeline_decoder_balance: null
  pipeline_decoder_devices: null
  pipeline_checkpoint: never
  zero_sharding: none
  # Architecture name; identical to this section's `_name` at the end.
  arch: transformer_iwslt_de_en
  # --- same keys as the `optimization` section ---
  max_epoch: 0
  max_update: 0
  # Integer 0 here, whereas `optimization.stop_time_hours` is the float 0.0.
  stop_time_hours: 0
  clip_norm: 0.0
  sentence_avg: false
  # `&id001` and `&id002` anchor these two lists. The `task` section reuses
  # them through the aliases `*id001` / `*id002`, so the anchors must remain
  # defined here, before `task`.
  update_freq: &id001
  - 1
  lr: &id002
  - 0.0005
  stop_min_lr: -1.0
  use_bmuf: false
  # --- same keys and values as `checkpoint` (minus its last two keys) ---
  save_dir: checkpoints
  restore_file: checkpoint_last.pt
  finetune_from_model: null
  reset_dataloader: false
  reset_lr_scheduler: false
  reset_meters: false
  reset_optimizer: false
  optimizer_overrides: '{}'
  save_interval: 1
  save_interval_updates: 0
  keep_interval_updates: -1
  keep_last_epochs: -1
  keep_best_checkpoints: -1
  no_save: false
  no_epoch_checkpoints: true
  no_last_checkpoints: false
  no_save_optimizer_state: false
  best_checkpoint_metric: bleu
  maximize_best_checkpoint_metric: true
  patience: -1
  checkpoint_suffix: ''
  checkpoint_shard_count: 1
  load_checkpoint_on_all_dp_ranks: false
  # --- architecture switches ---
  no_token_positional_embeddings: false
  no_cross_attention: false
  cross_self_attention: false
  encoder_layerdrop: 0
  decoder_layerdrop: 0
  encoder_layers_to_keep: null
  decoder_layers_to_keep: null
  quant_noise_pq: 0
  quant_noise_pq_block_size: 8
  quant_noise_scalar: 0
  # Present only in this section; the `task` section has no `attention_type`.
  attention_type: Standard
  # --- translation-task keys (also present in the `task` section) ---
  # NOTE(review): plain `None` is the string "None", not YAML null (unset
  # values elsewhere in this file are written `null`). The `task` section
  # holds the actual data path. Confirm the consumer never reads this copy.
  data: None
  source_lang: null
  target_lang: null
  load_alignments: false
  # Quoted, so these two are the strings 'True' / 'False' rather than YAML
  # booleans. Presumably the consumer converts them - confirm before unquoting.
  left_pad_source: 'True'
  left_pad_target: 'False'
  max_source_positions: 1024
  max_target_positions: 1024
  upsample_primary: 1
  truncate_source: false
  num_batch_buckets: 0
  eval_bleu: true
  eval_bleu_detok: moses
  eval_bleu_detok_args: null
  eval_tokenized_bleu: false
  # The trailing space inside the quotes is part of the value.
  eval_bleu_remove_bpe: '@@ '
  # A JSON document stored as a single string, not a nested YAML mapping.
  eval_bleu_args: '{"beam": 5, "max_len_a": 1.2, "max_len_b": 10}'
  eval_bleu_print_samples: true
  # --- same keys and values as the `criterion` section (except `_name`,
  # `sentence_avg`) ---
  label_smoothing: 0.1
  report_accuracy: false
  ignore_prefix_size: 0
  # --- same keys and values as the `optimizer` section ---
  # Parentheses are not YAML syntax: the value is the string "(0.9, 0.98)".
  adam_betas: (0.9, 0.98)
  adam_eps: 1.0e-08
  weight_decay: 0.0001
  use_old_adam: false
  # --- same keys as the `lr_scheduler` section ---
  warmup_updates: 4000
  # Integer -1 here, whereas `lr_scheduler.warmup_init_lr` is the float -1.0.
  warmup_init_lr: -1
  # --- same keys and values as the `scoring` section ---
  pad: 1
  eos: 2
  unk: 3
  # --- architecture hyperparameters ---
  share_decoder_input_output_embed: true
  encoder_embed_dim: 512
  encoder_ffn_embed_dim: 1024
  decoder_embed_dim: 512
  decoder_ffn_embed_dim: 1024
  dropout: 0.3
  no_seed_provided: false
  encoder_attention_heads: 4
  encoder_layers: 6
  decoder_attention_heads: 4
  decoder_layers: 6
  # NOTE(review): `attention_rules` and `concat` look specific to this
  # project (compare `attention_type` above) - meaning not visible here.
  attention_rules: 1
  concat: false
  encoder_embed_path: null
  encoder_normalize_before: false
  encoder_learned_pos: false
  decoder_embed_path: null
  decoder_normalize_before: false
  decoder_learned_pos: false
  attention_dropout: 0.0
  activation_dropout: 0.0
  activation_fn: relu
  adaptive_softmax_cutoff: null
  adaptive_softmax_dropout: 0
  share_all_embeddings: false
  adaptive_input: false
  decoder_output_dim: 512
  decoder_input_dim: 512
  no_scale_embedding: false
  layernorm_embedding: false
  tie_adaptive_weights: false
  checkpoint_activations: false
  # Section name; equals `arch` above.
  _name: transformer_iwslt_de_en
# Task section. Like `model`, it is a flattened copy of most other sections of
# this file plus task- and architecture-specific keys, and ends with its own
# `_name`. It differs from `model` in two visible ways: it has no
# `attention_type` key, and `data` holds a real path.
# NOTE(review): because values are duplicated, a manual change to one section
# is not reflected here - keep the copies in sync or regenerate the file.
task:
  # --- same keys and values as the `common` section ---
  no_progress_bar: false
  log_interval: 100
  log_format: null
  tensorboard_logdir: null
  wandb_project: null
  azureml_logging: false
  seed: 1
  cpu: false
  tpu: false
  bf16: false
  memory_efficient_bf16: false
  fp16: false
  memory_efficient_fp16: false
  fp16_no_flatten_grads: false
  fp16_init_scale: 128
  fp16_scale_window: null
  fp16_scale_tolerance: 0.0
  min_loss_scale: 0.0001
  threshold_loss_scale: null
  user_dir: null
  empty_cache_freq: 0
  all_gather_list_size: 16384
  model_parallel_size: 1
  quantization_config_path: null
  profile: false
  reset_logging: true
  suppress_crashes: false
  # --- component names; they match the `_name` (or null value) of the
  # same-named top-level sections ---
  criterion: label_smoothed_cross_entropy
  tokenizer: null
  bpe: null
  optimizer: adam
  lr_scheduler: inverse_sqrt
  scoring: bleu
  # Equals this section's `_name` at the end.
  task: translation
  # --- same keys and values as the `dataset` section ---
  num_workers: 1
  skip_invalid_size_inputs_valid_test: false
  max_tokens: 4096
  batch_size: null
  required_batch_size_multiple: 8
  required_seq_len_multiple: 1
  dataset_impl: null
  data_buffer_size: 10
  train_subset: train
  valid_subset: valid
  validate_interval: 1
  validate_interval_updates: 0
  validate_after_updates: 0
  fixed_validation_seed: null
  disable_validation: false
  max_tokens_valid: 4096
  batch_size_valid: null
  curriculum: 0
  gen_subset: test
  num_shards: 1
  shard_id: 0
  # --- same keys and values as `distributed_training` (minus its `tpu`) ---
  distributed_world_size: 1
  distributed_rank: 0
  distributed_backend: nccl
  distributed_init_method: null
  distributed_port: -1
  device_id: 0
  distributed_no_spawn: false
  ddp_backend: c10d
  bucket_cap_mb: 25
  fix_batches_to_gpus: false
  find_unused_parameters: false
  fast_stat_sync: false
  heartbeat_timeout: -1
  broadcast_buffers: false
  distributed_wrapper: DDP
  slowmo_momentum: null
  slowmo_algorithm: LocalSGD
  localsgd_frequency: 3
  nprocs_per_node: 1
  pipeline_model_parallel: false
  pipeline_balance: null
  pipeline_devices: null
  pipeline_chunks: 0
  pipeline_encoder_balance: null
  pipeline_encoder_devices: null
  pipeline_decoder_balance: null
  pipeline_decoder_devices: null
  pipeline_checkpoint: never
  zero_sharding: none
  # Same architecture name as `model.arch`.
  arch: transformer_iwslt_de_en
  # --- same keys as the `optimization` section ---
  max_epoch: 0
  max_update: 0
  # Integer 0 here, whereas `optimization.stop_time_hours` is the float 0.0.
  stop_time_hours: 0
  clip_norm: 0.0
  sentence_avg: false
  # Aliases of the `&id001` / `&id002` anchors defined in the `model` section;
  # they resolve to that section's `update_freq` and `lr` lists. They only
  # load while those anchors exist earlier in the document.
  update_freq: *id001
  lr: *id002
  stop_min_lr: -1.0
  use_bmuf: false
  # --- same keys and values as `checkpoint` (minus its last two keys) ---
  save_dir: checkpoints
  restore_file: checkpoint_last.pt
  finetune_from_model: null
  reset_dataloader: false
  reset_lr_scheduler: false
  reset_meters: false
  reset_optimizer: false
  optimizer_overrides: '{}'
  save_interval: 1
  save_interval_updates: 0
  keep_interval_updates: -1
  keep_last_epochs: -1
  keep_best_checkpoints: -1
  no_save: false
  no_epoch_checkpoints: true
  no_last_checkpoints: false
  no_save_optimizer_state: false
  best_checkpoint_metric: bleu
  maximize_best_checkpoint_metric: true
  patience: -1
  checkpoint_suffix: ''
  checkpoint_shard_count: 1
  load_checkpoint_on_all_dp_ranks: false
  # --- architecture switches (copied from the model arguments) ---
  no_token_positional_embeddings: false
  no_cross_attention: false
  cross_self_attention: false
  encoder_layerdrop: 0
  decoder_layerdrop: 0
  encoder_layers_to_keep: null
  decoder_layers_to_keep: null
  quant_noise_pq: 0
  quant_noise_pq_block_size: 8
  quant_noise_scalar: 0
  # --- translation-task keys ---
  # NOTE(review): absolute path on one specific machine/filesystem; the config
  # is not portable as-is. Presumably overridden at launch - confirm.
  data: /network/tmp1/chenant/sharing/rrims/fairseq/data-bin/iwslt14.tokenized.de-en
  # Both languages are unset (null).
  source_lang: null
  target_lang: null
  load_alignments: false
  # Quoted, so these two are the strings 'True' / 'False' rather than YAML
  # booleans. Presumably the consumer converts them - confirm before unquoting.
  left_pad_source: 'True'
  left_pad_target: 'False'
  max_source_positions: 1024
  max_target_positions: 1024
  upsample_primary: 1
  truncate_source: false
  num_batch_buckets: 0
  # BLEU evaluation is enabled, matching `checkpoint.best_checkpoint_metric`.
  eval_bleu: true
  eval_bleu_detok: moses
  eval_bleu_detok_args: null
  eval_tokenized_bleu: false
  # The trailing space inside the quotes is part of the value.
  eval_bleu_remove_bpe: '@@ '
  # A JSON document stored as a single string, not a nested YAML mapping.
  eval_bleu_args: '{"beam": 5, "max_len_a": 1.2, "max_len_b": 10}'
  eval_bleu_print_samples: true
  # --- same keys and values as the `criterion` section (except `_name`,
  # `sentence_avg`) ---
  label_smoothing: 0.1
  report_accuracy: false
  ignore_prefix_size: 0
  # --- same keys and values as the `optimizer` section ---
  # Parentheses are not YAML syntax: the value is the string "(0.9, 0.98)".
  adam_betas: (0.9, 0.98)
  adam_eps: 1.0e-08
  weight_decay: 0.0001
  use_old_adam: false
  # --- same keys as the `lr_scheduler` section ---
  warmup_updates: 4000
  # Integer -1 here, whereas `lr_scheduler.warmup_init_lr` is the float -1.0.
  warmup_init_lr: -1
  # --- same keys and values as the `scoring` section ---
  pad: 1
  eos: 2
  unk: 3
  # --- architecture hyperparameters (same values as in `model`) ---
  share_decoder_input_output_embed: true
  encoder_embed_dim: 512
  encoder_ffn_embed_dim: 1024
  decoder_embed_dim: 512
  decoder_ffn_embed_dim: 1024
  dropout: 0.3
  no_seed_provided: false
  encoder_attention_heads: 4
  encoder_layers: 6
  decoder_attention_heads: 4
  decoder_layers: 6
  # NOTE(review): `attention_rules` and `concat` look specific to this
  # project - meaning not visible here.
  attention_rules: 1
  concat: false
  encoder_embed_path: null
  encoder_normalize_before: false
  encoder_learned_pos: false
  decoder_embed_path: null
  decoder_normalize_before: false
  decoder_learned_pos: false
  attention_dropout: 0.0
  activation_dropout: 0.0
  activation_fn: relu
  adaptive_softmax_cutoff: null
  adaptive_softmax_dropout: 0
  share_all_embeddings: false
  adaptive_input: false
  decoder_output_dim: 512
  decoder_input_dim: 512
  no_scale_embedding: false
  layernorm_embedding: false
  tie_adaptive_weights: false
  checkpoint_activations: false
  # Section name; equals the `task` key above.
  _name: translation
# Training criterion (loss) settings. `label_smoothing`, `report_accuracy` and
# `ignore_prefix_size` are repeated with the same values inside `model` and
# `task`; `sentence_avg` matches `optimization.sentence_avg`.
criterion:
  _name: label_smoothed_cross_entropy
  label_smoothing: 0.1
  report_accuracy: false
  ignore_prefix_size: 0
  sentence_avg: false
# Optimizer settings. `tpu` and `lr` repeat `common.tpu` and
# `optimization.lr` with the same values.
optimizer:
  _name: adam
  # Kept as a string (quoted to make that explicit); the parentheses are not
  # YAML syntax. Presumably the consumer parses the tuple itself - confirm.
  adam_betas: '(0.9, 0.98)'
  adam_eps: 1.0e-08
  weight_decay: 0.0001
  use_old_adam: false
  tpu: false
  lr: [0.0005]
# Learning-rate schedule settings. `lr` repeats `optimization.lr`.
lr_scheduler:
  _name: inverse_sqrt
  warmup_updates: 4000
  # -1.0 looks like an "unset" sentinel - TODO confirm against the consumer.
  warmup_init_lr: -1.0
  lr: [0.0005]
# Scoring settings. `pad`, `eos` and `unk` are repeated with the same values
# inside `model` and `task`; presumably special-token indices - confirm.
scoring:
  _name: bleu
  pad: 1
  eos: 2
  unk: 3
# No BPE or tokenizer component is configured: both are null rather than
# mappings, matching the null `bpe` / `tokenizer` entries in `model` and `task`.
bpe: null
tokenizer: null

