hydra_train is powered by Hydra.

== Configuration groups ==
Compose your configuration from these groups (group=option), e.g. optimizer=adam

bpe: bert, byte_bpe, fastbpe, gpt2, hf_byte_bpe, sentencepiece, subword_nmt
criterion: adaptive_loss, cross_entropy, ctc, fastspeech2, hubert, label_smoothed_cross_entropy, label_smoothed_cross_entropy_with_alignment, label_smoothed_cross_entropy_with_ctc, label_smoothed_cross_entropy_with_rdrop, latency_augmented_label_smoothed_cross_entropy, masked_lm, model, nat_loss, sentence_prediction, sentence_prediction_adapters, speech_dlm_criterion, speech_to_spectrogram, speech_to_spectrogram_2pass, speech_to_unit, speech_to_unit_2pass, speech_unit_lm_criterion, tacotron2, wav2vec
fb_run_config: slurm
lr_scheduler: cosine, fixed, inverse_sqrt, pass_through, polynomial_decay, reduce_lr_on_plateau, step, tri_stage, triangular
model: hubert, hubert_ctc, hubert_seq2seq, multires_hubert, multires_hubert_ctc, speech_dlm, transformer_lm, transformer_ulm, wav2vec, wav2vec2, wav2vec2_laser, wav2vec_classification, wav2vec_ctc, wav2vec_seq2seq
model/transformer_lm: transformer_lm_baevski_gbw, transformer_lm_baevski_wiki103, transformer_lm_big, transformer_lm_gbw, transformer_lm_gpt, transformer_lm_gpt2_big, transformer_lm_gpt2_medium, transformer_lm_gpt2_small, transformer_lm_wiki103
model/wav2vec: vq_wav2vec_gumbel
model/wav2vec2: wav2vec2_base, wav2vec2_large
optimizer: adam, composite, cpu_adam, nag
scoring: bert_score, bleu, chrf, meteor, sacrebleu, wer
task: audio_classification, audio_finetuning, audio_pretraining, denoising, dummy_lm, dummy_masked_lm, hubert_pretraining, language_modeling, masked_lm, multilingual_denoising, multilingual_language_modeling, multires_hubert_pretraining, nlu_finetuning, sentence_prediction, sentence_prediction_adapters, simul_text_to_text, span_masked_lm, speech_dlm_task, speech_unit_modeling, translation, translation_from_pretrained_xlm, translation_lev
tokenizer: moses, nltk, space


== Config ==
Override anything in the config (foo.bar=value), e.g. common.seed=2

_name: null
common:
  _name: null
  no_progress_bar: false
  log_interval: 100
  log_format: null
  log_file: null
  aim_repo: null
  aim_run_hash: null
  tensorboard_logdir: null
  wandb_project: null
  azureml_logging: false
  seed: 1
  cpu: false
  tpu: false
  bf16: false
  memory_efficient_bf16: false
  fp16: false
  memory_efficient_fp16: false
  fp16_no_flatten_grads: false
  fp16_init_scale: 128
  fp16_scale_window: null
  fp16_scale_tolerance: 0.0
  on_cpu_convert_precision: false
  min_loss_scale: 0.0001
  threshold_loss_scale: null
  amp: false
  amp_batch_retries: 2
  amp_init_scale: 128
  amp_scale_window: null
  user_dir: null
  empty_cache_freq: 0
  all_gather_list_size: 16384
  model_parallel_size: 1
  quantization_config_path: null
  profile: false
  reset_logging: false
  suppress_crashes: false
  use_plasma_view: false
  plasma_path: /tmp/plasma
common_eval:
  _name: null
  path: null
  post_process: null
  quiet: false
  model_overrides: '{}'
  results_path: null
distributed_training:
  _name: null
  distributed_world_size: 1
  distributed_num_procs: 1
  distributed_rank: 0
  distributed_backend: nccl
  distributed_init_method: null
  distributed_port: -1
  device_id: 0
  distributed_no_spawn: false
  ddp_backend: pytorch_ddp
  ddp_comm_hook: none
  bucket_cap_mb: 25
  fix_batches_to_gpus: false
  find_unused_parameters: false
  gradient_as_bucket_view: false
  fast_stat_sync: false
  heartbeat_timeout: -1
  broadcast_buffers: false
  slowmo_momentum: null
  slowmo_base_algorithm: localsgd
  localsgd_frequency: 3
  nprocs_per_node: 1
  pipeline_model_parallel: false
  pipeline_balance: null
  pipeline_devices: null
  pipeline_chunks: 0
  pipeline_encoder_balance: null
  pipeline_encoder_devices: null
  pipeline_decoder_balance: null
  pipeline_decoder_devices: null
  pipeline_checkpoint: never
  zero_sharding: none
  fp16: ${common.fp16}
  memory_efficient_fp16: ${common.memory_efficient_fp16}
  tpu: ${common.tpu}
  no_reshard_after_forward: false
  fp32_reduce_scatter: false
  cpu_offload: false
  use_sharded_state: false
  not_fsdp_flatten_parameters: false
dataset:
  _name: null
  num_workers: 1
  skip_invalid_size_inputs_valid_test: false
  max_tokens: null
  batch_size: null
  required_batch_size_multiple: 8
  required_seq_len_multiple: 1
  dataset_impl: null
  data_buffer_size: 10
  train_subset: train
  valid_subset: valid
  combine_valid_subsets: null
  ignore_unused_valid_subsets: false
  validate_interval: 1
  validate_interval_updates: 0
  validate_after_updates: 0
  fixed_validation_seed: null
  disable_validation: false
  max_tokens_valid: ${dataset.max_tokens}
  batch_size_valid: ${dataset.batch_size}
  max_valid_steps: null
  curriculum: 0
  gen_subset: test
  num_shards: 1
  shard_id: 0
  grouped_shuffling: false
  update_epoch_batch_itr: ${dataset.grouped_shuffling}
  update_ordered_indices_seed: false
optimization:
  _name: null
  max_epoch: 0
  max_update: 0
  stop_time_hours: 0.0
  clip_norm: 0.0
  sentence_avg: false
  update_freq:
  - 1
  lr:
  - 0.25
  stop_min_lr: -1.0
  use_bmuf: false
  skip_remainder_batch: false
  debug_param_names: false
checkpoint:
  _name: null
  save_dir: checkpoints
  restore_file: checkpoint_last.pt
  continue_once: null
  finetune_from_model: null
  reset_dataloader: false
  reset_lr_scheduler: false
  reset_meters: false
  reset_optimizer: false
  optimizer_overrides: '{}'
  save_interval: 1
  save_interval_updates: 0
  keep_interval_updates: -1
  keep_interval_updates_pattern: -1
  keep_last_epochs: -1
  keep_best_checkpoints: -1
  no_save: false
  no_epoch_checkpoints: false
  no_last_checkpoints: false
  no_save_optimizer_state: false
  best_checkpoint_metric: loss
  maximize_best_checkpoint_metric: false
  patience: -1
  checkpoint_suffix: ''
  checkpoint_shard_count: 1
  load_checkpoint_on_all_dp_ranks: false
  write_checkpoints_asynchronously: false
  model_parallel_size: ${common.model_parallel_size}
bmuf:
  _name: null
  block_lr: 1.0
  block_momentum: 0.875
  global_sync_iter: 50
  warmup_iterations: 500
  use_nbm: false
  average_sync: false
  distributed_world_size: ${distributed_training.distributed_world_size}
generation:
  _name: null
  beam: 5
  beam_mt: 0
  nbest: 1
  max_len_a: 0.0
  max_len_b: 200
  max_len_a_mt: 0.0
  max_len_b_mt: 200
  min_len: 1
  match_source_len: false
  unnormalized: false
  no_early_stop: false
  no_beamable_mm: false
  lenpen: 1.0
  lenpen_mt: 1.0
  unkpen: 0.0
  replace_unk: null
  sacrebleu: false
  score_reference: false
  prefix_size: 0
  no_repeat_ngram_size: 0
  sampling: false
  sampling_topk: -1
  sampling_topp: -1.0
  constraints: null
  temperature: 1.0
  diverse_beam_groups: -1
  diverse_beam_strength: 0.5
  diversity_rate: -1.0
  print_alignment: null
  print_step: false
  lm_path: null
  lm_weight: 0.0
  iter_decode_eos_penalty: 0.0
  iter_decode_max_iter: 10
  iter_decode_force_max_iter: false
  iter_decode_with_beam: 1
  iter_decode_with_external_reranker: false
  retain_iter_history: false
  retain_dropout: false
  retain_dropout_modules: null
  decoding_format: null
  no_seed_provided: false
  eos_token: null
eval_lm:
  _name: null
  output_word_probs: false
  output_word_stats: false
  context_window: 0
  softmax_batch: 9223372036854775807
interactive:
  _name: null
  buffer_size: 0
  input: '-'
model: ???
task: null
criterion:
  _name: cross_entropy
  sentence_avg: ${optimization.sentence_avg}
optimizer: null
lr_scheduler:
  _name: fixed
  force_anneal: null
  lr_shrink: 0.1
  warmup_updates: 0
  lr: ${optimization.lr}
scoring: null
bpe: null
tokenizer: null
ema:
  _name: null
  store_ema: false
  ema_decay: 0.9999
  ema_start_update: 0
  ema_seed_model: null
  ema_update_freq: 1
  ema_fp32: false


Powered by Hydra (https://hydra.cc)
Use --hydra-help to view Hydra specific help


