# @package _group_
# Run-wide settings: half precision, JSON logging, and TensorBoard output.
common:
  fp16: true
  log_format: json
  # NOTE(review): interval is presumably counted in training updates — confirm.
  log_interval: 200
  use_plasma_view: false
  # Relative path; NOTE(review): where `tb` resolves depends on the run's
  # working directory — confirm.
  tensorboard_logdir: tb

# Checkpoint handling. `restore_file` has no usable default in this file.
checkpoint:
  no_epoch_checkpoints: true
  # NOTE(review): 9999999 is presumably a sentinel chosen to be larger than
  # any real run, effectively disabling interval-based saving — confirm.
  save_interval: 9999999
  # `???` looks like the OmegaConf/Hydra "mandatory value" placeholder: the
  # caller is expected to supply the checkpoint path (e.g. as an override).
  restore_file: ???
  # Optimizer, dataloader, and meter state are all flagged for reset.
  # NOTE(review): presumably so only the weights in `restore_file` are
  # reused — confirm.
  reset_optimizer: true
  reset_dataloader: true
  reset_meters: true

# Task selection and input handling. `data` has no usable default in this file.
task:
  _name: t5_seq2seq_mixture
  # `???` looks like the OmegaConf/Hydra "mandatory value" placeholder: the
  # caller is expected to supply the data location (e.g. as an override).
  data: ???
  # Truncation is enabled for both the source and the target side.
  truncate_source: true
  truncate_target: true
  # NOTE(review): presumably the length limit that truncation enforces —
  # confirm against the task implementation.
  tokens_per_sample: 1024

# Training loss selection.
# NOTE(review): no smoothing value is set in this file, so the criterion's
# own default applies — confirm that is intended.
criterion:
  _name: label_smoothed_cross_entropy
  report_accuracy: true

# Batching and validation settings.
dataset:
  # NOTE(review): presumably a per-device token budget per batch — confirm.
  max_tokens: 4096
  # NOTE(review): 9999999 is presumably a sentinel chosen to be larger than
  # any real run, effectively disabling interval-based validation — confirm.
  validate_interval: 9999999
  # Comma-separated list naming two validation subsets (small and medium).
  valid_subset: valid_code_x_glue_cc_code_refinement_small,valid_code_x_glue_cc_code_refinement_medium

# Adam optimizer hyperparameters.
optimizer:
  _name: adam
  weight_decay: 0.01
  # The parenthesised pair is a plain string at the YAML level; quoting makes
  # that explicit. The string content is unchanged.
  adam_betas: '(0.9,0.98)'
  # Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
  # resolve `1e-06` as a string, whereas `1.0e-06` is a float everywhere.
  adam_eps: 1.0e-06

# Learning-rate schedule. Only the warmup length is set here; every other
# scheduler option is left to defaults defined outside this file.
lr_scheduler:
  _name: polynomial_decay
  # 100 warmup updates out of the 1000 set by `optimization.max_update`.
  warmup_updates: 100

# Optimization-loop settings: learning rate, run length, and update frequency.
optimization:
  # NOTE(review): 0 presumably disables gradient-norm clipping — confirm.
  clip_norm: 0
  # Single-element list, i.e. one learning rate (2e-5).
  lr: [0.00002]
  max_update: 1000
  # NOTE(review): presumably gradient-accumulation steps; revisit if the GPU
  # count differs from the one assumed below.
  update_freq: [4]  # chosen assuming a 16-GPU run

# Model architecture selection.
model:
  _name: transformer_t5_base_rpe
  # NOTE(review): 1280 is larger than `task.tokens_per_sample` (1024);
  # presumably deliberate headroom — confirm.
  max_positions: 1280