# Hydra defaults list: each `group: option` entry selects one config file from
# that config group. Entries are composed in the order listed, so keep the
# order stable when editing.
defaults: # loads default configs
  - task: nlp/language_modeling
  - optimizer: adam
  - scheduler: constant_schedule_with_warmup
  - training: default
  - trainer: default
  - trainer/logger: wandb
  - hydra: output/custom
  # `override` replaces a dataset option that was already selected elsewhere in
  # the composition (presumably by one of the configs above -- TODO confirm).
  - override dataset: nlp/language_modeling/wikitext

backbone:
  # Model identifier; also embedded in the trainer.default_root_dir path.
  pretrained_model_name_or_path: 'gpt2'

dataset:
  cfg:
    # Quoted so the digits and dashes are always read as a plain string.
    dataset_config_name: 'wikitext-2-raw-v1'
    block_size: 512
    # Half of block_size; task.cfg.lm_stride interpolates this value.
    stride: 256
    eval_batch_size: 10

task:
  cfg:
    compute_generate_metrics: true
    # Also used as a path component of trainer.default_root_dir.
    task_name: lm_ct
    scheduler_monitor: val_loss
    scheduler_mode: min
    # 3 by default
    no_repeat_ngram_size: 0
    # 3 by default
    encoder_no_repeat_ngram_size: 0
    val_target_max_length: 150
    prefix_length: 50
    # 10 by default
    num_beams: 1
    # Follows dataset.cfg.stride so the two values cannot drift apart.
    lm_stride: '${dataset.cfg.stride}'
    simctg: false
    # mask for padding tokens
    padding_mask: true
    # mask for identical tokens
    identical_mask: false
    # don't generate before this threshold, to save time
    generate_after_progress: 0
    ul_seq: false
    ct_seq: false
    # top-k predictions as negative examples; 0: skipping
    topk_negatives: 0
    # -1: use all; 0: use none; k: use preceding k tokens as negatives
    preced_m_negatives: 60
    # other values noted by the original author: ul, ul2, nce
    negative_method: ct
    scratch: false
    # example value: ${trainer.default_root_dir}/generated.txt
    save_generation_path: null
    ct_seq_len: 150

trainer:
  gpus: 1
  val_check_interval: 1000
  max_steps: 50000
  max_epochs: 100
  gradient_clip_val: 0.1
  # Run directory layout:
  #   <HOME>/storage/trained/lit/<task_name>/<model>_<dataset>/<experiment_name>
  # NOTE(review): dataset.cfg.pretrained_dataset_name is not set in this file;
  # presumably the composed dataset config provides it -- confirm.
  default_root_dir: '${oc.env:HOME}/storage/trained/lit/${task.cfg.task_name}/${backbone.pretrained_model_name_or_path}_${dataset.cfg.pretrained_dataset_name}/${experiment_name}'
  # Points at last.ckpt inside the run directory above.
  # NOTE(review): experiment_name defaults to a timestamp, so a fresh run
  # resolves to a new directory unless experiment_name is overridden -- confirm
  # this is the intended resume workflow.
  resume_from_checkpoint: '${trainer.default_root_dir}/last.ckpt'
  logger:
    project: ct
  callbacks:
    checkpoint_callback:
      save_top_k: 1
      save_last: true

training:
  # Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. stock PyYAML)
  # resolve a bare `1e-5` as a string rather than a float. The numeric value is
  # unchanged.
  lr: 1.0e-5
  batch_size: 4

optimizer:
  # Zero weight decay (kept as an integer literal).
  weight_decay: 0

# Timestamp label for the run; it is the last path component of
# trainer.default_root_dir. A single `now` call takes the date and the time
# from one clock reading, so the two parts cannot disagree around midnight.
experiment_name: '${now:%Y-%m-%d_%H-%M-%S-%f}'
log: true
# todo: check warnings before release
ignore_warnings: true
seed: 0
# choose from train, test, (predict, interact, to be added)
stage: train
finetune_ckpt: null
