# @package _global_
# Name of this configuration (presumably used as the experiment/run name —
# confirm against the code that consumes it).
name: ttnet_multitask
# Free-form description of the experiment. With the folded style (`>`), the
# first two lines are joined into a single sentence, while the more-indented
# bullet lines keep their line breaks. Do not place `#` comments inside the
# scalar: they would become part of the text.
notes: >
  Baseline model of "Visual Transformation Telling", using a combination of
  three loss functions during training:
    - standard cross-entropy loss for text generation
    - cross-entropy loss for category and topic classification
    - mse loss for transformation construction

# Hydra defaults list. Each `override` entry replaces the option that is
# already selected for that config group elsewhere in the defaults chain
# (presumably by the primary config) instead of adding a new group.
defaults:
  # Option chosen for the `model` config group.
  - override /model: ttnet_multitask
  # Option chosen for the `generate_cfg` group nested under `model`. The name
  # suggests top-k / top-p sampling with a no-repeat constraint — confirm in
  # the referenced config file.
  - override /model/generate_cfg: top_k_top_p_no_repeat
  # Option chosen for the `criterion` config group; see `notes` above for the
  # losses this experiment is described as combining.
  - override /criterion: multitask

# Values set under `criterion.loss` by this experiment. They are presumably
# merged on top of the `multitask` criterion option chosen in `defaults` —
# confirm against that config.
criterion:
  loss:
    # Shift values for logits and labels. Their exact meaning is defined by the
    # loss implementation, which is not visible here — TODO confirm.
    logit_shift: 0
    label_shift: -1
    # Identifier of the text model used by the loss.
    # NOTE(review): looks like a CLIP model name — confirm against the loader.
    text_model: "ViT-B/32"

# Trainer settings for this experiment. The `pl_` prefix and the key names
# suggest PyTorch Lightning `Trainer` arguments — confirm against the code
# that consumes this section.
pl_trainer:
  # Numeric precision setting (32 here).
  precision: 32
  # Explicit null: this config does not select a strategy.
  strategy: null
  # Written as the canonical lowercase boolean; the parsed value is unchanged.
  sync_batchnorm: false
