# Image codec configuration. `target` names a class by its dotted path;
# `params` presumably become that class's constructor arguments — confirm
# against the config loader.
model:
  target: image_synthesis.modeling.codecs.image_codec.resnet_vqvae.ResNetVQVAE
  params:
    embed_dim: 512
    n_embed: 4096  # presumably the number of codebook entries — TODO confirm
    body_type: resnet18
    sample_type: conv
    hidden_planes: 256
    # NOTE(review): presumably the token grid produced per image; with the
    # dataloader's preprocessor `size: 128` that would imply 8x downsampling —
    # confirm.
    token_shape: [16, 16]
    # Lowercase `true` is the canonical YAML boolean spelling.
    trainable: true

# Training settings: learning rate, epoch cadence, and the
# optimizer/scheduler pairs.
solver:
  # NOTE(review): base_lr is zero and adjust_lr is the string `none`;
  # presumably the scheduler below drives the effective learning rate —
  # confirm against the solver.
  base_lr: 0.0
  adjust_lr: none
  max_epochs: 30
  save_epochs: 1
  validation_epochs: 1
  # How often sampling is performed, in iterations. Presumably `epoch` means
  # once per epoch — confirm against the solver.
  sample_iterations: epoch
  # A list of configurations, so several optimizers and schedulers can be
  # configured.
  optimizers_and_schedulers:
    - name: none  # default is none
      optimizer:
        target: torch.optim.Adam
        # Optional optimizer arguments, currently disabled. Note that the
        # language-specific `!!python/tuple` tag is rejected by safe YAML
        # loaders.
        # params:
        #   betas: !!python/tuple [0.9, 0.96]
        #   weight_decay: 4.5e-2
      scheduler:
        step_iteration: 1
        target: image_synthesis.engine.lr_scheduler.CosineAnnealingLRWithWarmup
        params:
          min_lr: 1.0e-6
          warmup_lr: 3.0e-4  # the lr reached once warmup finishes
          warmup: 10000  # presumably the warmup length in iterations — confirm

# Data loading: dataset root, batching, and the train/validation datasets.
# Each dataset entry names a class by dotted path (`target`) together with its
# `params`; `im_preprocessor_config` follows the same target/params layout.
dataloader:
  data_root: data
  batch_size: 32
  num_workers: 2
  train_datasets:
    - target: image_synthesis.data.tsv_dataset.TSVImageTextDataset
      params:
        name: conceptualcaption/train
        # Image and text shards are listed in the same order.
        image_tsv_file:
          - 'gcc-train-image-00.tsv'
          - 'gcc-train-image-01.tsv'
        text_tsv_file:
          - 'gcc-train-text-00.tsv'
          - 'gcc-train-text-01.tsv'
        text_format: json
        im_preprocessor_config:
          target: image_synthesis.data.utils.image_preprocessor.DalleTransformerPreprocessor
          params:
            size: 128
            phase: train
    # Disabled entry: would add the conceptualcaption/val files to the
    # training set, preprocessed with `phase: train`.
    # - target: image_synthesis.data.tsv_dataset.TSVImageTextDataset
    #   params:
    #     name: conceptualcaption/val
    #     image_tsv_file: ['gcc-val-image.tsv', 'gcc-val-image.tsv']
    #     text_tsv_file: ['gcc-val-text.tsv', 'gcc-val-text.tsv']
    #     text_format: json
    #     im_preprocessor_config:
    #       target: image_synthesis.data.utils.image_preprocessor.DalleTransformerPreprocessor
    #       params:
    #         size: 128
    #         phase: train
  validation_datasets:
    - target: image_synthesis.data.tsv_dataset.TSVImageTextDataset
      params:
        name: conceptualcaption/val
        image_tsv_file:
          - 'gcc-val-image.tsv'
        text_tsv_file:
          - 'gcc-val-text.tsv'
        text_format: json
        im_preprocessor_config:
          target: image_synthesis.data.utils.image_preprocessor.DalleTransformerPreprocessor
          params:
            size: 128
            phase: val