# Experiment name.
NAME: '240828_Pretrain_Forward2_Dropcode015_Causal_drop03_1024x512x6_w512d3o128l1_down2x2_HumanML3D'
# Accelerator type; options: "cpu", "gpu", "tpu", "ipu", "hpu", "mps", "auto".
ACCELERATOR: gpu
# Number of GPU nodes for distributed training.
NUM_NODES: 1
# Indices of the GPUs to use, e.g. [0] or [0, 1, 2, 3].
DEVICE: [0]

# Training configuration
TRAIN:
  #---------------------------------
  # Training stage; one of "vae", "lm_pretrain", "lm_instruct".
  STAGE: vae
  #---------------------------------
  # Number of data-loading workers.
  NUM_WORKERS: 16
  # Training batch size.
  BATCH_SIZE: 512
  # Last epoch to train to.
  END_EPOCH: 999999
  # Path to resume training from; empty string means none is given.
  RESUME: ''
  # PRETRAINED_VAE: "/mnt/memData/experiments/tokenizer/240723_Pretrain_Forward2_Dropcode015_Causal_drop03_1024x8x6_w512d3o128l0_down3x1_HumanML3D_2024-07-22_20:46:34/checkpoints/latest-epoch=3799.ckpt"
  # NOTE(review): presumably a path to pretrained weights; empty here — confirm.
  PRETRAINED: ''
  OPTIM:
    target: AdamW
    params:
      # Written with an explicit decimal point on purpose: YAML 1.1 loaders
      # (e.g. PyYAML) read a bare `2e-4` as a string, not a float.
      lr: 2.0e-4
      betas: [0.9, 0.95]
      weight_decay: 0.0

  LR_SCHEDULER:
    # NOTE(review): T_max / eta_min are the argument names of PyTorch's
    # CosineAnnealingLR, not LambdaLR; presumably the training code interprets
    # these params itself — confirm.
    target: LambdaLR
    params:
      # Interpolation over LOGGER.VAL_EVERY_STEPS; relies on a resolver named
      # `eval` (presumably registered by the project — confirm).
      T_max: ${eval:${LOGGER.VAL_EVERY_STEPS} * 100}
      # Explicit decimal point for the same YAML 1.1 reason as `lr` above.
      eta_min: 1.0e-6
      warmup_epochs: 200
      total_epochs: 10000

# Evaluation configuration
EVAL:
  # Batch size used during evaluation.
  BATCH_SIZE: 32
  # Dataset split to evaluate on.
  SPLIT: 'test'

# Testing configuration
TEST:
  # NOTE(review): presumably the checkpoint path(s) to test; empty here — confirm.
  CHECKPOINTS: ''
  # Dataset split to test on.
  SPLIT: 'test'
  # Batch size used during testing.
  BATCH_SIZE: 32

# Dataset configuration
DATASET:
  # Dotted import path of the data module class.
  # NOTE(review): this path is under the `mGPT` package while the `model`
  # targets in this file are under `mStream` — confirm this is intended.
  target: 'mGPT.data.HumanML3D_old.HumanML3DDataModule'

# Metric configuration
METRIC:
  # Names of the metrics to enable. Alternative selection kept for reference:
  # TYPE: ['TemosMetric', 'TM2TMetrics', 'MRMetrics']
  TYPE:
    - TM2TMetrics

# Loss configuration.
# NOTE(review): the LAMBDA_* values are presumably per-term loss weights;
# the code that reads them is not visible here — confirm.
LOSS:
  # Feature term.
  LAMBDA_FEATURE: 1.0
  # Jerk term (0.0 here).
  LAMBDA_JERK: 0.0
  # Velocity term.
  LAMBDA_VELOCITY: 0.5
  # Commitment term.
  LAMBDA_COMMIT: 0.02
  # Classification term.
  LAMBDA_CLS: 1.0
  # Adversarial terms (generator / discriminator).
  # NOTE(review): the `discriminator` entry under `model.params` is commented
  # out in this file — confirm whether these two values are used at all.
  LAMBDA_ADV_G: 0.1
  LAMBDA_ADV_D: 0.1
  ABLATION:
    # Name of the reconstruction loss variant.
    RECONS_LOSS: l1_smooth

# Model configuration: a MotionVQ wrapper around a VQVAE motion tokenizer.
model:
  target: mStream.models.mstream_tokenizer.MotionVQ
  params:
    condition: 'text'
    task: 't2m'
    # Interpolates `lm.default`, which is not defined in this file; presumably
    # supplied by a base config merged before resolution — confirm.
    lm: ${lm.default}
    motion_vae:
      target: mStream.archs.mstream_tokenizer.vqvae.VQVAE
      # target: mStream.archs.mstream_tokenizer.vae2.KLVAE
      params:
        # NOTE(review): "casual" looks like a misspelling of "causal" (the
        # experiment NAME says "Causal"), but these strings are matched by code
        # not visible here — do not rename without checking the consumer.
        encoder_type: resnet1d_casual
        decoder_type: resnet1d_casual
        quantizer: multiscale11
        # code_num / code_dim / num_quantizers appear to mirror the
        # "1024x512x6" part of NAME.
        code_num: 1024
        code_dim: 512
        # width / depth / output_emb_width / layers appear to mirror the
        # "w512d3o128l1" part of NAME.
        width: 512
        output_emb_width: 128
        # down_t / stride_t appear to mirror the "down2x2" part of NAME.
        down_t: 2
        stride_t: 2
        layers: 1
        depth: 3
        dilation_growth_rate: 3
        # Six entries, the same count as num_quantizers below.
        v_lengths: [1, 1, 1, 1, 1, 1]
        num_quantizers: 6
        # Appears to mirror "drop03" in NAME.
        quantize_dropout_prob: 0.3
        # Appears to mirror "Dropcode015" in NAME.
        codebook_dropout: 0.15
        shared_codebook: false
        kmeans_init: true
        kmeans_iters: 10
        trans_layers: 0
        # NOTE(review): YAML parses `None` as the string "None", not as null.
        # Confirm the consumer expects that string before changing it to `null`.
        norm: None
        activation: ReLU
        # Interpolates DATASET.NFEATS, which is not defined in this file;
        # presumably set by a base config or at runtime — confirm.
        nfeats: ${DATASET.NFEATS}
        flatten: false
        # NOTE(review): refers to a top-level ABLATION key. This file only
        # defines LOSS.ABLATION, so the top-level key must come from elsewhere
        # (presumably a merged base config) — confirm.
        ablation: ${ABLATION}
    # Discriminator is disabled (entire entry commented out).
    # discriminator:
    #   target: mStream.archs.mstream_tokenizer.discriminator.ACTORStyleEncoder
    #   params:
    #     latent_dim: 256
    #     ff_size: 1024
    #     num_layers: 6
    #     num_heads: 4
    #     dropout: 0.1
    #     activation: gelu

# Logging configuration
LOGGER:
  # Logging backends to enable.
  TYPE:
    - tensorboard
    - wandb
  # Also read by the TRAIN.LR_SCHEDULER.params.T_max interpolation.
  VAL_EVERY_STEPS: 200
  # NOTE(review): "SVAE" looks like a typo of "SAVE"; the key is left unchanged
  # because the code that reads it is not visible here — confirm before renaming.
  SVAE_EVERY_STEPS: 1000
  WANDB:
    params:
      project: 'MotionVQ'
