# Experiment name
NAME: '240715_Res_MoMask_Dropcode_Causal_drop03_1024x8x6_HumanML3D'
# Device type; options: "cpu", "gpu", "tpu", "ipu", "hpu", "mps", "auto"
ACCELERATOR: gpu
# Number of GPU nodes for distributed training
NUM_NODES: 1
# Indices of the GPUs to use, e.g. [0] or [0, 1, 2, 3]
DEVICE:
  - 0

# Training configuration
TRAIN:
  #---------------------------------
  # Training stage; one of "vae", "lm_pretrain", "lm_instruct"
  STAGE: lm_pretrain
  #---------------------------------
  NUM_WORKERS: 16 # Number of workers
  BATCH_SIZE: 256 # Size of batches
  END_EPOCH: 999999 # End epoch
  # Checkpoint path of the pretrained VAE
  PRETRAINED_VAE: '/mnt/memData/experiments/tokenizer/240713_Forward2_Dropcode015_Causal_drop03_1024x8x6_w512d3o128l0_down3x1_HumanML3D/checkpoints/min-FID-62999.ckpt'
  # PRETRAINED: /mnt/memData/experiments/msr3/240618_Res_MoMask_Causal_1024x6x6_HumanML3D/checkpoints/latest-epoch=159.ckpt
  # RESUME: '/mnt/memData/experiments/msr3/240603_Res_MoMask_Causal_2048x6x6_HumanML3D/checkpoints/min-FID-89.ckpt' # Resume training from this path
  OPTIM:
    target: AdamW
    params:
      # Keep the mantissa dot: YAML 1.1 loaders (e.g. PyYAML's default
      # resolver) read a dot-less exponent such as 1e-4 as a string.
      lr: 1.0e-4
      betas: [0.9, 0.99]
      weight_decay: 0.0

# Evaluation configuration
EVAL:
  # Batch size used for evaluation
  BATCH_SIZE: 32
  # Dataset split used for evaluation
  SPLIT: 'test'

# Testing configuration
TEST:
  # NOTE(review): this path names a different experiment
  # (240530_..._2048x6x6) than NAME above (240715_..._1024x8x6) — confirm
  # it is the checkpoint intended for this run.
  CHECKPOINTS: '/mnt/memData/experiments/msr3/240530_Res_MoMask_Causal_2048x6x6_HumanML3D/checkpoints/epoch=8999.ckpt'
  # Dataset split used for testing
  SPLIT: 'test'
  # Testing batch size
  BATCH_SIZE: 32

# Dataset configuration
DATASET:
  # Dotted path of the data module class to use
  target: 'mGPT.data.HumanML3D_old_momask.HumanML3DDataModule'
  # NOTE(review): presumably the directory of pre-computed motion codes
  # (the name matches the 1024x8x6 dropcode tokenizer) — confirm.
  CODE_PATH: '/mnt/datasets/humanml3d/causal_1024x8x6_dropcode'

# Metric configuration
METRIC:
  # Metric types listed for this run
  TYPE:
    - TemosMetric
    - TM2TMetrics
    - MRMetrics
  # TYPE: ['TMRMetrics']

# Loss configuration
LOSS:
  # Per-term weights (names suggest feature, velocity, commitment and
  # classification terms — confirm against the loss implementation)
  LAMBDA_FEATURE: 1.0
  LAMBDA_VELOCITY: 0.5
  LAMBDA_COMMIT: 0.02
  LAMBDA_CLS: 1.0
  ABLATION:
    # Reconstruction loss identifier
    RECONS_LOSS: l1_smooth

# Model configuration: each `target` is a dotted class path and `params`
# holds the values listed for it. Booleans are written in canonical
# lowercase throughout.
model:
  target: mStream.models.momask.momask_res.MotionSR
  params:
    use_momask_vq: false
    use_momask_res: false
    condition: 'text'
    task: 't2m'
    # Motion tokenizer (VQ-VAE)
    motion_vae:
      target: mStream.archs.mstream_tokenizer.vqvae.VQVAE
      params:
        # NOTE(review): spelled "casual" (not "causal"); this is a runtime
        # identifier, so it is left as-is — confirm it matches the name the
        # code expects.
        encoder_type: resnet1d_casual
        decoder_type: resnet1d_casual
        quantizer: multiscale11
        code_num: 1024
        code_dim: 8
        width: 512
        output_emb_width: 128
        down_t: 3
        stride_t: 1
        layers: 0
        depth: 3
        dilation_growth_rate: 3
        v_lengths: [1, 1, 1, 1, 1, 1]
        num_quantizers: 6
        quantize_dropout_prob: 0.3
        codebook_dropout: 0.15
        shared_codebook: false
        kmeans_init: true
        kmeans_iters: 10
        trans_layers: 0
        # NOTE(review): YAML reads `None` as the string "None", not null
        # (compare `opt: null` below) — confirm the consumer expects the
        # string before changing it.
        norm: None
        activation: ReLU
        # NOTE(review): DATASET.NFEATS is not defined in the visible part of
        # this file; presumably supplied by a base config or at runtime.
        nfeats: ${DATASET.NFEATS}
        flatten: false
        # NOTE(review): only LOSS.ABLATION is visible here, not a top-level
        # ABLATION key; presumably it comes from a merged base config —
        # confirm this interpolation resolves.
        ablation: ${ABLATION}
    # Residual transformer
    motion_sr:
      target: mGPT.archs.momask_trans.ResidualTransformer
      params:
        code_dim: 512
        # Token count and quantizer count are taken from the VQ-VAE params
        # above via interpolation.
        num_tokens: ${model.params.motion_vae.params.code_num}
        num_quantizers: ${model.params.motion_vae.params.num_quantizers}
        cond_mode: text
        latent_dim: 384
        ff_size: 1024
        num_layers: 8
        num_heads: 6
        dropout: 0.1
        clip_dim: 512
        cond_drop_prob: 0.1
        clip_version: ViT-B/32
        opt: null
        shared_codebook: false
        share_weight: true
    # Both values below are interpolated from the VQ-VAE params above.
    codebook_size: ${model.params.motion_vae.params.code_num}
    num_quantizers: ${model.params.motion_vae.params.num_quantizers}
# Logging configuration
LOGGER:
  # Logger types listed for this run
  TYPE:
    - tensorboard
    - wandb
  VAL_EVERY_STEPS: 10
  WANDB:
    params:
      project: MotionStream
