NAME: 240710_T2M_MoMask_Dropcode_Causal_drop03_1024x8x6_HumanML3D # Experiment name
ACCELERATOR: 'gpu' # Device options: "cpu", "gpu", "tpu", "ipu", "hpu", "mps", "auto"
NUM_NODES: 1 # Number of GPU nodes for distributed training
DEVICE: [0] # Indices of GPUs, e.g. [0] or [0,1,2,3]

# Training configuration.
TRAIN:
  #---------------------------------
  # Training stage; one of "vae", "lm_pretrain", "lm_instruct".
  STAGE: lm_pretrain
  #---------------------------------
  # Number of workers.
  NUM_WORKERS: 16
  # Size of batches.
  BATCH_SIZE: 512
  # End epoch.
  END_EPOCH: 100000
  # Absolute checkpoint paths (machine-specific; adjust when running elsewhere).
  PRETRAINED_VAE: '/mnt/memData/experiments/tokenizer/240713_Forward2_Dropcode015_Causal_drop03_1024x8x6_w512d3o128l0_down3x1_HumanML3D/checkpoints/min-FID-62999.ckpt'
  PRETRAIN_MOTIONSR: '/mnt/memData/experiments/momask_res/240715_Res_MoMask_Dropcode_Causal_drop03_1024x8x6_HumanML3D/checkpoints/min-FID-799.ckpt'
  PRETRAIN_MOMASK: '/mnt/memData/experiments/momask_t2m/240801_AblDiTMaskInOut_T2M_MoMask_Dropcode_Causal_drop03_1024x8x6_HumanML3D/checkpoints/min-FID-419.ckpt'
  # The next three keys are ${...} interpolations of the paths above, so
  # editing PRETRAIN_MOMASK / PRETRAIN_MOTIONSR updates them as well.
  PRETRAINED_MTR: ${TRAIN.PRETRAIN_MOMASK}
  PRETRAIN_MOTIONSR2: ${TRAIN.PRETRAIN_MOTIONSR}
  PRETRAINED_MTR2: ${TRAIN.PRETRAIN_MOMASK}
  PRETRAINED: ''
  # Resume training from this path (empty string here).
  RESUME: ''
  OPTIM:
    target: AdamW
    params:
      # Mantissa written with an explicit decimal point: a bare "2e-4" is
      # read as a string by strict YAML 1.1 loaders (e.g. plain PyYAML),
      # while "2.0e-4" is a float under every loader.
      lr: 2.0e-4
      betas: [0.9, 0.99]
      weight_decay: 0.0

# Evaluation configuration
EVAL:
  BATCH_SIZE: 32 # Evaluation batch size
  SPLIT: test

TEST:
  CHECKPOINTS: ''
  SPLIT: test
  BATCH_SIZE: 32 # Testing batch size

# Dataset configuration
DATASET:
  target: mGPT.data.HumanML3D_old_momask.HumanML3DDataModule
  # NOTE(review): this directory name says "4096x8x6", whereas the experiment
  # NAME and model.params.motion_vae.params.code_num both say 1024 — confirm
  # this is the intended code path.
  CODE_PATH: /mnt/datasets/humanml3d/causal_4096x8x6_dropcode
  HUMANML3D:
    UNIT_LEN: 1

# Metric configuration: TYPE lists the metric names to use.
METRIC:
  TYPE:
    - 'TemosMetric'
    - 'TM2TMetrics'
    - 'MRMetrics'
  # TYPE: ['TMRMetrics']

# Loss configuration
# NOTE(review): the LAMBDA_* values look like per-term loss weights — confirm
# against the loss implementation.
LOSS:
  LAMBDA_FEATURE: 1.0
  LAMBDA_VELOCITY: 0.5
  LAMBDA_COMMIT: 0.02
  LAMBDA_CLS: 1.0
  # Nested under LOSS, i.e. addressed as LOSS.ABLATION (distinct from a
  # top-level ABLATION key).
  ABLATION:
    RECONS_LOSS: 'l1_smooth'

# Model configuration. Each component below is a `target` (dotted path) plus
# a `params` mapping; values written as ${...} are interpolations resolved by
# the config loader. Booleans are spelled lowercase (true/false) throughout
# for consistency.
model:
  target: mGPT.models.mstream_flowmdm_ori.MotionStream
  params:
    use_momask_vq: false
    use_momask_res: false
    condition: 'text'
    task: 't2m'
    # Mask transformer; token and quantizer counts follow motion_vae.
    mtr:
      target: mStream.archs.mstream_trans.mstream_dit.MaskTransformer
      params:
        code_dim: 512
        num_tokens: ${model.params.motion_vae.params.code_num}
        num_quantizers: ${model.params.motion_vae.params.num_quantizers}
        cond_mode: text
        latent_dim: 384
        num_layers: 10
        num_heads: 6
        dropout: 0.3
        clip_dim: 512
        cond_drop_prob: 0.1
        clip_version: ViT-B/32
        poe_type: absolute
    # VQ-VAE tokenizer; code_num and num_quantizers are referenced by the
    # other components through interpolation.
    motion_vae:
      target: mStream.archs.mstream_tokenizer.vqvae.VQVAE
      params:
        encoder_type: resnet1d_casual
        decoder_type: resnet1d_casual
        quantizer: multiscale11
        code_num: 1024
        code_dim: 8
        width: 512
        output_emb_width: 128
        down_t: 3
        stride_t: 1
        layers: 0
        depth: 3
        dilation_growth_rate: 3
        v_lengths: [1, 1, 1, 1, 1, 1]
        num_quantizers: 6
        quantize_dropout_prob: 0.3
        codebook_dropout: 0.15
        shared_codebook: false
        kmeans_init: true
        kmeans_iters: 10
        trans_layers: 0
        # NOTE(review): YAML loads this as the string "None", not null —
        # confirm the consumer expects the string.
        norm: None
        activation: ReLU
        # NOTE(review): DATASET.NFEATS and the top-level ABLATION key are not
        # defined in this file; presumably they come from a base config or are
        # set at runtime before interpolation is resolved — TODO confirm.
        nfeats: ${DATASET.NFEATS}
        flatten: false
        ablation: ${ABLATION}
    # Residual transformer.
    motion_sr:
      target: mGPT.archs.momask_trans.ResidualTransformer
      params:
        code_dim: 512
        num_tokens: ${model.params.motion_vae.params.code_num}
        # Hard-coded here; the interpolated form is kept for reference and
        # currently resolves to the same value (6).
        # num_quantizers: ${model.params.motion_vae.params.num_quantizers}
        num_quantizers: 6
        cond_mode: text
        latent_dim: 384
        ff_size: 1024
        num_layers: 8
        num_heads: 6
        dropout: 0.1
        clip_dim: 512
        cond_drop_prob: 0.1
        clip_version: ViT-B/32
        opt: null
        shared_codebook: false
        share_weight: true
    codebook_size: ${model.params.motion_vae.params.code_num}
    num_quantizers: ${model.params.motion_vae.params.num_quantizers}
# Logging configuration.
LOGGER:
  # Names of the logger backends.
  TYPE:
    - 'tensorboard'
    - 'wandb'
  VAL_EVERY_STEPS: 20
  WANDB:
    params:
      project: MotionStream
