# Run identity and hardware selection.
# NAME is also interpolated into LOGGER.wandb.params.name.
NAME: stage1_t2m
ACCELERATOR: gpu
NUM_NODES: 1
# Two device entries (0 and 1); presumably GPU indices given ACCELERATOR above.
DEVICE:
  - 0
  - 1

# Training configuration.
TRAIN:
  SPLIT: train
  DATASETS: humanml3d
  instruction_type: t2m
  STAGE: lm_pretrain
  NUM_WORKERS: 8
  BATCH_SIZE: 32
  accumulate_grad_batches: 2
  END_EPOCH: 50

  # Checkpoint paths. RESUME and PRETRAINED are empty strings here; how the
  # training code treats an empty path is not visible in this file.
  RESUME: ''
  PRETRAINED: ''
  # Each key must appear only once per mapping: duplicate keys are invalid
  # YAML and are either rejected or silently resolved to the last occurrence,
  # depending on the loader.
  PRETRAINED_VAE: checkpoints/mld_humanml3d_checkpoint/1222_mld_humanml3d_FID041.ckpt  # VAE checkpoint path

  LR_SCHEDULER:
    target: CosineAnnealingLR
    params:
      # `eval` is a custom interpolation resolver that the loading code has to
      # register. With LOGGER.VAL_EVERY_STEPS set to 25 the expression reads
      # "25 * 100".
      T_max: ${eval:${LOGGER.VAL_EVERY_STEPS} * 100}
      # Exponent floats are written with a decimal point so that YAML 1.1
      # loaders also parse them as numbers rather than strings.
      eta_min: 1.0e-6

  OPTIM:
    target: AdamW
    params:
      lr: 2.0e-4
      betas: [0.9, 0.99]
      weight_decay: 0.0
    # Second hyperparameter set with a lower learning rate; which parameters
    # it applies to is decided by the training code (not visible here).
    params_diff:
      lr: 1.0e-4
      betas: [0.9, 0.99]
      weight_decay: 0.0

  # Referenced by model.motion_vae.ablation through ${TRAIN.ABLATION}.
  ABLATION:
    VAE_TYPE: 'actor'
    VAE_ARCH: 'encoder_decoder'
    MLP_DIST: false
    IS_DIST: false
    PREDICT_EPSILON: true
    SKIP_CONNECT: true
    PE_TYPE: mld
    DIFF_PE_TYPE: mld

# Evaluation configuration: validation split of the same dataset as TRAIN.
EVAL:
  SPLIT: val
  DATASETS: humanml3d
  BATCH_SIZE: 32
  NUM_WORKERS: 12

# Test configuration.
TEST:
  # Inputs: checkpoint to load and data split to run on.
  TEST_DIR: ""
  CHECKPOINTS: experiments/motiongpt3/stage1_t2m/checkpoints/last.ckpt
  DATASETS: humanml3d
  SPLIT: test
  BATCH_SIZE: 32
  NUM_WORKERS: 12

  # Output switches.
  SAVE_PREDICTIONS: false
  COUNT_TIME: false
  FOLDER: results

  # Repetition and sampling counts; their exact semantics are defined by the
  # consuming test code, which is not visible in this file.
  REPLICATION_TIMES: 20
  MM_NUM_SAMPLES: 100
  MM_NUM_REPEATS: 30
  MM_NUM_TIMES: 10
  DIVERSITY_TIMES: 300
  REP_I: 0
  MEAN: false
  NUM_SAMPLES: 1
  FACT: 1

# Top-level ablation switches (separate from TRAIN.ABLATION and LOSS.ABLATION).
ABLATION:
  use_length: false
  predict_ratio: 0.2
  inbetween_ratio: 0.25
  image_size: 256

# Dataset module and its HumanML3D-specific settings.
DATASET:
  # Also interpolated into model.motion_vae.datatype.
  target: motGPT.data.HumanML3D.HumanML3DDataModule
  CODE_PATH: TOKENS
  TASK_ROOT: ""
  TASK_PATH: ""
  # Also interpolated into model.motion_vae.nfeats.
  NFEATS: 263
  HUMANML3D:
    # Data locations.
    ROOT: /datasets/humanml3d  # HumanML3D directory (alternative: datasets/humanml3d)
    SPLIT_ROOT: /datasets/humanml3d  # HumanML3D splits directory (alternative: datasets/humanml3d)
    MEAN_STD_PATH: deps/t2m/
    # Sequence length limits.
    MAX_MOTION_LEN: 196
    MIN_MOTION_LEN: 40
    MAX_TEXT_LEN: 20
    UNIT_LEN: 4
    # Both FRAME_RATE and FPS are declared, with the same numeric value.
    FRAME_RATE: 20.0
    FPS: 20
    # Text handling switches.
    PICK_ONE_TEXT: true
    STD_TEXT: false

# Metric configuration. The key is declared exactly once: duplicate keys are
# invalid YAML and are either rejected or silently resolved to the last
# occurrence, depending on the loader.
METRIC:
  TASK: 't2m'
  TYPE: ['TM2TMetrics', 'PredMetrics']
  FORCE_IN_METER: true
  DIST_SYNC_ON_STEP: true

# Loss weights. Exponent floats are written with a decimal point (1.0e-5
# rather than 1e-5) so that YAML 1.1 loaders also parse them as numbers
# instead of strings.
LOSS:
  LAMBDA_REC: 1.0  # Lambda for reconstruction losses
  LAMBDA_JOINT: 1.0  # Lambda for joint losses

  LAMBDA_LATENT: 1.0e-5  # Lambda for latent losses
  LAMBDA_KL: 1.0e-5  # Lambda for KL losses
  LAMBDA_GEN: 1.0  # Lambda for text-motion generation losses
  LAMBDA_CROSS: 1.0  # Lambda for cross-reconstruction losses
  LAMBDA_CYCLE: 1.0  # Lambda for cycle losses
  LAMBDA_PRIOR: 0.0  # Lambda for diffusion prior losses

  LAMBDA_VELOCITY: 0.5  # Lambda for velocity losses
  LAMBDA_COMMIT: 0.02  # Lambda for commitment losses

  LAMBDA_CLS: 0.1
  LAMBDA_DIFF: 1.0
  ABLATION:
    RECONS_LOSS: 'l1_smooth'

# Model definition: top-level class plus its sub-module configurations.
model:
  target: motGPT.models.motiongpt3.MotionGPT3
  condition: text
  task: t2m

  # Language-model component.
  lm:
    target: motGPT.models.architectures.motiongpt3.MLM
    params:
      model_type: conditional_multitask
      model_path: deps/mot-gpt2

  # Motion VAE component.
  motion_vae:
    target: motGPT.models.architectures.mld_vae.MldVae
    arch: encoder_decoder
    ff_size: 1024
    num_layers: 9
    num_heads: 4
    dropout: 0.1
    normalize_before: false
    activation: gelu
    position_embedding: learned
    # Latent dimension.
    latent_dim:
      - 1
      - 256
    code_num: 512
    guidance_scale: 3.0
    guidance_uncondp: 0.1  # values noted by the author: 0.1, 0.25
    # NOTE(review): the original comment lists 'humanml3d', 'motionx', but this
    # interpolation resolves to the DATASET.target class path - confirm which
    # form the VAE expects.
    datatype: ${DATASET.target}
    nfeats: ${DATASET.NFEATS}
    ablation: ${TRAIN.ABLATION}

  # Text encoder. The dim_* keys carry the same numbers as the entries
  # under params.
  # NOTE(review): target lives under `mld.`, unlike the `motGPT.` targets
  # above - confirm the module path is importable.
  t2m_textencoder:
    dim_word: 300
    dim_pos_ohot: 15
    dim_text_hidden: 512
    dim_coemb_hidden: 512
    target: mld.models.architectures.t2m_textenc.TextEncoderBiGRUCo
    params:
      word_size: 300
      pos_size: 15
      hidden_size: 512
      output_size: 512

  # Motion encoder.
  # NOTE(review): target lives under `mld.`, unlike the `motGPT.` targets
  # above - confirm the module path is importable.
  t2m_motionencoder:
    dim_move_hidden: 512
    dim_move_latent: 512
    dim_motion_hidden: 1024
    dim_motion_latent: 512
    target: mld.models.architectures.t2m_motionenc.MotionEncoder
    params:
      # NOTE(review): model.t2m_moveencoder is not defined in this file; the
      # interpolation only resolves if another merged config provides it.
      input_size: ${model.t2m_moveencoder.output_size}
      hidden_size: 1024
      output_size: 512

  # Paths to dependency assets.
  bert_path: deps/distilbert-base-uncased
  clip_path: deps/clip-vit-large-patch14
  t2m_path: deps/t2m/
  m2t_bert_path: deps/bert_score/roberta-large
  m2t_baseline_path: deps/bert_score/roberta-large.tsv
  humanact12_rec_path: deps/actionrecognition
  uestc_rec_path: deps/actionrecognition
      
# Logging configuration.
LOGGER:
  # Names listed here match the `tensorboard` and `wandb` sub-sections below.
  TYPE:
    - tensorboard
    - wandb
  # Also read by TRAIN.LR_SCHEDULER.params.T_max via interpolation.
  VAL_EVERY_STEPS: 25
  # NOTE(review): this project name (motiongpt3) differs from
  # wandb.params.project (mem) below - confirm which one is actually used.
  WANDB:
    params:
      project: motiongpt3

  # ${FOLDER_EXP} is not defined in this file; it has to be supplied by the
  # loading code or another merged config.
  tensorboard:
    target: pytorch_lightning.loggers.TensorBoardLogger
    params:
      save_dir: ${FOLDER_EXP}
      name: tensorboard
      version: ""
  wandb:
    target: pytorch_lightning.loggers.WandbLogger
    params:
      project: mem
      offline: false
      id: null
      version: ""
      name: ${NAME}
      save_dir: ${FOLDER_EXP}

