# Pipeline configuration; every setting in this file is nested under the
# single `pipeline` root key.
pipeline:
  name: optimus_prime

  # Task identifier; the same `3dsav` prefix appears in qa_dataset.name and
  # saver.args.load_name below.
  task: 3dsav
  # NOTE(review): presumably false = training run, true = evaluation only;
  # confirm against the code that reads this file.
  eval_task: false
  # NOTE(review): presumably restores weights from saver.args.load_dir /
  # load_name before running; confirm against the code that reads this file.
  restore_model: true

  # Language encoder section: a component `name` plus its `args`.
  lang_encoder:
    name: bert_lang_encoder
    args:
      # NOTE(review): this key is singular (`num_hidden_layer`) while the
      # other encoders in this file use `num_layers`; confirm this is the
      # exact spelling the encoder reads.
      num_hidden_layer: 4

  # Point-cloud encoder section: a component `name` plus its `args`.
  point_encoder:
    name: point_tokenize_encoder
    args:
      backbone: pointnet++
      # Same width as unified_encoder.args.hidden_size,
      # ground_head.args.input_size and qa_head.args.hidden_size (all 768).
      hidden_size: 768
      # Explicit null. NOTE(review): presumably means "no backbone checkpoint
      # to load"; confirm.
      path: null
      freeze_feature: true
      # 768 / 12 = 64. NOTE(review): presumably the per-head width; confirm.
      num_attention_heads: 12
      # NOTE(review): spatial_dim (5) differs from dim_loc (6); confirm both
      # values are intended.
      spatial_dim: 5
      num_layers: 4
      # Same value as unified_encoder.args.dim_loc.
      dim_loc: 6
      pairwise_rel_type: center

  # Unified encoder section: a component `name` plus its `args`.
  unified_encoder:
    name: unified_encoder_v2
    args:
      # hidden_size, num_attention_heads, num_layers and dim_loc carry the
      # same values as the matching point_encoder.args keys.
      hidden_size: 768
      num_attention_heads: 12
      num_layers: 4
      dim_loc: 6
      # Switch for the self_diff_lpss_* options below. NOTE(review):
      # presumably they are all ignored when this is false; confirm.
      use_self_diff_lpss: true
      # Same value as qa_head.args.num_subspaces.
      self_diff_lpss_num_subspaces: 6
      # Equal to qa_dataset.args.txt_seq_length (50) and pc_seq_length (120).
      # NOTE(review): presumably these must stay in sync; confirm.
      self_diff_lpss_txt_len: 50
      self_diff_lpss_obj_len: 120
      # Differs from qa_head.args.lpss_temperature (1.5).
      self_diff_lpss_temperature: 0.5
      self_diff_lpss_bias_scale_init: 12.0
      self_diff_lpss_dropout: 0.1
      self_diff_lpss_use_gumbel: true
      # NOTE(review): presumably selects the top 2 of the 6 subspaces; confirm.
      self_diff_lpss_top_k: 2
      self_diff_lpss_min_weight: 0.02
      self_diff_lpss_grad_scale: 10.0
      # NOTE(review): the three *_weight keys look like auxiliary-loss
      # coefficients. The top-level `lpss` section has its own entropy_weight
      # (0.1) and orthogonal_weight (0.5); confirm which code reads which.
      self_diff_lpss_orthogonal_weight: 0.05
      self_diff_lpss_entropy_weight: 0.1
      self_diff_lpss_diversity_weight: 0.02
      # Feature toggles, all enabled in this configuration.
      self_diff_lpss_use_enhanced_routing: true
      self_diff_lpss_use_cross_structure: true
      self_diff_lpss_use_txt_structure: true
      self_diff_lpss_use_obj_structure: true

  # Grounding head section: a component `name` plus its `args`.
  ground_head:
    name: ground_head_v1
    args:
      # input_size and hidden_size equal the encoders' hidden_size (768).
      input_size: 768
      hidden_size: 768
      # NOTE(review): the meaning of 607 is not visible in this file
      # (presumably a class or vocabulary count); document its origin.
      sem_qr_size: 607
      dropout: 0.3

  # Question-answering head section: a component `name` plus its `args`.
  qa_head:
    name: qa_head_lpss
    args:
      # Same width as the encoders' hidden_size (768).
      hidden_size: 768
      mlp_size: 256
      glimpse: 1
      flat_out_size: 512
      # NOTE(review): presumably the size of the answer set (2 = binary);
      # confirm.
      num_answers: 2
      dropout: 0.3
      # Switch for the LPSS options below. NOTE(review): presumably they are
      # all ignored when this is false; confirm.
      use_lpss: true
      # Same value as unified_encoder.args.self_diff_lpss_num_subspaces.
      num_subspaces: 6
      # Same value as qa_dataset.args.txt_seq_length (50).
      seq_len: 50
      lpss_dropout: 0.1
      # Differs from unified_encoder.args.self_diff_lpss_temperature (0.5).
      lpss_temperature: 1.5
      lpss_bias_scale: 5.0
      lpss_router_gain: 1.0
      lpss_logits_scale: 5.0
      # Gumbel-related options. NOTE(review): presumably tau and hard only
      # take effect when lpss_use_gumbel is true; confirm.
      lpss_use_gumbel: true
      lpss_gumbel_tau: 1.0
      lpss_gumbel_hard: false
      # NOTE(review): presumably selects the top 2 of the 6 subspaces; confirm.
      lpss_top_k: 2
      lpss_grad_scale: 10.0

  # QA dataset section: a component `name` plus its `args`.
  qa_dataset:
    name: 3dsav_task
    args:
      tokenizer: bert_tokenizer
      # Equal to unified_encoder.args.self_diff_lpss_txt_len and
      # qa_head.args.seq_len (50).
      txt_seq_length: 50
      # Equal to unified_encoder.args.self_diff_lpss_obj_len (120).
      pc_seq_length: 120
      # NOTE(review): 'gt' presumably means ground-truth object inputs;
      # confirm the accepted values.
      pc_type: 'gt'

  # QA loss section: a component `name` plus its `args`.
  # NOTE(review): going by the name, presumably a focal loss (gamma, alpha)
  # combined with label smoothing (smoothing); confirm against the loss code.
  qa_loss:
    name: qa_loss_focal_smooth
    args:
      gamma: 2.0
      alpha: 1.0
      smoothing: 0.1

  # Logger section: a component `name` plus its `args`.
  logger:
    name: tensorboard_logger
    args:
      # Relative path starting with `..`. NOTE(review): presumably resolved
      # against the process working directory; confirm.
      log_dir: ../runs_ablation_full/

  # Model saver section: separate save_* and load_* locations.
  saver:
    name: model_saver
    args:
      # NOTE(review): all paths are relative; presumably resolved against
      # the process working directory; confirm.
      save_dir: outputs/ablation_full/
      save_name: model.pth
      # NOTE(review): presumably only read when restore_model is true;
      # confirm.
      load_dir: project/pretrain_weights/
      load_name: 3dsav.pth

  # Experiment identity. The name `ablation_full` also appears inside
  # logger.args.log_dir and saver.args.save_dir.
  experiment:
    name: ablation_full
    base_dir: experiments

  # Optimization hyper-parameters (flat keys directly under `pipeline`).
  batch_size: 128
  # Keep the decimal point and the signed exponent: YAML 1.1 loaders such as
  # PyYAML read shorter forms like `1e-4` as a string, not a float.
  learning_rate: 1.0e-4
  # NOTE(review): presumably the maximum gradient norm used for clipping;
  # confirm.
  grad_norm: 5.0
  epochs: 40
  # NOTE(review): unit is presumably optimizer steps, not epochs; confirm.
  warmup_steps: 500
  # NOTE(review): the *_lr_mul keys presumably scale learning_rate for each
  # sub-module (language encoder at 0.1, the others at 1.0); confirm.
  lang_lr_mul: 0.1
  point_lr_mul: 1.0
  unified_lr_mul: 1.0
  # NOTE(review): presumably the optimizer's beta coefficients; confirm.
  beta1: 0.9
  beta2: 0.98

  # Random seed settings.
  seed: 42
  # NOTE(review): semantics are not visible in this file; presumably makes
  # determinism violations warn instead of raising; confirm.
  seed_warn_only: true

  # Two-stage training schedule with per-stage answer/aux weights.
  staged_training:
    enabled: true
    # NOTE(review): with `epochs: 40` above, presumably stage 1 covers the
    # first 25 epochs and stage 2 the remaining 15; confirm.
    stage1_epochs: 25
    stage1_answer_weight: 1.0
    stage1_aux_weight: 1.0
    # Stage 2 raises the answer weight (1.0 -> 10.0) and lowers the aux
    # weight (1.0 -> 0.8) relative to stage 1.
    stage2_answer_weight: 10.0
    stage2_aux_weight: 0.8

  # Training-level settings for self_diff_lpss. Module-level options live in
  # unified_encoder.args (keys prefixed `self_diff_lpss_`).
  # NOTE(review): both this `enabled` and
  # unified_encoder.args.use_self_diff_lpss are true; confirm whether the two
  # flags must agree.
  self_diff_lpss:
    enabled: true
    # NOTE(review): presumably multiplies the base learning_rate for these
    # parameters; confirm.
    lr_multiplier: 5.0
    warmup_epochs: 1

  # Training-level settings for LPSS. Module-level options live in
  # qa_head.args (keys prefixed `lpss_`).
  lpss:
    # NOTE(review): presumably multiplies the base learning_rate for these
    # parameters; note it is 10.0 here versus 5.0 under `self_diff_lpss`.
    lr_multiplier: 10.0
    # Same value as unified_encoder.args.self_diff_lpss_entropy_weight (0.1).
    entropy_weight: 0.1
    warmup_epochs: 1
    # NOTE(review): presumably an entropy floor, with the multiplier applied
    # when entropy falls below it; confirm against the training code.
    min_entropy: 0.5
    min_entropy_multiplier: 5.0
    load_balance_weight: 0.3
    # Ten times unified_encoder.args.self_diff_lpss_orthogonal_weight (0.05).
    # NOTE(review): confirm the difference is intentional.
    orthogonal_weight: 0.5
