# @package _global_

policy:
  # NOTE(review): `name` and `trainer` presumably select the policy and
  # trainer implementations; the consuming code is not visible here — confirm.
  name: vita_token
  trainer: policy

  # Horizon settings: pred_horizon (16) is twice action_horizon (8).
  # NOTE(review): units are presumably timesteps — confirm against the
  # dataset/policy code.
  action_horizon: 8
  pred_horizon: 16
  obs_horizon: 1

  # Observer settings.
  # NOTE(review): `name` presumably selects a ResNet-18 backbone and `tokenize`
  # presumably makes it emit tokens; the consumer is not visible here — confirm.
  observer:
    name: resnet18
    tokenize: true

  # Settings for the `vita` section: feature toggle, loss weights, token width.
  vita:
    decode_flow_latents: true
    consistency_weight: 1.0
    # Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML's
    # default resolver read a bare `1e-4` as the string "1e-4", not a float.
    enc_contrastive_weight: 1.0e-4
    # NOTE(review): 0.0 presumably disables this term — confirm in the loss code.
    flow_contrastive_weight: 0.0
    # Also referenced by action_ae.net.latent_dim and action_ae.net.token_dim
    # through the ${policy.vita.token_dim} interpolation.
    token_dim: 64

  # Flow matcher settings.
  # NOTE(review): `name: exact` with `sigma: 0.0` presumably selects a
  # noise-free matcher, and `num_sampling_steps` presumably sets the number of
  # sampling steps at inference; the consumer is not visible here — confirm.
  flow_matcher:
    name: exact
    sigma: 0.0
    num_sampling_steps: 6

  # Flow network settings. The hidden_dim, num_layers, num_heads, mlp_ratio
  # and dropout values match the corresponding entries in action_ae.net.
  # hidden_dim (256) is divisible by num_heads (8), i.e. 32 per head.
  # NOTE(review): keep it divisible if `token_transformer` uses standard
  # multi-head attention — confirm against the model code.
  flow_net:
    name: token_transformer
    hidden_dim: 256
    num_layers: 4
    num_heads: 8
    mlp_ratio: 4
    dropout: 0.0

  # NOTE(review): presumably toggles a conditional-VAE variant of the policy;
  # the consumer is not visible here — confirm. Currently off.
  use_cvae: false

  # Action autoencoder settings: loss weights, training toggles, network shape.
  # NOTE(review): section purpose is inferred from the key names — confirm.
  action_ae:
    # Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML's
    # default resolver read a bare `1e-6` as the string "1e-6", not a float.
    # NOTE(review): presumably only used when use_variational is true — confirm.
    kl_weight: 1.0e-6
    # The two reconstruction weights sum to 1.0.
    enc_recon_weight: 0.5
    flow_recon_weight: 0.5
    recon_loss_type: l1  # or l2
    use_variational: false
    freeze_encoder: false
    freeze_decoder: false
    # null: no pretrained path is configured.
    pretrained_path: null

    net:
      encoder_type: token_cnn  # Options: token_cnn, token_cnn_v2
      decoder_type: token_cnn  # Options: token_cnn, token_cnn_v2
      enc_hidden_dim: 256
      dec_hidden_dim: 256
      # latent_dim and token_dim both resolve to policy.vita.token_dim, so
      # they stay equal to each other and to the vita token width.
      latent_dim: ${policy.vita.token_dim}
      num_tokens: 10
      token_dim: ${policy.vita.token_dim}
      num_heads: 8
      mlp_ratio: 4
      dropout: 0.0
      # NOTE(review): the "UNet" remark does not match the token_cnn
      # encoder/decoder types selected above — confirm it is still accurate.
      num_layers: 4  # Increased for UNet
