# Data-loading settings: batch size, dataset options, normalizer options and
# worker count. The meaning of each key is defined by the consuming code,
# which is not part of this file.
data:
  batch_size: 3
  dataset:
    # NOTE(review): presumably the ablation that the wandb run name
    # (`..._text_ablate`) refers to; confirm against the dataset code.
    ablate: true
    # Placeholder path; replace with a real location before use.
    data_dir: /path/to/data
    max_len: 2060
    padding: true
    time_horizon: 25
    use_embed: true
  mode: cylinder
  normalizer:
    conditional: false
    scaler: normal
    # Placeholder path; replace with a real location before use.
    stat_path: /path/to/normalizer/stats
    use_norm: true
  num_workers: 20
# Placeholder path; replace before use.
# NOTE(review): the path text suggests an evaluation directory; confirm how
# the consumer uses this key.
load_dir: /path/to/eval/dir
# Model hyperparameters. Short numeric lists are written in flow style; they
# load as the same sequences as the equivalent block form.
# NOTE(review): key names suggest a latent-diffusion setup (first-stage
# autoencoder plus a denoiser configured under `model_config`); confirm
# against the consuming code.
model:
  base_learning_rate: 5.0e-05
  beta_schedule: linear
  # Same value as the autoencoder `z_channels` and `model_config.in_channels`.
  channels: 16
  clip_denoised: false
  cond_stage_config:
    conditional: true
  cond_stage_trainable: true
  cosine_s: 0.008
  dist: true
  first_stage_config:
    aeconfig:
      # Decoder channel counts mirror the encoder: in_channels 16, out_channels 3.
      decoder:
        attn_resolutions: [16]
        ch_mult: [1, 2, 4]
        double_z: true
        dropout: 0.1
        gno_coord_dim: 3
        gno_coord_embed_dim: 16
        gno_mlp_hidden_layers: [64, 64, 64, 64]
        gno_radius: 0.0425
        gno_transform_type: linear
        gno_use_torch_scatter: true
        hidden_channels: 64
        in_channels: 16
        num_res_blocks: 2
        out_channels: 3
        resolution: 64
        tanh_out: false
        use_open3d: true
        z_channels: 16
      double_z: true
      # Encoder: in_channels 3, out_channels 16; all other keys carry the same
      # values as the decoder section.
      encoder:
        attn_resolutions: [16]
        ch_mult: [1, 2, 4]
        double_z: true
        dropout: 0.1
        gno_coord_dim: 3
        gno_coord_embed_dim: 16
        gno_mlp_hidden_layers: [64, 64, 64, 64]
        gno_radius: 0.0425
        gno_transform_type: linear
        gno_use_torch_scatter: true
        hidden_channels: 64
        in_channels: 3
        num_res_blocks: 2
        out_channels: 16
        resolution: 64
        tanh_out: false
        use_open3d: true
        z_channels: 16
      latent_grid_size: 64
    lossconfig:
      kl_weight: 2.0e-07
    # Placeholder path; replace with a real location before use.
    pretrained_path: /path/to/pretrained/autoencoder
    training:
      dist: true
  # Same value as each entry of `model_config.input_size`.
  image_size: 16
  linear_end: 0.012
  linear_start: 0.00085
  log_every_t: 200
  model_config:
    context_dim: 768
    depth: 28
    dim: 3
    hidden_size: 1024
    in_channels: 16
    # Three entries each for input_size and patch_size, matching `dim: 3`.
    input_size: [16, 16, 16]
    learn_sigma: false
    mlp_ratio: 4.0
    num_heads: 16
    patch_size: [2, 2, 2]
    use_cross_attn: true
  monitor: val/loss
  num_timesteps_cond: 1
  parameterization: eps
  scale_factor: 0.2
  scheduler_config:
    scheduler: cosine
  timesteps: 1000
# Placeholder path; replace before use.
# NOTE(review): the path text suggests the model/checkpoint to evaluate;
# confirm how the consumer uses this key.
model_path: /path/to/model/to/eval
# Training-run settings.
# NOTE(review): several key names (accelerator, devices, strategy,
# accumulate_grad_batches, check_val_every_n_epoch, log_every_n_steps,
# max_epochs, default_root_dir) match PyTorch Lightning Trainer arguments;
# presumably forwarded to a Trainer. Confirm against the consuming code.
training:
  accelerator: gpu
  accumulate_grad_batches: 1
  check_val_every_n_epoch: 10
  # Explicit null: no checkpoint value is set in this config.
  checkpoint: null
  dataset_size: 1000
  default_root_dir: logs/
  devices: 1
  # NOTE(review): ema_decay is null while ema_every_n_steps is set; presumably
  # EMA is disabled in this run. Confirm.
  ema_decay: null
  ema_every_n_steps: 1
  log_every_n_steps: 32
  max_epochs: 1000
  seed: 42
  strategy: auto
# Run name and project.
# NOTE(review): presumably passed to a Weights & Biases logger; confirm.
wandb:
  name: LDM_cylinder_DiT_text_ablate
  project: ldm_diffusion
