# Dataset locations (train / validation) and input resolution.
data_path: '/imagenet100_var/train.X'
val_data_path: '/imagenet100_var/val.X'
# NOTE(review): presumably keeps the best-scoring checkpoint; confirm against
# the training script.
save_best: true

# Remember to remove one more downsample layer in the encoder for this size.
image_size: 128


# Tokenizer model variant and encoder/decoder backbone types.
vq_model: 'VQ-16'
enc_type: 'cnn'
dec_type: 'cnn'
dropout_p: 0.1

# Training schedule and learning rate.
# 'none' is quoted so it is unmistakably the string "none", not a YAML null.
semantic_guide: 'none'
global_batch_size: 128
epochs: 200
lr_scheduler: 'cosine'
# Written with a decimal point on purpose: YAML 1.1 loaders (e.g. PyYAML)
# resolve a bare `3e-5` as the string "3e-5", whereas `3.0e-5` is a float
# under both YAML 1.1 and YAML 1.2.
lr: 3.0e-5


# Codebook / quantizer settings.
codebook_embed_dim: 32
codebook_size: 16384
codebook_l2_norm: false

# Quantize once (necessary for original VAR).
product_quant: 1

ema: false

# No dropout for original VAR.
codebook_drop: 0.0

# Disabled: not used with the cnn backbone.
# encoder_model: vit_base_patch14_dinov2.lvd142m
# decoder_model: vit_base_patch14_dinov2.lvd142m

# Set to 256 for our VAR setting (16 * 16 patches).
num_latent_tokens: 256

# Original VAR setting; the last entry, 16, corresponds to 16 * 16 patches.
v_patch_nums: [1, 2, 3, 4, 5, 6, 8, 10, 13, 16]



# Disabled options, kept for reference:
# half_sem: true  # not used with the cnn backbone
# start_drop: 3  # no codebook drop for original VAR
# NOTE(review): presumably a checkpoint is written every 5 epochs; confirm
# against the training script.
ckpt_per_epoch: 5

# sem_loss_weight: 0.1  # not used with the cnn backbone
# enc_tuning_method: full


# Loss weighting and regularization.
# TODO: confirm lecam loss weight.
lecam_loss_weight: 0.001
weight_decay: 0.0

kmeans_init: false

ae_training: true

# Discriminator settings and output location.
disc_weight_decay: 0.0005
disc_epoch_start: 0

# Quoted so the value is read as the string "none" rather than mistaken for a
# YAML null. Original VAR seems to use dinodisc as its discriminator, which
# differs from the previous work.
disc_type: 'none'

# TODO: confirm aug fade steps.
aug_fade_steps: 0
disc_adaptive_weight: true
cloud_save_path: '/ImageFolder/output/exp-My_VAR_AE'
