# Training / validation data roots and best-checkpoint flag.
data_path: '/mnt/data/structured_data/ODIR_ImageFolder/train.X'
val_data_path: '/mnt/data/structured_data/ODIR_ImageFolder/val.X'
save_best: true

# Remember to remove one more downsample layer in the encoder.
image_size: 128

# Tokenizer model with a CNN encoder and a CNN decoder.
vq_model: 'VQ-16'
enc_type: 'cnn'
dec_type: 'cnn'
dropout_p: 0.1

# Optimisation schedule.
# Note: a plain `none` is loaded as the string "none"; YAML null is spelled
# `null` or `~`.
semantic_guide: none
global_batch_size: 128
epochs: 200
lr_scheduler: cosine
# Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
# resolve a bare `1e-4` to the string "1e-4" instead of a float.
lr: 1.0e-4
min_cosine_ratio: 0.01

# Codebook configuration.
codebook_embed_dim: 256
codebook_size: 16384
codebook_l2_norm: false

# abs_pos_embed: true  # useless for cnn backbone

# Quantize once (necessary for original VAR).
product_quant: 1

# TODO(review): ImageFolder does use EMA to update the model parameters;
# confirm whether EMA should stay disabled here.
ema: false

# No dropout for original VAR.
codebook_drop: 0.0
# encoder_model: vit_base_patch14_dinov2.lvd142m  # useless for cnn backbone
# decoder_model: vit_base_patch14_dinov2.lvd142m  # useless for cnn backbone

# Set as 256 for our VAR setting: 16 * 16 patches.
num_latent_tokens: 256

# Original VAR setting: 16 * 16 patches.
v_patch_nums: [16]



# half_sem: true  # useless for cnn backbone
# start_drop: 3  # no codebook drop for original VAR
# NOTE(review): the name suggests a checkpoint is written every 5 epochs;
# confirm the exact meaning against the training script.
ckpt_per_epoch: 5

# sem_loss_weight: 0.1  # useless for cnn backbone
# enc_tuning_method: full


# Loss weighting and optimizer regularisation.
# TODO: confirm lecam loss weight.
lecam_loss_weight: 0.001
weight_decay: 0.0

kmeans_init: false

ae_training: true

# Discriminator settings. Original VAR seems to use dinodisc as the
# discriminator, which is different from the previous work.
# NOTE(review): with disc_type set to none, the other disc_* values below may
# be unused; confirm against the training script.
disc_type: none
disc_epoch_start: 0
disc_weight_decay: 0.0005
disc_adaptive_weight: true

# TODO: confirm aug fade steps.
aug_fade_steps: 0

cloud_save_path: '/mnt/data/ImageFolder/output/ODIR-maskgit/exp-My_MaskGIT_AE_e200_lr1e-4_bs128_cosine'
