# Trainer selection and top-level update schedule.
# NOTE(review): the meanings noted below are inferred from key names only;
# confirm against the code that loads this config.
trainer: t2v_dmd_grpo
# presumably fake-score (critic) updates per generator update -- TODO confirm
dfake_gen_update_ratio: 5
# presumably gradient accumulation steps -- TODO confirm
GAS: 5
# Unset: written as an explicit null rather than a bare empty value.
generator_ckpt: null

# presumably the logging interval, in iterations -- TODO confirm
log_iters: 100
# presumably the random seed -- TODO confirm
seed: 0

# Model identifiers for the three networks; all three currently name the
# same model.
# NOTE(review): presumably real/fake are the two score networks and
# generator is the model being trained -- confirm against the trainer.
real_name: Wan2.1-T2V-1.3B
fake_name: Wan2.1-T2V-1.3B
generator_name: Wan2.1-T2V-1.3B

# Paths for the real / fake / generator models. All three are unset in this
# file and are written as explicit nulls rather than bare empty values.
# NOTE(review): presumably these must be supplied before a run -- confirm
# how the loader treats null here.
real_path: null
fake_path: null
generator_path: null

# Nested mapping of model keyword arguments.
# NOTE(review): a top-level `timestep_shift` with the same value (5.0) also
# exists in this file; presumably the two must stay in sync -- confirm.
model_kwargs:
  timestep_shift: 5.0

# Sharding / FSDP wrapping settings; the same wrap strategy (`size`) is used
# for the generator and both score networks.
# NOTE(review): the set of accepted strategy values is not visible here.
sharding_strategy: full
generator_fsdp_wrap_strategy: size
real_score_fsdp_wrap_strategy: size
fake_score_fsdp_wrap_strategy: size

# Boolean toggle for mixed precision.
mixed_precision: true

# Timestep schedule and guidance settings.
# NOTE(review): units and semantics are inferred from key names -- confirm
# against the trainer.
num_train_timestep: 1000
# Four entries in descending order, from 1000 down to 250.
denoising_step_list:
  - 1000
  - 750
  - 500
  - 250
# Same value as `model_kwargs.timestep_shift`; presumably the two must
# agree -- confirm.
timestep_shift: 5.0
guidance_scale: 6.0

# NOTE(review): presumably the per-process batch size -- confirm.
batch_size: 1

# EMA settings.
# NOTE(review): presumably the EMA decay and the step at which EMA tracking
# begins -- confirm.
ema_weight: 0.99
ema_start_step: 300

# Optimizer hyperparameters. The `*_critic` keys carry separate values from
# the unsuffixed ones; the critic learning rate is lower (4.0e-07 vs 2.0e-06).
# NOTE(review): presumably the unsuffixed keys apply to the generator and the
# betas are Adam-style coefficients -- confirm. Whether `weight_decay` applies
# to both optimizers cannot be determined from this file.
lr: 2.0e-06
lr_critic: 4.0e-07
beta1: 0.0
beta2: 0.999
beta1_critic: 0.0
beta2_critic: 0.999
weight_decay: 0.01

# Dataset locations. Both are unset in this file and are written as explicit
# nulls rather than bare empty values.
# NOTE(review): presumably these must be supplied before a run -- confirm.
train_data_path: null
train_eval_data_path: null

# NOTE(review): presumably how often (in steps) garbage collection is
# triggered -- confirm against the trainer.
gc_interval: 20

# Experiment-tracking settings (all keys share the `wandb_` prefix).
# NOTE(review): `WANDB_HOST` looks like a placeholder rather than a real
# host -- confirm whether it is substituted or must be edited before use.
wandb_host: WANDB_HOST
# Unset: written as explicit nulls rather than bare empty values.
# NOTE(review): if `wandb_key` holds an API key, supply it at runtime
# (environment variable or secret store) instead of committing it here.
wandb_key: null
wandb_entity: null
wandb_project: null