# Launch train.py through torchrun in standalone mode with 8 processes per node.
# Every key=value token after train.py is handed to the script verbatim
# (presumably consumed by its config/override parser -- confirm in train.py).
# Single quotes keep each override a single literal word for the shell.
torchrun \
    --standalone \
    --nproc_per_node=8 \
    train.py \
    'train.klreg=0.0e+0' \
    'train.reward_exp=1.0e+4' \
    'prompt_fn=hpd_photo_painting' \
    'reward_fn=hpscore'