# Launch train.py under torchrun on this machine only (--standalone),
# starting 8 worker processes on the node (--nproc_per_node=8).
#
# Everything after train.py is handed to the script verbatim as key=value
# arguments; the values need no shell quoting, so they are written bare:
#   train.klreg=1.0e-2
#   train.reward_exp=1.0e+2
#   prompt_fn=simple_animals
#   reward_fn=aesthetic_score
# NOTE(review): these look like config overrides parsed by train.py itself
# (e.g. Hydra-style) -- confirm their meaning against train.py's config.
torchrun \
    --standalone \
    --nproc_per_node=8 \
    train.py \
    train.klreg=1.0e-2 \
    train.reward_exp=1.0e+2 \
    prompt_fn=simple_animals \
    reward_fn=aesthetic_score