# Launch script: runs main.py once with model=small on data=openwebtext-split,
# passing every setting as a key=value command-line override.
#
# Notes for whoever edits this:
#   - CUDA_VISIBLE_DEVICES=2,3 is set for this single command only and limits
#     the CUDA devices visible to the python process to indices 2 and 3.
#   - The run name carries a launch timestamp (YYYYmmdd_HHMMSS) so repeated
#     launches get distinct names. The timestamp is computed once and the
#     whole name is passed as one quoted argument, so it can never be
#     word-split or glob-expanded by the shell.
#   - Batch arithmetic: loader.batch_size (128) x accumulate_grad_batches (2)
#     x 2 visible devices = loader.global_batch_size (512).
#     NOTE(review): presumably main.py expects these to agree -- re-check all
#     three values whenever the device list changes.
#   - Overrides written as "" reach main.py as empty values (the shell removes
#     the quotes). resume_ckpt_path, save_dir and embedding_cache_dir are
#     therefore blank here; fill them in before relying on checkpoint
#     resume/saving or the embedding cache.
#   - text_embedder.use_text_embedder=false while other text_embedder.*
#     values are still passed. NOTE(review): presumably those are ignored when
#     the embedder is disabled -- confirm in main.py.
#   - The leading '+' on embedding_cache_dir looks like Hydra's "add a key
#     that is not in the base config" syntax -- TODO confirm.
run_stamp=$(date +%Y%m%d_%H%M%S)

CUDA_VISIBLE_DEVICES=2,3 \
python main.py \
  model=small \
  data=openwebtext-split \
  "wandb.name=ft-small-uncond-no-wrap-seqlen512-${run_stamp}" \
  parameterization=subs \
  model.length=512 \
  model.cond_dim_embedding=384 \
  eval.compute_generative_perplexity=True \
  sampling.num_sample_batches=4 \
  sampling.steps=128 \
  checkpointing.resume_from_ckpt=True \
  checkpointing.resume_ckpt_path="" \
  checkpointing.save_dir="" \
  loader.global_batch_size=512 \
  trainer.accumulate_grad_batches=2 \
  loader.batch_size=128 \
  loader.eval_batch_size=128 \
  trainer.val_check_interval=5000 \
  text_embedder.use_text_embedder=false \
  text_embedder.model_name=sentence-transformers/all-MiniLM-L6-v2 \
  text_embedder.cond_dropout=0.5 \
  text_embedder.cond_dropout_std=0.11 \
  text_embedder.random_projection_dim=null \
  text_embedder.noise=0.0 \
  eval.generate_samples=True \
  +embedding_cache_dir="" \
  data.wrap=False
