# Launches main.py under Hydra --multirun with mode=sample_eval, sweeping
# sampling.steps over 32 and 64, with CUDA_VISIBLE_DEVICES limited to 0-3.
#
# Required environment variables (each is forwarded as one quoted override):
#   REMASKATOR_CKPT      -> sampling.remaskator_checkpoint_path
#   EVAL_CKPT            -> eval.checkpoint_path
#   GAUSSIAN_CKPT        -> sampling.gaussian_checkpoint_path
#   EMBEDDING_CACHE_DIR  -> +embedding_cache_dir
#
# Why variables instead of inline placeholders: a bare `<path>` on a command
# line is not an argument. The shell parses it as `< path` (read stdin from a
# file called "path") followed by `>` whose target is the NEXT word, so the
# override that follows is consumed as an output filename and never reaches
# main.py.

# Shared values, defined once so the overrides that use them cannot drift
# apart: T_ON / T_OFF feed sampling.t_*, sampling.remaskator_t_* and
# noise.t_*; ALPHA_ON feeds +sampling.alpha_on.
T_ON=0.55
T_OFF=0.05
ALPHA_ON=0.9

# Fail fast, before any GPU work starts, if a required path is missing.
: "${REMASKATOR_CKPT:?set REMASKATOR_CKPT to the remaskator checkpoint path}"
: "${EVAL_CKPT:?set EVAL_CKPT to the evaluation checkpoint path}"
: "${GAUSSIAN_CKPT:?set GAUSSIAN_CKPT to the gaussian checkpoint path}"
: "${EMBEDDING_CACHE_DIR:?set EMBEDDING_CACHE_DIR to the embedding cache directory}"

# The two hydra.sweep.* overrides are single-quoted on purpose: the ${...}
# expressions inside them must reach Hydra verbatim instead of being expanded
# (and rejected) by the shell. Everything that IS a shell expansion is
# double-quoted so it always arrives as exactly one argument.
CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py \
  --multirun \
  'hydra.sweep.dir=multirun_mdlm_seqlen512/${now:%Y-%m-%d}/${now:%H-%M-%S}' \
  'hydra.sweep.subdir="${hydra.job.num}_steps=${sampling.steps}_p=${sampling.nucleus_p}"' \
  mode=sample_eval \
  loader.batch_size=64 \
  loader.eval_batch_size=64 \
  sampling.num_sample_batches=64 \
  sampling.steps=32,64 \
  data.wrap=False \
  data=openwebtext-split \
  parameterization=subs \
  backbone=dit \
  model.length=512 \
  model.cond_dim_embedding=384 \
  seed=11 \
  sampling.predictor=ddpm_cache \
  sampling.remdm_mode=null \
  "+sampling.t_on=${T_ON}" \
  "+sampling.t_off=${T_OFF}" \
  "+sampling.alpha_on=${ALPHA_ON}" \
  sampling.eta=0.008 \
  sampling.remaskator_temperature=1e-9 \
  "sampling.remaskator_t_off=${T_OFF}" \
  "sampling.remaskator_t_on=${T_ON}" \
  sampling.nucleus_p=0.9 \
  noise=loglinear \
  "noise.t_off=${T_OFF}" \
  "noise.t_on=${T_ON}" \
  "sampling.remaskator_checkpoint_path=${REMASKATOR_CKPT}" \
  sampling.freeze_backbone=false \
  "eval.checkpoint_path=${EVAL_CKPT}" \
  text_embedder.use_text_embedder=false \
  text_embedder.use_condition_during_sampling_until=1.0 \
  text_embedder.embedding_ema_decay=0.0 \
  text_embedder.num_embedding_updates=0 \
  text_embedder.model_name=sentence-transformers/all-MiniLM-L6-v2 \
  text_embedder.cond_dropout=0.0 \
  text_embedder.random_projection_dim=null \
  text_embedder.noise=0.0 \
  "wandb.name=sample_$(date +%Y%m%d_%H%M%S)" \
  sampling.sample_embeddings_from=validation \
  "sampling.gaussian_checkpoint_path=${GAUSSIAN_CKPT}" \
  embedding_diffusion.num_layers=8 \
  embedding_diffusion.hidden_dim=512 \
  embedding_diffusion.net_type=transformer \
  embedding_diffusion.seq_len=8 \
  embedding_diffusion.num_heads=8 \
  embedding_diffusion.timesteps=1000 \
  embedding_diffusion.t_sampling_exponent=0.5 \
  "+embedding_cache_dir=${EMBEDDING_CACHE_DIR}" \
  +wandb.offline=true
