#!/usr/bin/env bash
#
# Launch `main.py` under Hydra `--multirun` with mode=sample_eval and the
# `remaskator` predictor, sweeping over sampling.remaskator_temperature and
# sampling.denoiser_temp_during_remasking.
#
# Required environment variables:
#   REMASKATOR_CHECKPOINT_PATH  passed as sampling.remaskator_checkpoint_path
#   EVAL_CHECKPOINT_PATH        passed as eval.checkpoint_path
#
# Usage:
#   REMASKATOR_CHECKPOINT_PATH=/ckpt/remaskator.ckpt \
#   EVAL_CHECKPOINT_PATH=/ckpt/model.ckpt \
#   bash <this-script>
set -euo pipefail

# Each value below is forwarded to several config keys (sampling.*,
# sampling.remaskator_*, noise.*); defining it once keeps those keys in sync.
readonly T_ON=0.55
readonly T_OFF=0.05
readonly ALPHA_ON=0.9

# The checkpoint locations must be supplied by the caller. They used to be a
# bare `<path>` placeholder on the command line, which the shell parses as
# stdin/stdout redirections instead of passing a value to Hydra.
: "${REMASKATOR_CHECKPOINT_PATH:?set REMASKATOR_CHECKPOINT_PATH to the remaskator checkpoint file}"
: "${EVAL_CHECKPOINT_PATH:?set EVAL_CHECKPOINT_PATH to the evaluation checkpoint file}"

# Hydra overrides, in the order they are handed to main.py. An array is used
# (rather than backslash continuations) so every element is exactly one word
# and comments can sit next to the overrides they describe.
overrides=(
  # Sweep output layout. Single quotes keep ${...} away from the shell so that
  # Hydra/OmegaConf resolves the interpolations itself.
  'hydra.sweep.dir=multirun_remaskator_seqlen128/${now:%Y-%m-%d}/${now:%H-%M-%S}'
  'hydra.sweep.subdir="${hydra.job.num}_steps=${sampling.steps}_remaskator_temp=${sampling.remaskator_temperature}_denoiser_temp=${sampling.denoiser_temp_during_remasking}"'

  # Top-level run, loader, data and model overrides.
  mode=sample_eval
  loader.batch_size=64
  loader.eval_batch_size=64
  sampling.num_sample_batches=64
  sampling.steps=128
  data.wrap=False
  data=openwebtext-split
  parameterization=subs
  backbone=dit
  model.length=128
  model.cond_dim_embedding=384
  seed=11

  # sampling.* overrides. A leading `+` is Hydra syntax for adding a key that
  # is not already present in the composed config.
  sampling.predictor=remaskator
  sampling.remdm_mode=null
  "+sampling.t_on=${T_ON}"
  "+sampling.t_off=${T_OFF}"
  "+sampling.alpha_on=${ALPHA_ON}"
  sampling.eta=0.008
  # Comma-separated values are sweep axes under --multirun; with Hydra's
  # default basic sweeper this axis (5 values) and the denoiser-temperature
  # axis below (7 values) expand to their cartesian product.
  sampling.remaskator_temperature=0.0,0.1,0.5,1.0,2.0
  "sampling.remaskator_t_off=${T_OFF}"
  "sampling.remaskator_t_on=${T_ON}"
  sampling.nucleus_p=0.9
  # NOTE(review): 1e100 presumably acts as an "effectively infinite"
  # temperature -- confirm against the sampler implementation.
  sampling.denoiser_temp_during_remasking=0.0,0.5,1.0,2.0,4.0,8.0,1e100

  # noise.* overrides.
  noise=loop
  "noise.t_off=${T_OFF}"
  "noise.t_on=${T_ON}"

  # Checkpoints (caller-supplied, see the checks above).
  "sampling.remaskator_checkpoint_path=${REMASKATOR_CHECKPOINT_PATH}"
  sampling.freeze_backbone=false
  "eval.checkpoint_path=${EVAL_CHECKPOINT_PATH}"

  # text_embedder.* overrides.
  text_embedder.use_text_embedder=false
  text_embedder.use_condition_during_sampling_until=1.0
  text_embedder.embedding_ema_decay=0.0
  text_embedder.num_embedding_updates=0
  text_embedder.model_name=sentence-transformers/all-MiniLM-L6-v2
  text_embedder.cond_dropout=0.0
  text_embedder.random_projection_dim=null
  text_embedder.noise=0.0

  # W&B run name, stamped with the launch time of this script.
  "wandb.name=sample_$(date +%Y%m%d_%H%M%S)"

  sampling.sample_embeddings_from=validation
  sampling.gaussian_checkpoint_path=null

  # embedding_diffusion.* overrides.
  embedding_diffusion.num_layers=8
  embedding_diffusion.hidden_dim=512
  embedding_diffusion.net_type=transformer
  embedding_diffusion.seq_len=8
  embedding_diffusion.num_heads=8
  embedding_diffusion.timesteps=1000
  embedding_diffusion.t_sampling_exponent=0.5

  '+wandb.offline=true'
)

# Restrict the process to GPU 0, exactly as the original invocation did.
CUDA_VISIBLE_DEVICES=0 python main.py --multirun "${overrides[@]}"
