# Launch the "compute embeddings -> data prep -> local estimate" pipeline
# script through Poetry, passing --multirun plus a list of key=value overrides.
#
# Layout: one argument per line, in the original order, joined with backslash
# continuations (no character may follow a trailing backslash).
#
# NOTE(review): the comma-separated values for `data` and
# `embeddings.embedding_extraction.layer_indices` are presumably expanded into
# separate runs by the multirun sweeper -- confirm against the Hydra config.
# The layer_indices value stays double-quoted so the shell never treats the
# square brackets as a glob pattern.
poetry run python3 \
  topollm/pipeline_scripts/run_pipeline_compute_embeddings_and_data_prep_and_local_estimate.py \
  --multirun \
  hydra/sweeper=basic \
  data=iclr_2024_submissions_validation,multiwoz21_validation,one-year-of-tsla-on-reddit_validation,sgd_validation,wikitext-103-v1_validation \
  data.data_subsampling.sampling_mode=random \
  data.data_subsampling.number_of_samples=10000 \
  data.data_subsampling.sampling_seed=777 \
  +data.dataset_type=huggingface_dataset_named_entity \
  tokenizer.add_prefix_space=True \
  language_model=roberta-base \
  embeddings.embedding_data_handler.mode=regular \
  embeddings.embedding_extraction.layer_indices="[-1],[-2],[-3],[-4],[-5],[-6],[-7],[-8],[-9],[-10],[-11],[-12]" \
  embeddings_data_prep.sampling.num_samples=150000 \
  embeddings_data_prep.sampling.sampling_mode=random \
  embeddings_data_prep.sampling.seed=42 \
  local_estimates.pointwise.n_neighbors_mode=absolute_size \
  local_estimates.filtering.deduplication_mode=array_deduplicator \
  local_estimates.filtering.num_samples=60000 \
  local_estimates.pointwise.absolute_n_neighbors=128 \
  feature_flags.wandb.use_wandb=true \
  hydra/launcher=basic