# Hydra config for the CUB embedding-contexts datamodule.
# `_target_` is the dotted path of the class this config is meant to instantiate;
# the remaining keys are the settings supplied for that class.
_target_: src.datamodules.CUBEmbContextsDataModuleV2
name: cub_emb_contexts

# Context/query sampling settings; exact semantics are defined by the target class.
# NOTE(review): the two proportions look like fractions in [0, 1] - confirm.
context_class_size: 50
context_minority_group_proportion: 0.1
query_minority_group_proportion: 0.5

# `oc.env` reads the DATA_ROOT_DIR environment variable; no fallback is given
# here, so the variable must be set when this value is resolved.
dataset_path: ${oc.env:DATA_ROOT_DIR}/cub
# Taken from the `name` field of the top-level `encoding_extractor` config node.
encoding_extractor: ${encoding_extractor.name}

data_length: 1024

batch_size: 32
num_workers: 4

# The next three keys mirror top-level (global) config values of the same name.
spurious_setting: ${spurious_setting}
sp_token_generation_mode: ${sp_token_generation_mode}

use_context_as_intermediate_queries: ${use_context_as_intermediate_queries}

rotate_encodings: false
# 1000 works well, but is slow. Reduce for efficiency when experimenting.
n_rotation_matrices: 1000

# [min_ratio, max_ratio]; the label noise ratio is uniformly sampled from this
# interval.
label_noise_ratio_interval: null
# [min_norm, max_norm]; the norm of the Gaussian noise is uniformly sampled
# from this interval.
input_noise_norm_interval: null
permute_input_dim: false

ask_context_prob: null

# null if there is a single val set; otherwise the list of val set names.
val_sets: null