# Dotted path of the dataloader class this config describes
# (presumably resolved through Hydra-style `_target_` instantiation).
_target_: data.ass_recall.ar_dataloader.InContextAssociationDataloader

# Overall vocabulary size.
vocab_size: 50

# Number of in-context pairs.
n_pairs: 7

# Device setting; alternatives include 'cpu', ${trainer.devices}, etc.
device: cuda

# Block size. Noted as being set automatically to 2*n_pairs + 2,
# which is 16 for n_pairs = 7.
block_size: 16

# --- Task distribution arguments ---
# These mappings parameterize the task's internal data generation.

# Arguments for the TRAINING set Task generator
train_dist_args:
  # Reuses the vocab_size defined one level up (relative interpolation).
  vocab_size: ${..vocab_size}
  # Zipfian exponent for the frequency distribution (0 for uniform).
  alpha: 0.0
  # Average number of times the query pair appears in the context.
  burstiness: 1.0
  # Proportion of the vocabulary to be considered "celebrities".
  p_celebs: 0.0
  # Weight for celebrity tokens.
  w_celebs: 0.5

# Arguments for the TESTING set Task generator
test_dist_args:
  # Reuses the vocab_size defined one level up (relative interpolation).
  vocab_size: ${..vocab_size}
  # Zipfian exponent of 0: uniform distribution for testing.
  alpha: 0.0
  # The query pair appears once on average in the test context.
  burstiness: 1.0
  # No celebrity tokens in the test set.
  p_celebs: 0.0
  # Celebrity weight; it does not matter while p_celebs is 0.
  w_celebs: 0.5