# Run identity. The last path component of experiment_dir is the same string
# as experiment_name.
# NOTE(review): presumably run outputs are written under experiment_dir —
# confirm against the consumer.
experiment_name: msmarco-roberta_base-hobit-inbatch
experiment_dir: ./experiments/msmarco-roberta_base-hobit-inbatch
# NOTE(review): which components consume this seed is not visible here.
seed: 42
# Input files for the run. Every path is relative and sits under
# ./data/MSMARCO/passage/.
data:
  dataset_name: msmarco
  collection_path: ./data/MSMARCO/passage/collection.tsv
  queries_train_path: ./data/MSMARCO/passage/queries.train.tsv
  qrels_train_path: ./data/MSMARCO/passage/qrels.train.tsv
  # The dev queries file is queries.dev.tsv while the dev qrels file is the
  # ".small" variant.
  # NOTE(review): presumably only queries present in the qrels are
  # evaluated — confirm.
  queries_dev_path: ./data/MSMARCO/passage/queries.dev.tsv
  qrels_dev_path: ./data/MSMARCO/passage/qrels.dev.small.tsv
  # Same file as collection_path above.
  eval_collection_path: ./data/MSMARCO/passage/collection.tsv

# Encoder configuration.
model:
  encoder_name: roberta-base
  pooling_strategy: cls
  # NOTE(review): presumably must match the encoder's hidden size — confirm.
  embedding_dim: 768
  normalize_embeddings: true
  # NOTE(review): the unit of these limits is not stated in this file
  # (presumably tokens) — confirm.
  max_query_length: 32
  max_doc_length: 256
  # Both prefixes are set to the empty string.
  query_prefix: ''
  document_prefix: ''
# Loss configuration. Of the three use_*_negatives switches, only
# use_inbatch_negatives is true; mined and sampled negatives are off.
loss:
  name: infonce
  temperature: 0.05
  use_mined_negatives: false
  use_sampled_negatives: false
  use_inbatch_negatives: true
  # NOTE(review): presumably controls sharing of in-batch negatives across
  # devices — confirm against the consumer.
  gather_across_gpus: false
# Training-loop configuration.
training:
  num_epochs: 40
  per_gpu_batch_size: 128
  gradient_accumulation_steps: 1
  # Zero, consistent with loss.use_mined_negatives being false.
  max_mined_negatives_per_query: 0
  optimizer: adamw
  learning_rate: 1.0e-05
  weight_decay: 0.01
  adam_epsilon: 1.0e-08
  max_grad_norm: 1.0
  fp16: true
  # NOTE(review): the unit of checkpoint_frequency is not stated here
  # (presumably epochs) — confirm.
  checkpoint_frequency: 1
  # The metric this patience applies to is set by early_stopping_metric,
  # a few keys below.
  early_stopping_patience: 5
  enable_evaluation: true
  log_steps: 100
  early_stopping_metric: mrr@10
  warmup_steps: 800
  # Zero, consistent with loss.use_sampled_negatives being false.
  max_sampled_negatives_per_query: 0
  # NOTE(review): batch_sampler.args.max_positives_per_query is 64 while this
  # value is 4 — confirm the two limits are meant to differ.
  max_positives_per_query: 4
# Embedding-generation configuration: enabled, for both queries and documents.
embedding_generation:
  enabled: true
  # NOTE(review): the unit of frequency is not stated here (presumably
  # epochs) — confirm.
  frequency: 1
  generate_queries: true
  generate_documents: true
  # This batch size (8000) is separate from training.per_gpu_batch_size (128).
  batch_size: 8000
# Batch-sampler configuration: the "hobit" sampler is enabled.
batch_sampler:
  name: hobit
  enabled: true
  # NOTE(review): the unit of frequency is not stated here (presumably
  # epochs) — confirm.
  frequency: 1
  # Sampler-specific arguments; their meaning is defined by the sampler
  # implementation, which is not visible from this file.
  args:
    # Same value as training.per_gpu_batch_size (128).
    batch_size: 128
    topk: 200
    alpha: 1.0
    # Same value as loss.temperature (0.05).
    tau_h: 0.05
    num_batch_seeds: 8
    random_epochs: 0
    seed_selection: weighted_random
    multi_positive_strategy: mean
    # NOTE(review): training.max_positives_per_query is 4 while this value
    # is 64 — confirm the two limits are meant to differ.
    max_positives_per_query: 64
    use_gpu: true
    topk_batch_size: 10000
    exp_clip: 20.0
# Negative-sampler configuration: disabled, with zero samples and empty args.
# This is consistent with loss.use_sampled_negatives being false.
negative_sampler:
  name: random
  enabled: false
  frequency: 1
  num_samples: 0
  args: {}
# Logging configuration.
logging:
  log_level: INFO
  # log_file is a bare file name and tensorboard_dir is a relative path.
  # NOTE(review): whether they resolve against experiment_dir or the working
  # directory is not stated here — confirm.
  log_file: training.log
  tensorboard_dir: log/tensorboard
