# Hydra runtime settings.
hydra:
  job:
    # Stay in the launch directory; do not change into the run directory.
    chdir: false
  run:
    # One output directory per launch under `logs/`, named with the launch
    # timestamp (YYYY-MM-DD_HH-MM-SS).
    dir: logs/${now:%Y-%m-%d_%H-%M-%S}

# Top-level run parameters.
# `???` is OmegaConf's mandatory-value marker: the key has no default here and
# must be supplied (for example as a command-line override) before it is read.
output_file: ???                 # refer to `bm25_retriever.yaml` for explanation
num_candidates: 1
# NOTE(review): presumably the number of in-context examples ("ices") selected
# per query -- confirm against the retriever code.
num_ice: 25
task_name: ???                   # interpolated into dataset_reader and index_reader

# Interpolated into dataset_reader only; index_reader sets its own dataset_path.
dataset_path: null
batch_size: 32                  # the batch size when running encoding
# Model used to encode 'field' for each index instance, e.g.
# 'bert-base-multilingual-uncased' or 'bert-base-uncased'.
model_name: ???
faiss_index: ""                 # if file exists, the encoded index data will be directly loaded
embedding_npy: ???              # NOTE(review): undocumented; presumably a path to a .npy embeddings file -- confirm
# The local pretrained encoder to load; `model_name` is loaded instead when
# this is null. Because the value here is `???`, the fallback only applies if
# null is passed explicitly (e.g. `pretrained_model_path=null`).
pretrained_model_path: ???
# all_data: ???
# test_lang_name: ???
# DPP-related arguments
dpp_search: false                # whether to employ DPP search that considers diversity between ices
dpp_topk: 100                    # the number to retrieve by TopK in the first stage of DPP
# One of: pure_random, cand_random, cand_k_dpp, map.
# NOTE(review): the default is a DPP variant while `dpp_search` is false --
# confirm whether `mode` is ignored in that case.
mode: cand_k_dpp

# Parameters needed to initialize the input dataset reader.
# `task_name`, `model_name` and `dataset_path` are interpolated from the
# top-level keys of the same name.
dataset_reader:
  # Dotted path of the class to build (Hydra `_target_` convention).
  _target_: src.dataset_readers.base_dsr.BaseDatasetReader
  task_name: ${task_name}
  model_name: ${model_name}
  field: q  # NOTE(review): presumably the question-only field -- confirm against BaseDatasetReader
  dataset_path: ${dataset_path}
  ds_size: null
  dataset_split: validation
  # all_data: ${all_data}
  # test_lang_name: ${test_lang_name}

# Parameters needed to initialize the index_reader.
# `task_name` and `model_name` are interpolated from the top-level keys of the
# same name.
index_reader:
  # Dotted path of the class to build (Hydra `_target_` convention).
  _target_: src.dataset_readers.index_dsr.IndexDatasetReader
  task_name: ${task_name}
  model_name: ${model_name}
  field: qa  # NOTE(review): presumably question plus answer -- confirm against IndexDatasetReader
  dataset_split: train
  # Hard-coded to null here; unlike dataset_reader, this does not follow the
  # top-level `dataset_path`.
  dataset_path: null
  ds_size: null
  # i-th segment of size ds_size. If null, defaults to 0, i.e. the first
  # segment; if 1, then indexes (ds_size, 2*ds_size).
  ds_segment: 0
  # all_data: ${all_data}
  # test_lang_name: ${test_lang_name}

# Parameters needed to initialize the bi-encoder model.
model_config:
  # Dotted path of the config class to build (Hydra `_target_` convention).
  _target_: src.models.biencoder.BiEncoderConfig
  # Both encoder names resolve to the same top-level `model_name`.
  q_model_name: ${model_name}
  ctx_model_name: ${model_name}
  norm_embed: false  # NOTE(review): presumably toggles embedding normalization -- confirm against BiEncoderConfig
  # The factor used to adjust the scale of the relevance term and to trade off
  # diversity and relevance.
  scale_factor: 0.1
