# Hydra runtime settings for this job.
hydra:
  job:
    # Do not change the process working directory into the run directory.
    chdir: false
  run:
    # Per-run output directory; `${now:...}` expands to the launch timestamp.
    dir: logs/${now:%Y-%m-%d_%H-%M-%S}

# `???` is OmegaConf's mandatory-value marker: the key must be given a value
# (e.g. through a command-line override) before it can be read.
output_file: ???                 # refer to `bm25_retriever.yaml` for explanation
# NOTE(review): how `num_candidates` is consumed is not visible in this file;
# confirm against the code that reads this config.
num_candidates: 1
# NOTE(review): presumably the number of in-context examples selected per
# input; confirm against the code that reads this config.
num_ice: 5
# Mandatory; referenced as `${task_name}` by `dataset_reader` and `index_reader`.
task_name: ???

# Referenced as `${dataset_path}` by `dataset_reader.dataset_path`.
dataset_path: null
batch_size: 32                  # the batch size used when running encoding
# Model used to encode `field` for each index instance,
# e.g. 'bert-base-multilingual-uncased' or 'bert-base-uncased'.
# Mandatory (`???`): must be supplied before it can be read.
model_name: ???
faiss_index: ???                # if this file exists, the encoded index data is loaded directly from it
# The local pretrained encoder to load; `model_name` is loaded instead if this is null.
# NOTE(review): the key is declared mandatory (`???`), so null must be passed
# explicitly to get that fallback; confirm this is intended.
pretrained_model_path: ???
run_for_n_samples: 0            # the number of samples to run; 0 means all samples
lambd: 1.0                      # the lambda value used in the kernel function; 1.0 is a good value for most cases
beta: 0.02                      # the beta value used in the kernel function; 0.02 is a good value for most cases
lengthscale: 1.0                # the lengthscale value used in the kernel function; 1.0 is a good value for most cases
use_polynomial_kernel: false    # if true, use the polynomial kernel; otherwise use the Gaussian kernel

# Parameters needed to initialize the input dataset.
# `_target_` names the class to build; presumably it is created with
# `hydra.utils.instantiate`, receiving the other keys as keyword arguments
# (confirm against the calling code).
dataset_reader:
  _target_: src.dataset_readers.base_dsr.BaseDatasetReader
  task_name: ${task_name}          # interpolated from the top-level `task_name`
  model_name: ${model_name}        # interpolated from the top-level `model_name`
  # NOTE(review): presumably selects the question-only text of each instance;
  # confirm against BaseDatasetReader.
  field: q
  dataset_path: ${dataset_path}    # interpolated from the top-level `dataset_path` (null by default)
  # NOTE(review): null presumably means "no size limit"; confirm against BaseDatasetReader.
  ds_size: null
  dataset_split: validation

# Parameters needed to initialize the index_reader.
# `_target_` names the class to build; presumably it is created with
# `hydra.utils.instantiate`, receiving the other keys as keyword arguments
# (confirm against the calling code).
index_reader:
  _target_: src.dataset_readers.index_dsr.IndexDatasetReader
  task_name: ${task_name}          # interpolated from the top-level `task_name`
  model_name: ${model_name}        # interpolated from the top-level `model_name`
  # NOTE(review): presumably selects the question+answer text of each
  # instance; confirm against IndexDatasetReader.
  field: qa
  dataset_split: train
  # Literal null here: unlike `dataset_reader`, this key does not interpolate
  # the top-level `dataset_path`.
  dataset_path: null
  ds_size: null
  # Index of the `ds_size`-sized segment to use. If null, it defaults to 0,
  # i.e. the first segment; if 1, it selects indexes (ds_size, 2*ds_size).
  ds_segment: 0

# Parameters needed to initialize the bi-encoder model.
model_config:
  _target_: src.models.biencoder.BiEncoderConfig
  # The query and context encoders both resolve to the top-level `model_name`.
  q_model_name: ${model_name}
  ctx_model_name: ${model_name}
  # NOTE(review): presumably enables normalization of the embeddings;
  # confirm against BiEncoderConfig.
  norm_embed: true
  # The factor used to adjust the scale of the relevance term and to trade off
  # diversity and relevance.
  scale_factor: 0.1
