max_steps: 6
num_sentences: 100
task: "qa3_three-supporting-facts"  #"qa2_two-supporting-facts" #"qa1_single-supporting-fact"
noise_train_path: "../datasets/babilong/pg19-with-sentences/train"
noise_test_path: "../datasets/babilong/pg19-with-sentences/test"
facts_train_path: "../datasets/babilong/tasks_1-20_v1-2/en-10k/${envs.task}_train.txt"
facts_test_path: "../datasets/babilong/tasks_1-20_v1-2/en-10k/${envs.task}_test.txt"
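
# With the default task above, ${envs.task} interpolates into the fact paths,
# e.g. facts_train_path resolves to
# "../datasets/babilong/tasks_1-20_v1-2/en-10k/qa3_three-supporting-facts_train.txt".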

positions_processor: relative

positions_processor_dict:
  none:
    # this choice allows positional embeddings to be ignored, but the
    # environment still needs to enumerate chunks, so it reuses the
    # absolute processor
    _target_: envs.text_env.AbsolutePositionProcessor
  absolute:
    _target_: envs.text_env.AbsolutePositionProcessor
  random:
    _target_: envs.text_env.RandomPositionProcessor
    max_chunks_count: 2000
  relative:
    _target_: envs.text_env.RelativePositionProcessor
    step_size: 20
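
# The active processor is looked up by name in positions_processor_dict (see
# envs.env.positions_processor below). Assuming this config is composed by
# Hydra, the choice can be switched from the command line, e.g.
# (train.py stands in for the project's actual entry point):
#   python train.py envs.positions_processor=random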


train_dataset: &train_dataset
  _target_: envs.RetrievalBabiLong
  num_sentences: ${envs.num_sentences}

  task_dataset:
    _target_: envs.dataloaders.babilong.TaskDataset
    dataset_path: ${envs.facts_train_path}

  noise_sentence_sampler:
    _target_: envs.dataloaders.babilong.RetrSentenceSampler
    dataset:
      _target_: datasets.load_from_disk
      dataset_path: ${envs.noise_train_path}

test_dataset: &test_dataset
  _target_: envs.RetrievalBabiLong
  num_sentences: ${envs.num_sentences}

  task_dataset:
    _target_: envs.dataloaders.babilong.TaskDataset
    dataset_path: ${envs.facts_test_path}

  noise_sentence_sampler:
    _target_: envs.dataloaders.babilong.RetrSentenceSampler
    dataset:
      _target_: datasets.load_from_disk
      dataset_path: ${envs.noise_test_path}
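
# Both dataset blocks share one structure: RetrievalBabiLong combines task
# facts (TaskDataset) with noise sentences drawn by RetrSentenceSampler from
# a HuggingFace dataset loaded via datasets.load_from_disk; num_sentences
# presumably caps the total sample length.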

# Core Environment Configuration
env:
  _target_: envs.qa_env.QAEnv
  max_steps: ${envs.max_steps}
  action_embed_length: ${max_action_length}
  separator: " [SEP] "
  sort_by_index: True
  positions_processor: ${envs.positions_processor_dict.${envs.positions_processor}}
  feedback_model: ${feedback.feedback_dict.${feedback.type}}
  # alternative fixed feedback model:
  # feedback_model:
  #   _target_: rl.feedback.GroundTruthFeedback
  #   penalize_extra_steps: False
  # optional reward model override:
  # reward_model:
  #   _target_: envs.qa_env.PositionalGTReward
  # Dataset Configuration
  dataset:
    _target_: envs.QADatasetAdapter
    dataset: *train_dataset
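
# test_env below mirrors this block via ${envs.env.*} interpolations; only the
# dataset and the feedback model differ (evaluation is pinned to ground-truth
# feedback regardless of ${feedback.type}).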


test_env:
  _target_: envs.qa_env.QAEnv
  max_steps: ${envs.env.max_steps}
  action_embed_length: ${max_action_length}
  separator: ${envs.env.separator}
  sort_by_index: ${envs.env.sort_by_index}
  positions_processor: ${envs.env.positions_processor}
  feedback_model:
    _target_: rl.feedback.GroundTruthFeedback
    penalize_extra_steps: False
  # optional reward model override:
  # reward_model:
  #   _target_: envs.qa_env.PositionalGTReward
  dataset:
    _target_: envs.QADatasetAdapter
    dataset: *test_dataset
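
# Instantiation sketch (a minimal example, assuming this file is composed by
# Hydra under the `envs` group and `cfg` is the resolved root config):
#   from hydra.utils import instantiate
#   train_env = instantiate(cfg.envs.env)        # QAEnv over train_dataset
#   test_env = instantiate(cfg.envs.test_env)    # QAEnv over test_dataset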

