# Directives listed here appear to be evaluated for their side effects
# (environment setup) rather than for their values — TODO confirm against the
# config loader.
__technical:
  # Disabled; note that the directive below is missing its closing brace.
  # - ${!setenv HF_HOME .  # so as to not include default value later on
  # Re-export SAMPLE_TEMPERATURE with a fallback of 1. so that later lookups
  # without a default (e.g. in save_dir) presumably always resolve.
  # NOTE(review): if this runs first, the 0.6 fallbacks used under `rollouts`
  # can never take effect — confirm which default is intended.
  - ${!setenv SAMPLE_TEMPERATURE ${!getenv SAMPLE_TEMPERATURE 1.}}
  # Presumably records the current git revision for provenance — TODO confirm.
  - GIT_ID: ${!gitid}
# Per-model settings. Presumably the `switch` directive keeps only the entry
# whose key equals $USE_MODEL_SHORT (qwen2_32b_i, qwen2_7b_i, llama3_70b_i,
# llama3_8b_i, phi35_i) — TODO confirm against the config loader.
_pb._skp.${-model_options switch USE_MODEL_SHORT}:
  # Qwen2.5-32B-Instruct; the repo id can be overridden via USE_MODEL_REPO.
  # Datasets defined: COLLIE, TRIVIA, COQA, COQAPERT, SQUADPERT, SQUAD, BCB.
  qwen2_32b_i:
    model_id: ${!getenv USE_MODEL_REPO Qwen/Qwen2.5-32B-Instruct}
    model_kwargs: {}
      # attn_implementation: flash_attention_2
    # TP_SIZE is read by hardware_config.tensor_parallel_size and EXTRA_EOS_IDS
    # by rollouts.ms.generate_kwargs (added_ids_list). EOS_TOKEN_ID is only
    # referenced from commented-out config in this file.
    __technical: ${!setenv TP_SIZE 2}${!setenv EOS_TOKEN_ID 151645}${!setenv EXTRA_EOS_IDS [151643]}
    # To stop the complaining; shouldn't make a difference here anyway since the input is rectangular.
    # NOTE(review): tokenizer_kwargs is empty, so the remark above may refer to
    # an option that has since been removed — confirm.
    tokenizer_kwargs: {}
    # Dataset-specific settings, presumably selected by $DATASET_ID. Each entry
    # sets env vars that `rollouts` reads back: SAMPLE_MAX_NEW_TOKS
    # (max_tokens), EXTRA_STOP_CONTAINS (regex_contains) and
    # STOP_ON_ANY_NEWLINES (any_newlines).
    # The commented-out stop_on_tokens lines mention 128001, which is the
    # EOS_TOKEN_ID of the llama3_* entries, not this model's 151645.
    _pb._skp.${-model_options switch DATASET_ID}:
      COLLIE:
        ${-prompter construct generation.prompts PromptBuilderQwen25}: {}
        ${-ds construct evaluation.qa_datasets COLLIEDataset True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 256}${!setenv EXTRA_STOP_CONTAINS []}
        # Instruct models have multiple eos_token_ids in model.generation_config;
        # this breaks correct length detection if unaccounted for.
        # stop_on_tokens: ['\n\n', 128001]
        dset_split: 'test'
      TRIVIA:
        ${-prompter construct generation.prompts PromptBuilderQwen25}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.qa_datasets TriviaQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      COQA:
        ${-prompter construct generation.prompts PromptBuilderQwen25}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.qa_datasets CoQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      COQAPERT:
        ${-prompter construct generation.prompts PromptBuilderQwen25}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.perturbation_text CoQAPerturb True}:
          perturb_strength: [0., 0.2]
          perturb_type: 'shuffle'
          slim: 1024
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      SQUADPERT:
        ${-prompter construct generation.prompts PromptBuilderQwen25}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.perturbation_text SQUADPerturb True}:
          perturb_strength: [0., 0.2]
          perturb_type: 'shuffle'
          slim: 1024
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      SQUAD:
        ${-prompter construct generation.prompts PromptBuilderQwen25}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.qa_datasets SQUADv2 True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      BCB:
        # NOTE(review): this uses the Llama-3 BCB prompt builder, while every
        # other dataset for this model uses PromptBuilderQwen25 — confirm this
        # is intended.
        ${-prompter construct generation.prompts PromptBuilderLlama3ForBCB}: {}
        ${-ds construct evaluation.code_data BCBDataset True}: {}
        # Only the token budget is set here; EXTRA_STOP_CONTAINS and
        # STOP_ON_ANY_NEWLINES are left to the fallbacks used in `rollouts`.
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 768}
        stop_on_tokens: []
        dset_split: 'v0.1.2'
  # Qwen2.5-7B-Instruct; the repo id can be overridden via USE_MODEL_REPO.
  # Datasets defined: COLLIE, TRIVIA, COQAPERT, SQUADPERT, COQA, SQUAD, BCB.
  qwen2_7b_i:
    model_id: ${!getenv USE_MODEL_REPO Qwen/Qwen2.5-7B-Instruct}
    model_kwargs: {}
      # attn_implementation: flash_attention_2
    # EXTRA_EOS_IDS is read by rollouts.ms.generate_kwargs (added_ids_list).
    # EOS_TOKEN_ID is only referenced from commented-out config in this file.
    # TP_SIZE is not set here, so hardware_config falls back to 1.
    __technical: ${!setenv EOS_TOKEN_ID 151645}${!setenv EXTRA_EOS_IDS [151643]}
    # To stop the complaining; shouldn't make a difference here anyway since the input is rectangular.
    # NOTE(review): tokenizer_kwargs is empty, so the remark above may refer to
    # an option that has since been removed — confirm.
    tokenizer_kwargs: {}
    # Dataset-specific settings, presumably selected by $DATASET_ID. Each entry
    # sets env vars that `rollouts` reads back: SAMPLE_MAX_NEW_TOKS
    # (max_tokens), EXTRA_STOP_CONTAINS (regex_contains) and
    # STOP_ON_ANY_NEWLINES (any_newlines).
    # The commented-out stop_on_tokens lines mention 128001, which is the
    # EOS_TOKEN_ID of the llama3_* entries, not this model's 151645.
    _pb._skp.${-model_options switch DATASET_ID}:
      COLLIE:
        ${-prompter construct generation.prompts PromptBuilderQwen25}: {}
        ${-ds construct evaluation.qa_datasets COLLIEDataset True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 256}${!setenv EXTRA_STOP_CONTAINS []}
        # Instruct models have multiple eos_token_ids in model.generation_config;
        # this breaks correct length detection if unaccounted for.
        # stop_on_tokens: ['\n\n', 128001]
        dset_split: 'test'
      TRIVIA:
        ${-prompter construct generation.prompts PromptBuilderQwen25}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.qa_datasets TriviaQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      COQAPERT:
        ${-prompter construct generation.prompts PromptBuilderQwen25}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.perturbation_text CoQAPerturb True}:
          perturb_strength: [0., 0.2]
          perturb_type: 'shuffle'
          slim: 1024
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      SQUADPERT:
        ${-prompter construct generation.prompts PromptBuilderQwen25}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.perturbation_text SQUADPerturb True}:
          perturb_strength: [0., 0.2]
          perturb_type: 'shuffle'
          slim: 1024
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      COQA:
        ${-prompter construct generation.prompts PromptBuilderQwen25}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.qa_datasets CoQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      SQUAD:
        ${-prompter construct generation.prompts PromptBuilderQwen25}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.qa_datasets SQUADv2 True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      BCB:
        # NOTE(review): this uses the Llama-3 BCB prompt builder, while every
        # other dataset for this model uses PromptBuilderQwen25 — confirm this
        # is intended.
        ${-prompter construct generation.prompts PromptBuilderLlama3ForBCB}: {}
        ${-ds construct evaluation.code_data BCBDataset True}: {}
        # Only the token budget is set here; EXTRA_STOP_CONTAINS and
        # STOP_ON_ANY_NEWLINES are left to the fallbacks used in `rollouts`.
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 768}
        stop_on_tokens: []
        dset_split: 'v0.1.2'
  # Meta-Llama-3-70B-Instruct; the repo id can be overridden via USE_MODEL_REPO.
  # Datasets defined: COLLIE, BCB, COQAPERT, SQUADPERT (no TRIVIA/COQA/SQUAD
  # entries exist for this model).
  llama3_70b_i:
    model_id: ${!getenv USE_MODEL_REPO meta-llama/Meta-Llama-3-70B-Instruct}
    model_kwargs: {}
      # attn_implementation: flash_attention_2
    # To stop the complaining; shouldn't make a difference here anyway since the input is rectangular.
    # NOTE(review): tokenizer_kwargs is empty, so the remark above may refer to
    # an option that has since been removed — confirm.
    tokenizer_kwargs: {}
    # EXTRA_EOS_IDS is read by rollouts.ms.generate_kwargs (added_ids_list).
    # EOS_TOKEN_ID is only referenced from commented-out config in this file.
    # NOTE(review): TP_SIZE is not set here, so hardware_config falls back to
    # tensor_parallel_size 1 unless TP_SIZE is exported externally — confirm
    # this is intended for a 70B model.
    __technical: ${!setenv EOS_TOKEN_ID 128001}${!setenv EXTRA_EOS_IDS []}
    # Dataset-specific settings, presumably selected by $DATASET_ID. Each entry
    # sets env vars that `rollouts` reads back: SAMPLE_MAX_NEW_TOKS
    # (max_tokens), EXTRA_STOP_CONTAINS (regex_contains) and
    # STOP_ON_ANY_NEWLINES (any_newlines).
    _pb._skp.${-model_options switch DATASET_ID}:
      COLLIE:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}: {}
        ${-ds construct evaluation.qa_datasets COLLIEDataset True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 256}${!setenv EXTRA_STOP_CONTAINS []}
        # Instruct models have multiple eos_token_ids in model.generation_config;
        # this breaks correct length detection if unaccounted for.
        # stop_on_tokens: ['\n\n', 128001]
        dset_split: 'test'
      BCB:
        ${-prompter construct generation.prompts PromptBuilderLlama3ForBCB}: {}
        ${-ds construct evaluation.code_data BCBDataset True}: {}
        # Only the token budget is set here; EXTRA_STOP_CONTAINS and
        # STOP_ON_ANY_NEWLINES are left to the fallbacks used in `rollouts`.
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 768}
        stop_on_tokens: []
        dset_split: 'v0.1.2'
      COQAPERT:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.perturbation_text CoQAPerturb True}:
          perturb_strength: [0., 0.2]
          perturb_type: 'shuffle'
          slim: 1024
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      SQUADPERT:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.perturbation_text SQUADPerturb True}:
          perturb_strength: [0., 0.2]
          perturb_type: 'shuffle'
          slim: 1024
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
  # Meta-Llama-3-8B-Instruct; the repo id can be overridden via USE_MODEL_REPO.
  # Datasets defined: COLLIE, TRIVIA, COQA, COQAPERT, SQUADPERT, SQUAD (no BCB
  # entry exists for this model).
  llama3_8b_i:
    model_id: ${!getenv USE_MODEL_REPO meta-llama/Meta-Llama-3-8B-Instruct}
    model_kwargs: {}
      # attn_implementation: flash_attention_2
    # To stop the complaining; shouldn't make a difference here anyway since the input is rectangular.
    # NOTE(review): tokenizer_kwargs is empty, so the remark above may refer to
    # an option that has since been removed — confirm.
    tokenizer_kwargs: {}
    # EXTRA_EOS_IDS is read by rollouts.ms.generate_kwargs (added_ids_list).
    # EOS_TOKEN_ID is only referenced from commented-out config in this file.
    __technical: ${!setenv EOS_TOKEN_ID 128001}${!setenv EXTRA_EOS_IDS []}
    # Dataset-specific settings, presumably selected by $DATASET_ID. Each entry
    # sets env vars that `rollouts` reads back: SAMPLE_MAX_NEW_TOKS
    # (max_tokens), EXTRA_STOP_CONTAINS (regex_contains) and
    # STOP_ON_ANY_NEWLINES (any_newlines).
    _pb._skp.${-model_options switch DATASET_ID}:
      COLLIE:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}: {}
        ${-ds construct evaluation.qa_datasets COLLIEDataset True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 256}${!setenv EXTRA_STOP_CONTAINS []}
        # Instruct models have multiple eos_token_ids in model.generation_config;
        # this breaks correct length detection if unaccounted for.
        # stop_on_tokens: ['\n\n', 128001]
        dset_split: 'test'
      TRIVIA:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.qa_datasets TriviaQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      COQA:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.qa_datasets CoQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      COQAPERT:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.perturbation_text CoQAPerturb True}:
          perturb_strength: [0., 0.2]
          perturb_type: 'shuffle'
          slim: 1024
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      SQUADPERT:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.perturbation_text SQUADPerturb True}:
          perturb_strength: [0., 0.2]
          perturb_type: 'shuffle'
          slim: 1024
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      SQUAD:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.qa_datasets SQUADv2 True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
  # Phi-3.5-mini-instruct; the repo id can be overridden via USE_MODEL_REPO.
  # Datasets defined: COLLIE, TRIVIA, COQA, SQUAD (no BCB or *PERT entries
  # exist for this model).
  phi35_i:
    model_id: ${!getenv USE_MODEL_REPO microsoft/Phi-3.5-mini-instruct}
    model_kwargs: {}
      # attn_implementation: flash_attention_2
    tokenizer_kwargs: {}
    # EXTRA_EOS_IDS is read by rollouts.ms.generate_kwargs (added_ids_list).
    # EOS_TOKEN_ID is only referenced from commented-out config in this file.
    __technical: ${!setenv EOS_TOKEN_ID 32001}${!setenv EXTRA_EOS_IDS [32007]}
    # Dataset-specific settings, presumably selected by $DATASET_ID. Each entry
    # sets env vars that `rollouts` reads back: SAMPLE_MAX_NEW_TOKS
    # (max_tokens), EXTRA_STOP_CONTAINS (regex_contains) and
    # STOP_ON_ANY_NEWLINES (any_newlines).
    _pb._skp.${-model_options switch DATASET_ID}:
      COLLIE:
        ${-prompter construct generation.prompts PromptBuilderPhi35I}: {}
        ${-ds construct evaluation.qa_datasets COLLIEDataset True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 256}${!setenv EXTRA_STOP_CONTAINS []}
        # stop_on_tokens: ['\n\n', 32007, 32001]
        dset_split: 'test'
      TRIVIA:
        ${-prompter construct generation.prompts PromptBuilderPhi35I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.qa_datasets TriviaQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 32007, 32001]
        dset_split: 'validation'
      COQA:
        ${-prompter construct generation.prompts PromptBuilderPhi35I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.qa_datasets CoQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 32007, 32001]
        dset_split: 'validation'  
      SQUAD:
        ${-prompter construct generation.prompts PromptBuilderPhi35I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.qa_datasets SQUADv2 True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}${!setenv EXTRA_STOP_CONTAINS ['\.']}${!setenv STOP_ON_ANY_NEWLINES True}
        # stop_on_tokens: ['.', '\n', 32007, 32001]
        dset_split: 'validation'
# Save cadence, overridable via SAMPLE_SAVE_EVERY (fallback 16). The unit is
# not visible in this file — presumably a number of prompts; TODO confirm.
save_every: ${!getenv SAMPLE_SAVE_EVERY 16}
# No fallback is given: DATASET_ID is expected to come from the environment.
# The same variable drives the per-model dataset switch above.
dset_id: ${!getenv DATASET_ID}
# Device string, overridable via USE_DEVICE (fallback cuda:0).
use_device: ${!getenv USE_DEVICE cuda:0}
# Generation settings for the two rollout modes. The generate_kwargs key names
# look like vLLM sampling / beam-search parameters — TODO confirm against the
# code that consumes this section.
rollouts:
  # ms: rollouts with beam_search disabled (sampling).
  ms:
    generate_kwargs:
      n: ${!getenv SAMPLE_NUM_SAMPLES 10}
      # NOTE(review): the top-level __technical block exports
      # SAMPLE_TEMPERATURE with a fallback of 1., so this 0.6 fallback is
      # presumably never used — confirm which default is intended.
      temperature: ${!getenv SAMPLE_TEMPERATURE 0.6}
      # SAMPLE_MAX_NEW_TOKS is normally set by the selected dataset entry.
      max_tokens: ${!getenv SAMPLE_MAX_NEW_TOKS 512}
      top_p: ${!getenv SAMPLE_TOP_P 0.99}
      top_k: ${!getenv SAMPLE_TOP_K 50}
      seed: 42
      stop: []
      include_stop_str_in_output: True
      # Stop-token ids come from generation.gen_utils.construct_token_lists,
      # fed with the env vars set by the selected model and dataset entries.
      ${-stop_token_ids partial generation.gen_utils construct_token_lists}:
        any_double_newlines: ${!getenv STOP_ON_DOUBLE_NEWLINES True}
        any_newlines: ${!getenv STOP_ON_ANY_NEWLINES False}
        regex_contains: ${!getenv EXTRA_STOP_CONTAINS []}
        added_ids_list: ${!getenv EXTRA_EOS_IDS []}
      logprobs: 20
      prompt_logprobs: 20
      spaces_between_special_tokens: True
    store_logits: True
    store_model_state: False
    beam_search: False
  # bs: rollouts with beam_search enabled.
  bs:
    generate_kwargs:
      beam_width: ${!getenv SAMPLE_NUM_BEAMS 10}
      max_tokens: ${!getenv SAMPLE_MAX_NEW_TOKS 512}
      # Listed once only: this key used to appear twice in this mapping, and
      # duplicate mapping keys are invalid YAML.
      include_stop_str_in_output: True
      # ${-stop_token_ids partial generation.gen_utils construct_token_lists}:
      #   any_double_newlines: ${!getenv STOP_ON_DOUBLE_NEWLINES True}
      #   any_newlines: ${!getenv STOP_ON_ANY_NEWLINES False}
      #   regex_contains: ${!getenv EXTRA_STOP_CONTAINS []}
      #   added_ids_list: ${!getenv EXTRA_EOS_IDS []}
      # logprobs: 20
      # prompt_logprobs: 20
      ignore_eos: False
      temperature: ${!getenv SAMPLE_TEMPERATURE 0.6}
      # length_penalty: 1.0
    beam_search: True
    store_logits: True
    store_model_state: False
# Range of dataset items to process, overridable via DSET_RANGE_START /
# DSET_RANGE_END (fallbacks 0 and 64). Whether the end is inclusive is not
# visible here — TODO confirm.
dset_range_start: ${!getenv DSET_RANGE_START 0}
dset_range_end: ${!getenv DSET_RANGE_END 64}
# Output directory: <SAMPLE_SAVE_DIR>/<DATASET_ID>_<USE_MODEL_SHORT>_<SAMPLE_TEMPERATURE>.
# The last three lookups have no fallback; SAMPLE_TEMPERATURE is exported by
# the top-level __technical block.
save_dir: ${!getenv SAMPLE_SAVE_DIR ./sample/}/${!getenv DATASET_ID}_${!getenv USE_MODEL_SHORT}_${!getenv SAMPLE_TEMPERATURE}
# NOTE(review): the env var is spelled PIGGZ_TEMP_DIR (double G) while the key
# says "pigz" — confirm the intended variable name. The fallback is empty.
pigz_temp_storage: ${!getenv PIGGZ_TEMP_DIR }
return_all_hidden: ${!getenv SAMPLE_RETURN_ALL_HIDDEN False}
# ok_beamer: ${!getenv SKIP_BEAM_SEARCH False}
# logitproc:
#   ${-extrastoptokens partial generation.gen_utils ExtraEOSTokenLogitsProcessorWithConstructor}:
#     eos_token_id: ${!getenv EOS_TOKEN_ID}
# Engine / hardware settings. The key names look like vLLM engine arguments —
# TODO confirm against the code that consumes this section.
hardware_config:
  task: 'generate'
  trust_remote_code: True
  dtype: bfloat16
  seed: 42
  max_model_len: 4096
  # TP_SIZE is set by the selected model entry (only qwen2_32b_i sets it, to 2);
  # otherwise the fallback of 1 applies.
  tensor_parallel_size: ${!getenv TP_SIZE 1}
  gpu_memory_utilization: 0.9
  # NOTE(review): 8196 is not a power of two and may be a typo for 8192 —
  # confirm before changing.
  max_seq_len_to_capture: 8196
  # distributed_executor_backend: 'mp'
  # The fallback is empty when HF_HOME is unset; how the loader represents that
  # (empty string vs. null) is not visible here — TODO confirm.
  download_dir: ${!getenv HF_HOME }
