# Side-effect entries handled by the custom dollar-brace preprocessor directives used throughout
# this file (their semantics are defined by the config loader, not by YAML).
__technical:
  # Re-export SAMPLE_TEMPERATURE: the inner getenv supplies 1. when the variable is unset
  # (presumably so the default-less SAMPLE_TEMPERATURE lookups further down always resolve — confirm).
  - ${!setenv SAMPLE_TEMPERATURE ${!getenv SAMPLE_TEMPERATURE 1.}}
  # NOTE(review): presumably records the current git revision id — confirm against the loader.
  - GIT_ID: ${!gitid}
# Per-model settings. NOTE(review): presumably the custom switch directive keeps only the entry whose
# key equals the USE_MODEL_SHORT environment variable — confirm against the config loader.
_pb._skp.${-model_options switch USE_MODEL_SHORT}:
  # Llama 3 70B; the default repo id is the non-Instruct checkpoint, loaded with load_in_4bit.
  llama3_70b:
    # USE_MODEL_REPO, when set, presumably replaces the default repo id given here.
    model_id: ${!getenv USE_MODEL_REPO meta-llama/Meta-Llama-3-70B}
    model_kwargs:
      attn_implementation: flash_attention_2
      torch_dtype: auto
      # Disabled alternatives, kept for reference:
      # device_map: auto
      # load_in_8bit: True
      load_in_4bit: True
    # Original author note: set to stop the complaining; shouldn't make a difference here anyway
    # since the input is rectangular.
    # NOTE(review): the mapping is empty now, so this note may be stale — confirm.
    tokenizer_kwargs: {}
    # Per-dataset settings; the case is chosen by DATASET_ID (same switch mechanism as above).
    _pb._skp.${-model_options switch DATASET_ID}:
      BCB:
        # prompter / ds: construct directives naming a module and a class; the mapping value
        # presumably carries constructor kwargs (empty here) — confirm against the loader.
        ${-prompter construct generation.prompts PromptBuilderLlama3ForBCB}: {}
        ${-ds construct evaluation.code_data BCBDataset True}: {}
        # Sets SAMPLE_MAX_NEW_TOKS, which max_new_tokens in sampling_kwargs and bs_kwargs reads later.
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 768}
        # No extra stop tokens for this dataset.
        stop_on_tokens: []
        dset_split: 'v0.1.2'
      COLLIE:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}: {}
        ${-ds construct evaluation.datasets COLLIEDataset True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 256}
        # Original author note: instruct models have multiple eos_token_ids in
        # model.generation_config, which breaks correct length detection if unaccounted for.
        # NOTE(review): no EOS id is listed below and this entry defaults to the base (non-Instruct)
        # repo — confirm whether the note applies here.
        # NOTE(review): YAML single-quoted scalars do no escape processing, so '\n' in the stop lists
        # of this block is the two characters backslash + n, not a newline, unless the consumer
        # unescapes it — confirm.
        stop_on_tokens: ['\n\n']
        dset_split: 'test'
      COQA:
        # role_prefix_mapping: prefixes attached to the user / assistant roles by the prompt builder.
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets CoQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}
        stop_on_tokens: ['.', '\n']
        dset_split: 'validation'
      SQUAD:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets SQUADv2 True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}
        stop_on_tokens: ['.', '\n']
        dset_split: 'validation'  
  # Llama 3 70B Instruct, loaded with load_in_4bit.
  llama3_70b_i:
    # USE_MODEL_REPO, when set, presumably replaces the default repo id given here.
    model_id: ${!getenv USE_MODEL_REPO meta-llama/Meta-Llama-3-70B-Instruct}
    model_kwargs:
      attn_implementation: flash_attention_2
      torch_dtype: auto
      # Disabled alternatives, kept for reference:
      # device_map: auto
      # load_in_8bit: True
      load_in_4bit: True
    # Original author note: set to stop the complaining; shouldn't make a difference here anyway
    # since the input is rectangular.
    # NOTE(review): the mapping is empty now, so this note may be stale — confirm.
    tokenizer_kwargs: {}
    # Per-dataset settings; the case is chosen by DATASET_ID.
    _pb._skp.${-model_options switch DATASET_ID}:
      BCB:
        ${-prompter construct generation.prompts PromptBuilderLlama3ForBCB}: {}
        ${-ds construct evaluation.code_data BCBDataset True}: {}
        # Sets SAMPLE_MAX_NEW_TOKS, which max_new_tokens in sampling_kwargs and bs_kwargs reads later.
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 768}
        # Left empty, as in the llama3_8b_i BCB entry.
        stop_on_tokens: []
        dset_split: 'v0.1.2'
      COLLIE:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}: {}
        ${-ds construct evaluation.datasets COLLIEDataset True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 256}
        # Instruct models have multiple eos_token_ids in model.generation_config, which breaks
        # correct length detection if unaccounted for; 128001 is therefore listed as an extra
        # stop id, matching the llama3_8b_i entries for the same datasets.
        # NOTE(review): YAML single-quoted scalars do no escape processing, so '\n' in the stop lists
        # of this block is the two characters backslash + n, not a newline, unless the consumer
        # unescapes it — confirm.
        stop_on_tokens: ['\n\n', 128001]
        dset_split: 'test'
      COQA:
        # role_prefix_mapping: prefixes attached to the user / assistant roles by the prompt builder.
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets CoQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}
        stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      SQUAD:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets SQUADv2 True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}
        stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
  # Llama 3 8B; the default repo id is the non-Instruct checkpoint.
  llama3_8b:
    # USE_MODEL_REPO, when set, presumably replaces the default repo id given here.
    model_id: ${!getenv USE_MODEL_REPO meta-llama/Meta-Llama-3-8B}
    model_kwargs:
      attn_implementation: flash_attention_2
    # Original author note: set to stop the complaining; shouldn't make a difference here anyway
    # since the input is rectangular.
    # NOTE(review): the mapping is empty now, so this note may be stale — confirm.
    tokenizer_kwargs: {}
    # Per-dataset settings; the case is chosen by DATASET_ID.
    _pb._skp.${-model_options switch DATASET_ID}:
      BCB:
        # prompter / ds: construct directives naming a module and a class; the mapping value
        # presumably carries constructor kwargs (empty here) — confirm against the loader.
        ${-prompter construct generation.prompts PromptBuilderLlama3ForBCB}: {}
        ${-ds construct evaluation.code_data BCBDataset True}: {}
        # Sets SAMPLE_MAX_NEW_TOKS, which max_new_tokens in sampling_kwargs and bs_kwargs reads later.
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 768}
        stop_on_tokens: []
        dset_split: 'v0.1.2'
      COLLIE:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}: {}
        ${-ds construct evaluation.datasets COLLIEDataset True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 256}
        # Original author note: instruct models have multiple eos_token_ids in
        # model.generation_config, which breaks correct length detection if unaccounted for.
        # NOTE(review): no EOS id is listed below and this entry defaults to the base (non-Instruct)
        # repo — confirm whether the note applies here.
        # NOTE(review): YAML single-quoted scalars do no escape processing, so '\n' in the stop lists
        # of this block is the two characters backslash + n, not a newline, unless the consumer
        # unescapes it — confirm.
        stop_on_tokens: ['\n\n']
        dset_split: 'test'
      KUQ:
        # role_prefix_mapping: prefixes attached to the user / assistant roles by the prompt builder.
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        # KUQ additionally references a prompt object; the trailing False differs from the True used
        # by the ds constructs (meaning of that flag is defined by the loader).
        ${-prompt construct generation.prompts PROMPT_KUQ_LLAMA_INSTRUCT_CUSTOMIZED False}: {}
        ${-ds construct evaluation.datasets KUQDataset True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}
        stop_on_tokens: ['\n']
        dset_split: 'test'
      TRIVIA:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets TriviaQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}
        stop_on_tokens: ['.', '\n']
        dset_split: 'validation'
      COQA:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets CoQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}
        stop_on_tokens: ['.', '\n']
        dset_split: 'validation'
      SQUAD:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets SQUADv2 True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}
        stop_on_tokens: ['.', '\n']
        dset_split: 'validation'  
  # Llama 3 8B Instruct.
  llama3_8b_i:
    # USE_MODEL_REPO, when set, presumably replaces the default repo id given here.
    model_id: ${!getenv USE_MODEL_REPO meta-llama/Meta-Llama-3-8B-Instruct}
    model_kwargs:
      attn_implementation: flash_attention_2
    # Original author note: set to stop the complaining; shouldn't make a difference here anyway
    # since the input is rectangular.
    # NOTE(review): the mapping is empty now, so this note may be stale — confirm.
    tokenizer_kwargs: {}
    # Per-dataset settings; the case is chosen by DATASET_ID.
    _pb._skp.${-model_options switch DATASET_ID}:
      BCB:
        # prompter / ds: construct directives naming a module and a class; the mapping value
        # presumably carries constructor kwargs (empty here) — confirm against the loader.
        ${-prompter construct generation.prompts PromptBuilderLlama3ForBCB}: {}
        ${-ds construct evaluation.code_data BCBDataset True}: {}
        # Sets SAMPLE_MAX_NEW_TOKS, which max_new_tokens in sampling_kwargs and bs_kwargs reads later.
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 768}
        # NOTE(review): unlike the other datasets of this model, no 128001 stop id is listed here.
        stop_on_tokens: []
        dset_split: 'v0.1.2'
      COLLIE:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}: {}
        ${-ds construct evaluation.datasets COLLIEDataset True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 256}
        # Instruct models have multiple eos_token_ids in model.generation_config, which breaks
        # correct length detection if unaccounted for; hence the integer id 128001 in the stop lists
        # of this block (presumably one of those EOS ids — confirm).
        # NOTE(review): YAML single-quoted scalars do no escape processing, so '\n' in the stop lists
        # of this block is the two characters backslash + n, not a newline, unless the consumer
        # unescapes it — confirm.
        stop_on_tokens: ['\n\n', 128001]
        dset_split: 'test'
      KUQ:
        # role_prefix_mapping: prefixes attached to the user / assistant roles by the prompt builder.
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        # KUQ additionally references a prompt object; the trailing False differs from the True used
        # by the ds constructs (meaning of that flag is defined by the loader).
        ${-prompt construct generation.prompts PROMPT_KUQ_LLAMA_INSTRUCT_CUSTOMIZED False}: {}
        ${-ds construct evaluation.datasets KUQDataset True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}
        stop_on_tokens: ['\n', 128001]
        dset_split: 'test'
      TRIVIA:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets TriviaQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}
        stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      COQA:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets CoQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}
        stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
      SQUAD:
        ${-prompter construct generation.prompts PromptBuilderLlama3I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets SQUADv2 True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}
        stop_on_tokens: ['.', '\n', 128001]
        dset_split: 'validation'
  # Phi entry without the _i suffix.
  # NOTE(review): the default repo id is Phi-3-mini-4k-instruct although the key says phi35 — confirm.
  phi35:
    model_id: ${!getenv USE_MODEL_REPO microsoft/Phi-3-mini-4k-instruct}
    model_kwargs: {attn_implementation: flash_attention_2}
    tokenizer_kwargs: {}
    # Per-dataset settings; the case is chosen by DATASET_ID.
    _pb._skp.${-model_options switch DATASET_ID}:
      COLLIE:
        ${-prompter construct generation.prompts PromptBuilderPhi35I}: {}
        ${-ds construct evaluation.datasets COLLIEDataset True}: {}
        # Sets SAMPLE_MAX_NEW_TOKS, which max_new_tokens in sampling_kwargs and bs_kwargs reads later.
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 256}
        # One string entry plus two integer ids. The string stays single-quoted, so it is the
        # literal characters backslash-n-backslash-n exactly as before.
        stop_on_tokens:
          - '\n\n'
          - 32007
          - 32001
        dset_split: "test"
      BCB:
        ${-prompter construct generation.prompts PromptBuilderLlama3ForBCB}: {}
        ${-ds construct evaluation.code_data BCBDataset True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 768}
        # Only the two integer ids here.
        stop_on_tokens:
          - 32007
          - 32001
        dset_split: "v0.1.2"
  # Phi 3.5 mini Instruct.
  phi35_i:
    # USE_MODEL_REPO, when set, presumably replaces the default repo id given here.
    model_id: ${!getenv USE_MODEL_REPO microsoft/Phi-3.5-mini-instruct}
    model_kwargs:
      attn_implementation: flash_attention_2
    tokenizer_kwargs: {}
    # Per-dataset settings; the case is chosen by DATASET_ID.
    _pb._skp.${-model_options switch DATASET_ID}:
      COLLIE:
        # prompter / ds: construct directives naming a module and a class; the mapping value
        # presumably carries constructor kwargs (empty here) — confirm against the loader.
        ${-prompter construct generation.prompts PromptBuilderPhi35I}: {}
        ${-ds construct evaluation.datasets COLLIEDataset True}: {}
        # Sets SAMPLE_MAX_NEW_TOKS, which max_new_tokens in sampling_kwargs and bs_kwargs reads later.
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 256}
        # Every stop list of this model ends with the integer ids 32007 and 32001
        # (presumably special-token ids of the Phi tokenizer — confirm).
        # NOTE(review): YAML single-quoted scalars do no escape processing, so '\n' in the stop lists
        # of this block is the two characters backslash + n, not a newline, unless the consumer
        # unescapes it — confirm.
        stop_on_tokens: ['\n\n', 32007, 32001]
        dset_split: 'test'
      KUQ:
        # role_prefix_mapping: prefixes attached to the user / assistant roles by the prompt builder.
        ${-prompter construct generation.prompts PromptBuilderPhi35I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets KUQDataset True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}
        stop_on_tokens: ['\n', 32007, 32001]
        dset_split: 'test'
      TRIVIA:
        ${-prompter construct generation.prompts PromptBuilderPhi35I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets TriviaQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}
        stop_on_tokens: ['.', '\n', 32007, 32001]
        dset_split: 'validation'
      COQA:
        ${-prompter construct generation.prompts PromptBuilderPhi35I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets CoQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}
        stop_on_tokens: ['.', '\n', 32007, 32001]
        dset_split: 'validation'  
      SQUAD:
        ${-prompter construct generation.prompts PromptBuilderPhi35I}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets SQUADv2 True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 128}
        stop_on_tokens: ['.', '\n', 32007, 32001]
        dset_split: 'validation'  
      BCB:
        # BCB reuses the Llama3 BCB prompt builder, as every other model entry in this file does.
        ${-prompter construct generation.prompts PromptBuilderLlama3ForBCB}: {}
        ${-ds construct evaluation.code_data BCBDataset True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 768}
        stop_on_tokens: [32007, 32001]
        dset_split: 'v0.1.2'
  # Falcon3 Mamba 7B Instruct.
  falcon_mamba_i:
    # USE_MODEL_REPO, when set, presumably replaces the default repo id given here.
    model_id: ${!getenv USE_MODEL_REPO tiiuae/Falcon3-Mamba-7B-Instruct}
    # No attn_implementation is set for this model, only torch_dtype.
    model_kwargs:
      torch_dtype: auto
    tokenizer_kwargs: {}
    # Per-dataset settings; the case is chosen by DATASET_ID.
    _pb._skp.${-model_options switch DATASET_ID}:
      BCB:
        # prompter / ds: construct directives naming a module and a class; the mapping value
        # presumably carries constructor kwargs (empty here) — confirm against the loader.
        ${-prompter construct generation.prompts PromptBuilderLlama3ForBCB}: {}
        ${-ds construct evaluation.code_data BCBDataset True}: {}
        # Disabled option, presumably a kwarg for the dataset construct above: add_few_shot_examples: 2
        # Sets SAMPLE_MAX_NEW_TOKS, which max_new_tokens in sampling_kwargs and bs_kwargs reads later.
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 768}
        stop_on_tokens: []
        dset_split: 'v0.1.2'
      COLLIE:
        ${-prompter construct generation.prompts PromptBuilderFalcon3}: {}
        ${-ds construct evaluation.datasets COLLIEDataset True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 256}
        # NOTE(review): YAML single-quoted scalars do no escape processing, so '\n' in the stop lists
        # of this block is the two characters backslash + n, not a newline, unless the consumer
        # unescapes it — confirm.
        stop_on_tokens: ['\n\n']
        dset_split: 'test'
      TRIVIA:
        # NOTE(review): the QA datasets below use PromptBuilderNonIFT while COLLIE above uses
        # PromptBuilderFalcon3 — confirm this is intended for the instruct checkpoint.
        ${-prompter construct generation.prompts PromptBuilderNonIFT}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets TriviaQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 64}
        stop_on_tokens: ['.', '\n']
        dset_split: 'validation'
      COQA:
        ${-prompter construct generation.prompts PromptBuilderNonIFT}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets CoQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 64}
        stop_on_tokens: ['.', '\n']
        dset_split: 'validation'  
      SQUAD:
        ${-prompter construct generation.prompts PromptBuilderNonIFT}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets SQUADv2 True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 64}
        stop_on_tokens: ['.', '\n']
        dset_split: 'validation'  
  # Falcon3 Mamba 7B Base.
  falcon_mamba:
    # USE_MODEL_REPO, when set, presumably replaces the default repo id given here.
    model_id: ${!getenv USE_MODEL_REPO tiiuae/Falcon3-Mamba-7B-Base}
    # No attn_implementation is set for this model, only torch_dtype.
    model_kwargs:
      torch_dtype: auto
    tokenizer_kwargs: {}
    # Per-dataset settings; the case is chosen by DATASET_ID.
    _pb._skp.${-model_options switch DATASET_ID}:
      BCB:
        # prompter / ds: construct directives naming a module and a class; the mapping value
        # presumably carries constructor kwargs (empty here) — confirm against the loader.
        ${-prompter construct generation.prompts PromptBuilderLlama3ForBCB}: {}
        ${-ds construct evaluation.code_data BCBDataset True}: {}
        # Disabled option, presumably a kwarg for the dataset construct above: add_few_shot_examples: 2
        # Sets SAMPLE_MAX_NEW_TOKS, which max_new_tokens in sampling_kwargs and bs_kwargs reads later.
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 768}
        stop_on_tokens: []
        dset_split: 'v0.1.2'
      COLLIE:
        ${-prompter construct generation.prompts PromptBuilderNonIFT}: {}
        ${-ds construct evaluation.datasets COLLIEDataset True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 256}
        # NOTE(review): YAML single-quoted scalars do no escape processing, so '\n' in the stop lists
        # of this block is the two characters backslash + n, not a newline, unless the consumer
        # unescapes it — confirm.
        stop_on_tokens: ['\n\n']
        dset_split: 'test'
      KUQ:
        # role_prefix_mapping: prefixes attached to the user / assistant roles by the prompt builder.
        ${-prompter construct generation.prompts PromptBuilderNonIFT}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets KUQDataset True}: {}
        # The QA datasets of this model cap generation at 64 new tokens.
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 64}
        stop_on_tokens: ['\n']
        dset_split: 'test'
      TRIVIA:
        ${-prompter construct generation.prompts PromptBuilderNonIFT}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets TriviaQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 64}
        stop_on_tokens: ['.', '\n']
        dset_split: 'validation'
      COQA:
        ${-prompter construct generation.prompts PromptBuilderNonIFT}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets CoQA True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 64}
        stop_on_tokens: ['.', '\n']
        dset_split: 'validation'  
      SQUAD:
        ${-prompter construct generation.prompts PromptBuilderNonIFT}:
          role_prefix_mapping:
            user: "Question: "
            assistant: "Answer: "
        ${-ds construct evaluation.datasets SQUADv2 True}: {}
        __technical: ${!setenv SAMPLE_MAX_NEW_TOKS 64}
        stop_on_tokens: ['.', '\n']
        dset_split: 'validation'  
# General run settings, each read from an environment variable; the second getenv argument,
# where present, is presumably the fallback used when the variable is unset.
save_every: ${!getenv SAMPLE_SAVE_EVERY 256}
# No fallback: DATASET_ID must be set (it also drives the per-model dataset switch).
dset_id: ${!getenv DATASET_ID}
use_device: ${!getenv USE_DEVICE cuda:0}
n_samples: ${!getenv SAMPLE_NUM_SAMPLES 10}
# Keyword arguments for sampling-based generation.
sampling_kwargs:
  top_p: ${!getenv SAMPLE_TOP_P 0.95}
  top_k: ${!getenv SAMPLE_TOP_K 10}
  # No fallback here; SAMPLE_TEMPERATURE is set by the top-level __technical entry.
  temperature: ${!getenv SAMPLE_TEMPERATURE}
  # SAMPLE_MAX_NEW_TOKS is set by the per-dataset __technical entries; 64 applies otherwise.
  max_new_tokens: ${!getenv SAMPLE_MAX_NEW_TOKS 64}
  # Model-specific extras. The third switch argument matches the ___ key below,
  # presumably naming the fallback case — confirm against the loader.
  _pb._skp.${-model_options switch USE_MODEL_SHORT ___}:
    # SSMs seem to have a repetition problem! (Keep in sync with bs_kwargs.)
    # NOTE(review): falcon_mamba_i has no case here, so it presumably takes the ___ fallback;
    # rwkv6_7b has no entry in the model switch shown in this file — confirm both.
    falcon_mamba:
      repetition_penalty: 1.2
    rwkv6_7b:
      repetition_penalty: 1.2
    ___: {} # default: no changes for any other model
# Keyword arguments for beam-search generation.
bs_kwargs:
  num_beams: ${!getenv BS_NUM_BEAMS 10}
  # SAMPLE_MAX_NEW_TOKS is set by the per-dataset __technical entries; 64 applies otherwise.
  max_new_tokens: ${!getenv SAMPLE_MAX_NEW_TOKS 64}
  # Model-specific extras. The third switch argument matches the ___ key below,
  # presumably naming the fallback case — confirm against the loader.
  _pb._skp.${-model_options switch USE_MODEL_SHORT ___}:
    # SSMs seem to have a repetition problem!
    # NOTE(review): falcon_mamba_i has no case here, so it presumably takes the ___ fallback;
    # rwkv6_7b has no entry in the model switch shown in this file — confirm both.
    falcon_mamba:
      repetition_penalty: 1.2
      no_repeat_ngram_size: 10
    rwkv6_7b:
      repetition_penalty: 1.2
      no_repeat_ngram_size: 10
    ___: {} # default: no changes for any other model

# Dataset slice to process.
# NOTE(review): the -1 fallback for the end presumably means "until the last item" — confirm.
dset_range_start: ${!getenv DSET_RANGE_START 0}
dset_range_end: ${!getenv DSET_RANGE_END -1}
# Output directory: <SAMPLE_SAVE_DIR or ./sample>/<DATASET_ID>_<USE_MODEL_SHORT>_<SAMPLE_TEMPERATURE>
save_dir: ${!getenv SAMPLE_SAVE_DIR ./sample}/${!getenv DATASET_ID}_${!getenv USE_MODEL_SHORT}_${!getenv SAMPLE_TEMPERATURE}
# Don't return them by default: transition probs are calculated anyway, it is hard to make
# anything out of the logits, and no current algorithm needs them.
return_logits_for_bs: False
# NOTE(review): the variable is spelled PIGGZ (double G) while the key says pigz, and the directive
# has no fallback value (only a trailing space) — confirm both are intended.
pigz_temp_storage: ${!getenv PIGGZ_TEMP_DIR }
return_all_hidden: ${!getenv SAMPLE_RETURN_ALL_HIDDEN False}
# NOTE(review): ok_beamer takes the value of SKIP_BEAM_SEARCH (fallback False); whether a true value
# skips or enables beam search is not visible in this file — confirm against the consumer.
ok_beamer: ${!getenv SKIP_BEAM_SEARCH False}
