# Entry: model google/gemma-2-27b, task ds1000-simple, num_samples 100.
# NOTE(review): the -1 values (concurrency, limit, top_k) look like
# "unset/unlimited" sentinels; confirm against the consumer.
- cfg:
    batch_size: 256
    concurrency: -1
    enforce_eager: false
    limit: -1
    max_new_tokens: 512
    max_prompt_tokens: 1536
    model: google/gemma-2-27b
    num_samples: 100
    output_parser: passthrough_output_parser
    prompt: passthrough
    # Quoted so the leading "#" is not read as a comment.
    stop_tokens:
      - '# SOLUTION END'
      - '</code>'
    task: ds1000-simple
    temperature: 0.2
    tensor_parallel_size: 1
    top_k: -1
    top_p: 0.95
    trust_remote_code: true
  # gs:// URIs recorded alongside this configuration.
  train: 'gs://uncertainty-for-programs/aaai-experiments/train-runs/grid-search/google_gemma-2-27b-ds1000-simple-sample100-20240721-142353'
  infer: 'gs://uncertainty-for-programs/aaai-experiments/infer-runs/google_gemma-2-27b-ds1000-simple-sample100-20240721-142353'
  splits: 'gs://uncertainty-for-programs/aaai-experiments/splits/ds1000-simple.npz'
# Entry: model google/gemma-2-27b-it, task math-test, num_samples 25.
# NOTE(review): the -1 values (concurrency, limit, top_k) look like
# "unset/unlimited" sentinels; confirm against the consumer.
- cfg:
    batch_size: 128
    concurrency: -1
    enforce_eager: false
    limit: -1
    max_new_tokens: 512
    max_prompt_tokens: 1536
    model: google/gemma-2-27b-it
    num_samples: 25
    output_parser: passthrough_output_parser
    prompt: gemma-math-4-shot
    # Quoted because the token ends with a colon.
    stop_tokens:
      - 'Problem:'
    task: math-test
    temperature: 0.6
    tensor_parallel_size: 1
    top_k: -1
    top_p: 0.95
    trust_remote_code: true
  # gs:// URIs recorded alongside this configuration.
  train: 'gs://uncertainty-for-programs/aaai-experiments/train-runs/grid-search/google_gemma-2-27b-it_math-sample25'
  infer: 'gs://uncertainty-for-programs/aaai-experiments/infer-runs/google_gemma-2-27b-it_math-sample25'
  splits: 'gs://uncertainty-for-programs/aaai-experiments/splits/math-test.npz'
# Entry: model gpt-4o-mini, task ds1000-simple, num_samples 25.
- cfg:
    logprobs: true
    model: gpt-4o-mini
    num_samples: 25
    # Folded block scalar: the lines below are joined with single spaces
    # into one line, with no trailing newline.
    system_prompt: >-
      Answer the following question. In your response, only write the
      raw code, do not use markdown and do not add explanations
    task: ds1000-simple
    temperature: 0.2
    top_p: 0.95
    user_prompt: passthrough
  # gs:// URIs recorded alongside this configuration.
  train: 'gs://uncertainty-for-programs/aaai-experiments/train-runs/grid-search/gpt-4o-mini_ds1000-simple_20240730074759'
  infer: 'gs://uncertainty-for-programs/aaai-experiments/infer-runs/gpt-4o-mini_ds1000-simple_20240730074759'
  splits: 'gs://uncertainty-for-programs/aaai-experiments/splits/ds1000-simple.npz'
# Entry: model gpt-4o-mini, task gsm8k-main-test, num_samples 25.
- cfg:
    logprobs: true
    model: gpt-4o-mini
    num_samples: 25
    # Explicit null: this entry sets no system prompt.
    system_prompt: null
    task: gsm8k-main-test
    temperature: 0.2
    top_p: 0.95
    user_prompt: gsm8k_cot_zeroshot
  # gs:// URIs recorded alongside this configuration.
  train: 'gs://uncertainty-for-programs/aaai-experiments/train-runs/grid-search/gpt-4o-mini_gsm8k-main-test_20240731052709'
  infer: 'gs://uncertainty-for-programs/aaai-experiments/infer-runs/gpt-4o-mini_gsm8k-main-test_20240731052709'
  splits: 'gs://uncertainty-for-programs/aaai-experiments/splits/gsm8k-main-test.npz'
# Entry: model gpt-4o-mini, task math-test, num_samples 25.
- cfg:
    logprobs: true
    model: gpt-4o-mini
    num_samples: 25
    # Folded block scalar: the lines below are joined with single spaces
    # into one line, with no trailing newline.
    system_prompt: >-
      Answer the following questions. Use the answer format provided
      in the examples. All of your latex expressions must be wrapped in $...$ (for
      example, to write 'x=2' as latex, write $x=2$).
    task: math-test
    temperature: 0.6
    top_p: 0.95
    user_prompt: math-4-shot
  # gs:// URIs recorded alongside this configuration.
  train: 'gs://uncertainty-for-programs/aaai-experiments/train-runs/grid-search/gpt-4o-mini_math-test_20240730181855'
  infer: 'gs://uncertainty-for-programs/aaai-experiments/infer-runs/gpt-4o-mini_math-test_20240730181855'
  splits: 'gs://uncertainty-for-programs/aaai-experiments/splits/math-test.npz'
# Entry: model gpt-4o-mini, task mbpp-all, num_samples 25.
- cfg:
    logprobs: true
    model: gpt-4o-mini
    num_samples: 25
    # Folded block scalar: the lines below are joined with single spaces
    # into one line, with no trailing newline.
    system_prompt: >-
      Answer the following question. In your response, only write the
      raw code, do not use markdown and do not add explanations.
    task: mbpp-all
    temperature: 0.2
    top_p: 0.95
    user_prompt: mbpp_single_testcase
  # gs:// URIs recorded alongside this configuration.
  train: 'gs://uncertainty-for-programs/aaai-experiments/train-runs/grid-search/gpt-4o-mini_mbpp-all_20240730075941'
  infer: 'gs://uncertainty-for-programs/aaai-experiments/infer-runs/gpt-4o-mini_mbpp-all_20240730075941'
  splits: 'gs://uncertainty-for-programs/aaai-experiments/splits/mbpp-all.npz'
# Entry: model gpt-4o-mini, task trivia-qa, num_samples 25.
- cfg:
    logprobs: true
    model: gpt-4o-mini
    num_samples: 25
    # Explicit null: this entry sets no system prompt.
    system_prompt: null
    task: trivia-qa
    temperature: 0.2
    top_p: 0.95
    user_prompt: trivia_qa_5_shot_with_description
  # gs:// URIs recorded alongside this configuration.
  # NOTE(review): the run directory is named "gpt-4o-..." while the model
  # above is gpt-4o-mini; confirm the paths point at the intended run.
  train: 'gs://uncertainty-for-programs/aaai-experiments/train-runs/grid-search/gpt-4o-trivia-qa-5-shot-with-description'
  infer: 'gs://uncertainty-for-programs/aaai-experiments/infer-runs/gpt-4o-trivia-qa-5-shot-with-description'
  splits: 'gs://uncertainty-for-programs/aaai-experiments/splits/trivia-qa.npz'
# Entry: model meta-llama/Meta-Llama-3-8B, task gsm8k-main-test,
# num_samples 100.
# NOTE(review): the -1 values (concurrency, limit, top_k) look like
# "unset/unlimited" sentinels; confirm against the consumer.
- cfg:
    batch_size: 128
    concurrency: -1
    enforce_eager: false
    limit: -1
    max_new_tokens: 512
    max_prompt_tokens: 1536
    model: meta-llama/Meta-Llama-3-8B
    num_samples: 100
    output_parser: gsm8k_first_number_from_last_line
    prompt: gsm8k_cot_zeroshot
    # Quoted because the tokens contain ":" or start with "<".
    stop_tokens:
      - 'Q:'
      - '</s>'
      - '<|im_end|>'
    task: gsm8k-main-test
    temperature: 0.2
    tensor_parallel_size: 1
    top_k: -1
    top_p: 0.95
    trust_remote_code: true
  # gs:// URIs recorded alongside this configuration.
  train: 'gs://uncertainty-for-programs/aaai-experiments/train-runs/grid-search/meta-llama_Meta-Llama-3-8B_gsm8k-test-cot-zeroshot_sample100_20240507-134608'
  infer: 'gs://uncertainty-for-programs/aaai-experiments/infer-runs/meta-llama_Meta-Llama-3-8B_gsm8k-test-cot-zeroshot_sample100_20240507-134608'
  splits: 'gs://uncertainty-for-programs/aaai-experiments/splits/gsm8k-main-test.npz'
# Entry: model meta-llama/Meta-Llama-3-8B, task mbpp-all, num_samples 100.
# NOTE(review): the -1 values (concurrency, limit, top_k) look like
# "unset/unlimited" sentinels; confirm against the consumer.
- cfg:
    batch_size: 64
    concurrency: -1
    enforce_eager: false
    limit: -1
    max_new_tokens: 512
    max_prompt_tokens: 1536
    model: meta-llama/Meta-Llama-3-8B
    num_samples: 100
    output_parser: extract_first_function
    prompt: mbpp_single_testcase
    # Every token except the last is one newline character followed by
    # the visible text; the newline is spelled as an explicit \n escape.
    stop_tokens:
      - "\nclass"
      - "\nassert"
      - "\n\"\"\""
      - "\nprint"
      - "\nif"
      - "\n<|/"
      - "\n```"
      - '<|endoftext|>'
    task: mbpp-all
    temperature: 0.2
    tensor_parallel_size: 1
    top_k: -1
    top_p: 0.95
    trust_remote_code: true
  # gs:// URIs recorded alongside this configuration.
  train: 'gs://uncertainty-for-programs/aaai-experiments/train-runs/grid-search/meta-llama_Meta-Llama-3-8B_mbpp-all'
  infer: 'gs://uncertainty-for-programs/aaai-experiments/infer-runs/meta-llama_Meta-Llama-3-8B_mbpp-all'
  splits: 'gs://uncertainty-for-programs/aaai-experiments/splits/mbpp-all.npz'
# Entry: model meta-llama/Meta-Llama-3-8B, task trivia-qa, num_samples 100.
# NOTE(review): the -1 values (concurrency, limit, top_k) look like
# "unset/unlimited" sentinels; confirm against the consumer.
- cfg:
    batch_size: 256
    concurrency: -1
    enforce_eager: false
    limit: -1
    max_new_tokens: 32
    max_prompt_tokens: 1536
    model: meta-llama/Meta-Llama-3-8B
    num_samples: 100
    output_parser: passthrough_output_parser
    prompt: trivia_qa_5_shot_with_description
    # Three one-character tokens: a newline, a period, and a comma.
    stop_tokens:
      - "\n"
      - '.'
      - ','
    task: trivia-qa
    temperature: 0.2
    tensor_parallel_size: 1
    top_k: -1
    top_p: 0.95
    trust_remote_code: true
  # gs:// URIs recorded alongside this configuration.
  train: 'gs://uncertainty-for-programs/aaai-experiments/train-runs/grid-search/meta-llama_Meta-Llama-3-8B_trivia-qa-5-shot-with-description_sample100_20240507-210959'
  infer: 'gs://uncertainty-for-programs/aaai-experiments/infer-runs/meta-llama_Meta-Llama-3-8B_trivia-qa-5-shot-with-description_sample100_20240507-210959'
  splits: 'gs://uncertainty-for-programs/aaai-experiments/splits/trivia-qa.npz'
# Entry: model microsoft/phi-2, task mbpp-all, num_samples 100.
# NOTE(review): the -1 values (concurrency, limit, top_k) look like
# "unset/unlimited" sentinels; confirm against the consumer.
- cfg:
    batch_size: 64
    concurrency: -1
    enforce_eager: false
    limit: -1
    max_new_tokens: 512
    max_prompt_tokens: 1536
    model: microsoft/phi-2
    num_samples: 100
    output_parser: extract_first_function
    prompt: mbpp_single_testcase
    # Every token except the last is one newline character followed by
    # the visible text; the newline is spelled as an explicit \n escape.
    stop_tokens:
      - "\nclass"
      - "\nassert"
      - "\n\"\"\""
      - "\nprint"
      - "\nif"
      - "\n<|/"
      - "\n```"
      - '<|endoftext|>'
    task: mbpp-all
    temperature: 0.2
    tensor_parallel_size: 1
    top_k: -1
    top_p: 0.95
    trust_remote_code: true
  # gs:// URIs recorded alongside this configuration.
  train: 'gs://uncertainty-for-programs/aaai-experiments/train-runs/grid-search/microsoft-phi-2_mbpp-all'
  infer: 'gs://uncertainty-for-programs/aaai-experiments/infer-runs/microsoft-phi-2_mbpp-all'
  splits: 'gs://uncertainty-for-programs/aaai-experiments/splits/mbpp-all.npz'
# Entry: model microsoft/phi-2, task gsm8k-main-test, num_samples 100.
# NOTE(review): the -1 values (concurrency, limit, top_k) look like
# "unset/unlimited" sentinels; confirm against the consumer.
- cfg:
    batch_size: 128
    concurrency: -1
    enforce_eager: false
    limit: -1
    max_new_tokens: 512
    max_prompt_tokens: 1536
    model: microsoft/phi-2
    num_samples: 100
    output_parser: gsm8k_first_number_from_last_line
    prompt: gsm8k_cot_zeroshot
    # Quoted because the tokens contain ":" or start with "<".
    stop_tokens:
      - 'Q:'
      - '</s>'
      - '<|im_end|>'
    task: gsm8k-main-test
    temperature: 0.2
    tensor_parallel_size: 1
    top_k: -1
    top_p: 0.95
    trust_remote_code: true
  # gs:// URIs recorded alongside this configuration.
  train: 'gs://uncertainty-for-programs/aaai-experiments/train-runs/grid-search/microsoft-phi2_gsm'
  infer: 'gs://uncertainty-for-programs/aaai-experiments/infer-runs/microsoft-phi2_gsm'
  splits: 'gs://uncertainty-for-programs/aaai-experiments/splits/gsm8k-main-test.npz'
# Entry: model microsoft/phi-2, task trivia-qa, num_samples 100.
# NOTE(review): the -1 values (concurrency, limit, top_k) look like
# "unset/unlimited" sentinels; confirm against the consumer.
- cfg:
    batch_size: 128
    concurrency: -1
    enforce_eager: false
    limit: -1
    max_new_tokens: 32
    max_prompt_tokens: 1536
    model: microsoft/phi-2
    num_samples: 100
    output_parser: passthrough_output_parser
    prompt: trivia_qa_5_shot_with_description
    # Three one-character tokens: a newline, a period, and a comma.
    stop_tokens:
      - "\n"
      - '.'
      - ','
    task: trivia-qa
    temperature: 0.2
    tensor_parallel_size: 1
    top_k: -1
    top_p: 0.95
    trust_remote_code: true
  # gs:// URIs recorded alongside this configuration.
  train: 'gs://uncertainty-for-programs/aaai-experiments/train-runs/grid-search/microsoft-phi2_trivia'
  infer: 'gs://uncertainty-for-programs/aaai-experiments/infer-runs/microsoft-phi2_trivia'
  splits: 'gs://uncertainty-for-programs/aaai-experiments/splits/trivia-qa.npz'
