# Run record: model gpt-4o-mini on task ds1000-simple.
- cfg:
    logprobs: true
    model: gpt-4o-mini
    num_samples: 25
    # Folded block scalar (>-): the two lines are joined by one space and the
    # trailing newline is stripped, so the parsed string is unchanged.
    # NOTE(review): this prompt ends without a period, unlike the otherwise
    # identical mbpp-all prompt in this file; kept verbatim.
    system_prompt: >-
      Answer the following question. In your response, only write the
      raw code, do not use markdown and do not add explanations
    task: ds1000-simple
    temperature: 0.2
    top_p: 0.95
    user_prompt: passthrough
  # gs:// URIs tied to this run; presumably inference outputs, the dataset
  # split file (.npz) and training outputs respectively - TODO confirm.
  infer: gs://uncertainty-for-programs/aaai-experiments/infer-runs/gpt-4o-mini_ds1000-simple_20240730074759
  splits: gs://uncertainty-for-programs/aaai-experiments/splits/ds1000-simple.npz
  train: gs://uncertainty-for-programs/aaai-experiments/train-runs/gpt-4o-mini_ds1000-simple_20240730074759
# Run record: model gpt-4o-mini on task gsm8k-main-test.
- cfg:
    # Settings recorded for this run (presumably the arguments given to the
    # inference job - confirm against the consumer of this file).
    logprobs: true
    model: gpt-4o-mini
    num_samples: 25
    # Explicit null: no system prompt is recorded for this entry.
    system_prompt: null
    task: gsm8k-main-test
    temperature: 0.2
    top_p: 0.95
    # Same identifier that the non-OpenAI gsm8k entries in this file store
    # under the key `prompt`.
    user_prompt: gsm8k_cot_zeroshot
  # gs:// URIs tied to this run; presumably inference outputs, the dataset
  # split file (.npz) and training outputs respectively - TODO confirm.
  infer: gs://uncertainty-for-programs/aaai-experiments/infer-runs/gpt-4o-mini_gsm8k-main-test_20240731052709
  splits: gs://uncertainty-for-programs/aaai-experiments/splits/gsm8k-main-test.npz
  train: gs://uncertainty-for-programs/aaai-experiments/train-runs/gpt-4o-mini_gsm8k-main-test_20240731052709
# Run record: model gpt-4o-mini on task math-test.
- cfg:
    logprobs: true
    model: gpt-4o-mini
    num_samples: 25
    # Folded block scalar (>-): lines are joined by single spaces and the
    # trailing newline is stripped, so the parsed string is unchanged.
    system_prompt: >-
      Answer the following questions. Use the answer format provided
      in the examples. All of your latex expressions must be wrapped in $...$ (for
      example, to write 'x=2' as latex, write $x=2$).
    task: math-test
    # NOTE(review): 0.6 here versus 0.2 in every other entry visible in this
    # file - confirm this is intentional.
    temperature: 0.6
    top_p: 0.95
    user_prompt: math-4-shot
  # gs:// URIs tied to this run; presumably inference outputs, the dataset
  # split file (.npz) and training outputs respectively - TODO confirm.
  infer: gs://uncertainty-for-programs/aaai-experiments/infer-runs/gpt-4o-mini_math-test_20240730181855
  splits: gs://uncertainty-for-programs/aaai-experiments/splits/math-test.npz
  train: gs://uncertainty-for-programs/aaai-experiments/train-runs/gpt-4o-mini_math-test_20240730181855
# Run record: model gpt-4o-mini on task mbpp-all.
- cfg:
    logprobs: true
    model: gpt-4o-mini
    num_samples: 25
    # Folded block scalar (>-): the two lines are joined by one space and the
    # trailing newline is stripped, so the parsed string is unchanged.
    system_prompt: >-
      Answer the following question. In your response, only write the
      raw code, do not use markdown and do not add explanations.
    task: mbpp-all
    temperature: 0.2
    top_p: 0.95
    # Same identifier that the non-OpenAI mbpp entries in this file store
    # under the key `prompt`.
    user_prompt: mbpp_single_testcase
  # gs:// URIs tied to this run; presumably inference outputs, the dataset
  # split file (.npz) and training outputs respectively - TODO confirm.
  infer: gs://uncertainty-for-programs/aaai-experiments/infer-runs/gpt-4o-mini_mbpp-all_20240730075941
  splits: gs://uncertainty-for-programs/aaai-experiments/splits/mbpp-all.npz
  train: gs://uncertainty-for-programs/aaai-experiments/train-runs/gpt-4o-mini_mbpp-all_20240730075941
# Run record: model meta-llama/Meta-Llama-3-8B on task gsm8k-main-test.
- cfg:
    # NOTE(review): several keys here (enforce_eager, tensor_parallel_size,
    # trust_remote_code) resemble vLLM engine arguments - confirm the backend.
    batch_size: 128
    # -1 is presumably a "no limit / use default" sentinel for concurrency,
    # limit and top_k - TODO confirm against the consumer.
    concurrency: -1
    enforce_eager: false
    limit: -1
    max_new_tokens: 512
    max_prompt_tokens: 1536
    model: meta-llama/Meta-Llama-3-8B
    num_samples: 100
    output_parser: gsm8k_first_number_from_last_line
    prompt: gsm8k_cot_zeroshot
    # Three stop strings; 'Q:' is quoted because a plain scalar cannot end
    # with a colon.
    stop_tokens:
    - 'Q:'
    - </s>
    - <|im_end|>
    task: gsm8k-main-test
    temperature: 0.2
    tensor_parallel_size: 1
    top_k: -1
    top_p: 0.95
    trust_remote_code: true
  # gs:// URIs tied to this run; presumably inference outputs, the dataset
  # split file (.npz) and training outputs respectively - TODO confirm.
  # The run name in infer/train ("gsm8k-test-cot-zeroshot_sample100_...")
  # is not derived from the `task` value above.
  infer: gs://uncertainty-for-programs/aaai-experiments/infer-runs/meta-llama_Meta-Llama-3-8B_gsm8k-test-cot-zeroshot_sample100_20240507-134608
  splits: gs://uncertainty-for-programs/aaai-experiments/splits/gsm8k-main-test.npz
  train: gs://uncertainty-for-programs/aaai-experiments/train-runs/meta-llama_Meta-Llama-3-8B_gsm8k-test-cot-zeroshot_sample100_20240507-134608
# Run record: model meta-llama/Meta-Llama-3-8B on task mbpp-all.
- cfg:
    batch_size: 64
    # -1 is presumably a "no limit / use default" sentinel for concurrency,
    # limit and top_k - TODO confirm against the consumer.
    concurrency: -1
    enforce_eager: false
    limit: -1
    max_new_tokens: 512
    max_prompt_tokens: 1536
    model: meta-llama/Meta-Llama-3-8B
    num_samples: 100
    output_parser: extract_first_function
    prompt: mbpp_single_testcase
    # Each of the first seven stop strings is one newline followed by a
    # token. Double quotes make the newline explicit as \n; the parsed values
    # are the same as the folded single-quoted form they replace.
    stop_tokens:
    - "\nclass"
    - "\nassert"
    - "\n\"\"\""
    - "\nprint"
    - "\nif"
    - "\n<|/"
    - "\n```"
    - '<|endoftext|>'
    task: mbpp-all
    temperature: 0.2
    tensor_parallel_size: 1
    top_k: -1
    top_p: 0.95
    trust_remote_code: true
  # gs:// URIs tied to this run; presumably inference outputs, the dataset
  # split file (.npz) and training outputs respectively - TODO confirm.
  # Unlike some other entries, the infer/train run names carry no timestamp.
  infer: gs://uncertainty-for-programs/aaai-experiments/infer-runs/meta-llama_Meta-Llama-3-8B_mbpp-all
  splits: gs://uncertainty-for-programs/aaai-experiments/splits/mbpp-all.npz
  train: gs://uncertainty-for-programs/aaai-experiments/train-runs/meta-llama_Meta-Llama-3-8B_mbpp-all
# Run record: model microsoft/phi-2 on task mbpp-all.
- cfg:
    batch_size: 64
    # -1 is presumably a "no limit / use default" sentinel for concurrency,
    # limit and top_k - TODO confirm against the consumer.
    concurrency: -1
    enforce_eager: false
    limit: -1
    max_new_tokens: 512
    max_prompt_tokens: 1536
    model: microsoft/phi-2
    num_samples: 100
    output_parser: extract_first_function
    prompt: mbpp_single_testcase
    # Each of the first seven stop strings is one newline followed by a
    # token. Double quotes make the newline explicit as \n; the parsed values
    # are the same as the folded single-quoted form they replace.
    stop_tokens:
    - "\nclass"
    - "\nassert"
    - "\n\"\"\""
    - "\nprint"
    - "\nif"
    - "\n<|/"
    - "\n```"
    - '<|endoftext|>'
    task: mbpp-all
    temperature: 0.2
    tensor_parallel_size: 1
    top_k: -1
    top_p: 0.95
    trust_remote_code: true
  # gs:// URIs tied to this run; presumably inference outputs, the dataset
  # split file (.npz) and training outputs respectively - TODO confirm.
  # Unlike some other entries, the infer/train run names carry no timestamp.
  infer: gs://uncertainty-for-programs/aaai-experiments/infer-runs/microsoft-phi-2_mbpp-all
  splits: gs://uncertainty-for-programs/aaai-experiments/splits/mbpp-all.npz
  train: gs://uncertainty-for-programs/aaai-experiments/train-runs/microsoft-phi-2_mbpp-all
# Run record: model microsoft/phi-2 on task gsm8k-main-test.
- cfg:
    # NOTE(review): several keys here (enforce_eager, tensor_parallel_size,
    # trust_remote_code) resemble vLLM engine arguments - confirm the backend.
    batch_size: 128
    # -1 is presumably a "no limit / use default" sentinel for concurrency,
    # limit and top_k - TODO confirm against the consumer.
    concurrency: -1
    enforce_eager: false
    limit: -1
    max_new_tokens: 512
    max_prompt_tokens: 1536
    model: microsoft/phi-2
    num_samples: 100
    output_parser: gsm8k_first_number_from_last_line
    prompt: gsm8k_cot_zeroshot
    # Three stop strings; 'Q:' is quoted because a plain scalar cannot end
    # with a colon.
    stop_tokens:
    - 'Q:'
    - </s>
    - <|im_end|>
    task: gsm8k-main-test
    temperature: 0.2
    tensor_parallel_size: 1
    top_k: -1
    top_p: 0.95
    trust_remote_code: true
  # gs:// URIs tied to this run; presumably inference outputs, the dataset
  # split file (.npz) and training outputs respectively - TODO confirm.
  # NOTE(review): the run name here is "microsoft-phi2_gsm", whereas the
  # phi-2 mbpp entry in this file uses "microsoft-phi-2_mbpp-all"; the
  # spelling is inconsistent but must match the actual bucket layout.
  infer: gs://uncertainty-for-programs/aaai-experiments/infer-runs/microsoft-phi2_gsm
  splits: gs://uncertainty-for-programs/aaai-experiments/splits/gsm8k-main-test.npz
  train: gs://uncertainty-for-programs/aaai-experiments/train-runs/microsoft-phi2_gsm
