# Experiment name and optional task / prompt selection.
exp_name: main
# Both selectors are left unset (null).
# NOTE(review): presumably null means "no filter, use everything" — confirm
# against the code that reads this config.
task_names: null
prompt_idxs: null

# Model identifiers for this experiment, in list order.
models:
  - gpt-4o-2024-08-06
  - gpt-4o-mini-2024-07-18
  - o1-mini-2024-09-12
  - o1-preview-2024-09-12
  - gemini-1.0-pro-latest
  - gemini-1.5-pro-latest
  - claude-3-5-sonnet-20240620
  - mixtral-8x22b-instruct
  - llama3-70b
  - llama3-8b
  - Qwen2-72B
# NOTE(review): presumably a position in the model list to resume from —
# confirm against the code that reads this config.
resume_model: 0
# Unset (null). NOTE(review): presumably the system prompt — confirm.
system: null
# NOTE(review): presumably the per-response generation token limit — confirm.
max_tokens: 2048
seed: 12
# NOTE(review): presumably the number of concurrent workers — confirm.
num_workers: 100

# Evaluator names for this experiment, in list order.
evaluators:
  - final_match
  - order_consistency
  - order_consistency_loose
  - order_consistency_list
  - order_consistency_first
  - order_invariant
  - order_invariant_loose