# generation
# Name of this experiment.
# NOTE(review): presumably used to label outputs — confirm in the consumer.
exp_name: subtask
# NOTE(review): presumably samples drawn per task/prompt — confirm.
num_samples: 20

# No value is given for either key, so both load as null. Spelled out
# explicitly to avoid the ambiguous bare `key:` form.
# NOTE(review): null presumably means "no filter / use all" — confirm.
task_names: null
prompt_idxs: null

# Model identifiers for this run, one per line (order preserved).
# NOTE(review): `resume_model` presumably indexes into this list, so the
# order may be significant — confirm before reordering.
models:
  - claude-3-sonnet-20240229
  - claude-3-5-sonnet-20240620
  - gemini-1.5-pro-latest
  - gemini-1.0-pro-latest
  - o1-mini-2024-09-12
  - gpt-4o-mini-2024-07-18
  - gpt-4o-2024-05-13
  - gpt-4-turbo-2024-04-09
  - gpt-3.5-turbo-0125
# NOTE(review): presumably the position in `models` to resume from — confirm.
resume_model: 0
# No value is given, so this loads as null; written explicitly instead of a
# bare `system:`.
# NOTE(review): if an empty string is what the consumer expects, use "".
system: null
max_tokens: 1536
seed: 42
num_workers: 100

# Evaluators listed for this experiment (currently a single entry).
evaluators:
  - subtask_evaluator