# generation
# Experiment name and sample count.
exp_name: serial_subtask
num_samples: 10
# Series-count range: 5 to 50 with a step of 5.
# NOTE(review): whether the upper bound is inclusive depends on the consumer
# of this config — confirm.
min_num_series: 5
max_num_series: 50
step_num_series: 5

# Both selectors are unset. They are written as explicit nulls, which parse
# to the same value as a bare `key:`. Presumably null means "no filter" —
# verify against the code that reads this config.
task_names: null
prompt_idxs: null

# Model identifiers, one per line.
# NOTE(review): keep the order stable — `resume_model` below looks like an
# index into this list (confirm with the consumer).
models:
  - claude-3-sonnet-20240229
  - claude-3-5-sonnet-20240620
  - gemini-1.5-pro-latest
  - gemini-1.0-pro-latest
  - o1-mini-2024-09-12
  - gpt-4o-2024-05-13
  - gpt-4-turbo-2024-04-09
  - gpt-3.5-turbo-0125
# Run controls.
# NOTE(review): resume_model looks like a 0-based index into `models` —
# confirm with the consumer.
resume_model: 0
# `system` is unset; an explicit null parses to the same value as a bare
# `system:`. Presumably this is the system prompt — TODO confirm.
system: null
max_tokens: 1536
seed: 42

# Evaluator names; currently a single entry.
evaluators:
  - serial_subtask_evaluator
