method,split,accuracy,valid_precision,valid_recall,invalid_precision,invalid_recall,invalid_f1,macro_f1,false_accept_rate,false_reject_rate,n_evaluated,total_rows,coverage
answer_only,all,0.6666666666666666,0.3333333333333333,1.0,1.0,0.6,0.7499999999999999,0.625,0.4,0.0,3000,3000,1.0
unit_only,all,0.3333333333333333,0.2,1.0,1.0,0.2,0.33333333333333337,0.33333333333333337,0.8,0.0,3000,3000,1.0
numeric_plus_unit,all,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,3000,3000,1.0
step_aware,all,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,3000,3000,1.0
learned_baseline,test,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,900,900,1.0
