problem_id,category,sample_id,provider,model,temperature,api_temperature,ground_truth_value,ground_truth_unit,generation_status,sample_index,final_numeric_value,parsed_value,final_unit,parsed_unit,final_answer_text,parse_status,parse_source,multiple_final_answers,unit_ambiguous,needs_manual_review,numeric_correct_answer_only,unit_compatible,unit_parse_failed,numeric_plus_unit_correct,answer_only_accept_but_numeric_unit_reject,wrong_unit_error,missing_or_unparseable,converted_numeric_value,answer_only_accept,numeric_plus_unit_accept,gap_case,wrong_unit_gap,conversion_gap
P00455,Unit conversion,1,openai,gpt-4.1-mini,0.7,0.7,0.05,mg,success,1,0.05,0.05,mg,mg,0.05 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,0.05,True,True,False,False,False
P00455,Unit conversion,2,openai,gpt-4.1-mini,0.7,0.7,0.05,mg,success,2,0.05,0.05,mg,mg,0.05 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,0.05,True,True,False,False,False
P00455,Unit conversion,3,openai,gpt-4.1-mini,0.7,0.7,0.05,mg,success,3,0.05,0.05,mg,mg,0.05 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,0.05,True,True,False,False,False
P00455,Unit conversion,4,openai,gpt-4.1-mini,0.7,0.7,0.05,mg,success,4,0.05,0.05,mg,mg,0.05 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,0.05,True,True,False,False,False
P00455,Unit conversion,5,openai,gpt-4.1-mini,0.7,0.7,0.05,mg,success,5,0.05,0.05,mg,mg,0.05 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,0.05,True,True,False,False,False
P00291,Flow rate,1,openai,gpt-4.1-mini,0.7,0.7,358.4844,mL,success,1,358.58,358.58,mL,mL,358.58 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,358.58,True,True,False,False,False
P00291,Flow rate,2,openai,gpt-4.1-mini,0.7,0.7,358.4844,mL,success,2,358.41,358.41,mL,mL,358.41 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,358.41,True,True,False,False,False
P00291,Flow rate,3,openai,gpt-4.1-mini,0.7,0.7,358.4844,mL,success,3,358.53,358.53,mL,mL,358.53 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,358.53,True,True,False,False,False
P00291,Flow rate,4,openai,gpt-4.1-mini,0.7,0.7,358.4844,mL,success,4,358.51,358.51,mL,mL,358.51 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,358.51,True,True,False,False,False
P00291,Flow rate,5,openai,gpt-4.1-mini,0.7,0.7,358.4844,mL,success,5,358.49,358.49,mL,mL,358.49 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,358.49,True,True,False,False,False
P00340,Imaging scale area,1,openai,gpt-4.1-mini,0.7,0.7,8505.260334479999,mm^2,success,1,8501.5,8501.5,mm^2,mm^2,8501.5 mm^2,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,8501.5,True,True,False,False,False
P00340,Imaging scale area,2,openai,gpt-4.1-mini,0.7,0.7,8505.260334479999,mm^2,success,2,8503.6,8503.6,mm^2,mm^2,8503.6 mm^2,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,8503.6,True,True,False,False,False
P00340,Imaging scale area,3,openai,gpt-4.1-mini,0.7,0.7,8505.260334479999,mm^2,success,3,8505.5,8505.5,mm^2,mm^2,8505.5 mm^2,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,8505.5,True,True,False,False,False
P00340,Imaging scale area,4,openai,gpt-4.1-mini,0.7,0.7,8505.260334479999,mm^2,success,4,8500.3,8500.3,mm^2,mm^2,8500.3 mm^2,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,8500.3,True,True,False,False,False
P00340,Imaging scale area,5,openai,gpt-4.1-mini,0.7,0.7,8505.260334479999,mm^2,success,5,8503.04,8503.04,mm^2,mm^2,8503.04 mm^2,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,8503.04,True,True,False,False,False
P00283,Flow rate,1,openai,gpt-4.1-mini,0.7,0.7,770.9948999999999,mL,success,1,770.81,770.81,mL,mL,770.81 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,770.81,True,True,False,False,False
P00283,Flow rate,2,openai,gpt-4.1-mini,0.7,0.7,770.9948999999999,mL,success,2,770.83,770.83,mL,mL,770.83 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,770.83,True,True,False,False,False
P00283,Flow rate,3,openai,gpt-4.1-mini,0.7,0.7,770.9948999999999,mL,success,3,770.54,770.54,mL,mL,770.54 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,770.54,True,True,False,False,False
P00283,Flow rate,4,openai,gpt-4.1-mini,0.7,0.7,770.9948999999999,mL,success,4,770.56,770.56,mL,mL,770.56 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,770.56,True,True,False,False,False
P00283,Flow rate,5,openai,gpt-4.1-mini,0.7,0.7,770.9948999999999,mL,success,5,770.8,770.8,mL,mL,770.8 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,770.8,True,True,False,False,False
P00098,Dilution,1,openai,gpt-4.1-mini,0.7,0.7,9.35077,mL,success,1,9.35,9.35,mL,mL,9.35 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,9.35,True,True,False,False,False
P00098,Dilution,2,openai,gpt-4.1-mini,0.7,0.7,9.35077,mL,success,2,9.35,9.35,mL,mL,9.35 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,9.35,True,True,False,False,False
P00098,Dilution,3,openai,gpt-4.1-mini,0.7,0.7,9.35077,mL,success,3,9.35,9.35,mL,mL,9.35 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,9.35,True,True,False,False,False
P00098,Dilution,4,openai,gpt-4.1-mini,0.7,0.7,9.35077,mL,success,4,9.35,9.35,mL,mL,9.35 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,9.35,True,True,False,False,False
P00098,Dilution,5,openai,gpt-4.1-mini,0.7,0.7,9.35077,mL,success,5,9.35,9.35,mL,mL,9.35 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,9.35,True,True,False,False,False
P00048,Dosage,1,openai,gpt-4.1-mini,0.7,0.7,617.924,mg,success,1,617.924,617.924,mg,mg,617.924 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,617.924,True,True,False,False,False
P00048,Dosage,2,openai,gpt-4.1-mini,0.7,0.7,617.924,mg,success,2,617.924,617.924,mg,mg,617.924 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,617.924,True,True,False,False,False
P00048,Dosage,3,openai,gpt-4.1-mini,0.7,0.7,617.924,mg,success,3,617.524,617.524,mg,mg,617.524 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,617.524,True,True,False,False,False
P00048,Dosage,4,openai,gpt-4.1-mini,0.7,0.7,617.924,mg,success,4,617.924,617.924,mg,mg,617.924 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,617.924,True,True,False,False,False
P00048,Dosage,5,openai,gpt-4.1-mini,0.7,0.7,617.924,mg,success,5,617.924,617.924,mg,mg,617.924 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,617.924,True,True,False,False,False
P00499,Unit conversion,1,openai,gpt-4.1-mini,0.7,0.7,0.25,L,success,1,0.25,0.25,L,L,0.25 L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,0.25,True,True,False,False,False
P00499,Unit conversion,2,openai,gpt-4.1-mini,0.7,0.7,0.25,L,success,2,0.25,0.25,L,L,0.25 L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,0.25,True,True,False,False,False
P00499,Unit conversion,3,openai,gpt-4.1-mini,0.7,0.7,0.25,L,success,3,0.25,0.25,L,L,0.25 L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,0.25,True,True,False,False,False
P00499,Unit conversion,4,openai,gpt-4.1-mini,0.7,0.7,0.25,L,success,4,0.25,0.25,L,L,0.25 L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,0.25,True,True,False,False,False
P00499,Unit conversion,5,openai,gpt-4.1-mini,0.7,0.7,0.25,L,success,5,0.25,0.25,L,L,0.25 L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,0.25,True,True,False,False,False
P00435,Molarity,1,openai,gpt-4.1-mini,0.7,0.7,18.635272200000003,mmol,success,1,18628.94,18628.94,mmol,mmol,18628.94 mmol,parsed,final_answer_line,False,False,False,False,True,False,False,False,False,False,18628.94,False,False,False,False,False
P00435,Molarity,2,openai,gpt-4.1-mini,0.7,0.7,18.635272200000003,mmol,success,2,18634.3,18634.3,mmol,mmol,18634.3 mmol,parsed,final_answer_line,False,False,False,False,True,False,False,False,False,False,18634.3,False,False,False,False,False
P00435,Molarity,3,openai,gpt-4.1-mini,0.7,0.7,18.635272200000003,mmol,success,3,18631.38,18631.38,mmol,mmol,18631.38 mmol,parsed,final_answer_line,False,False,False,False,True,False,False,False,False,False,18631.38,False,False,False,False,False
P00435,Molarity,4,openai,gpt-4.1-mini,0.7,0.7,18.635272200000003,mmol,success,4,18634.5,18634.5,mmol,mmol,18634.5 mmol,parsed,final_answer_line,False,False,False,False,True,False,False,False,False,False,18634.5,False,False,False,False,False
P00435,Molarity,5,openai,gpt-4.1-mini,0.7,0.7,18.635272200000003,mmol,success,5,18632.5,18632.5,mmol,mmol,18632.5 mmol,parsed,final_answer_line,False,False,False,False,True,False,False,False,False,False,18632.5,False,False,False,False,False
P00210,Exponential growth,1,openai,gpt-4.1-mini,0.7,0.7,231991.48540931472,cells,success,1,231000.0,231000.0,cells,cells,231000 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,231000.0,True,True,False,False,False
P00210,Exponential growth,2,openai,gpt-4.1-mini,0.7,0.7,231991.48540931472,cells,success,2,231500.0,231500.0,cells,cells,231500 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,231500.0,True,True,False,False,False
P00210,Exponential growth,3,openai,gpt-4.1-mini,0.7,0.7,231991.48540931472,cells,success,3,231500.0,231500.0,cells,cells,231500 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,231500.0,True,True,False,False,False
P00210,Exponential growth,4,openai,gpt-4.1-mini,0.7,0.7,231991.48540931472,cells,success,4,230500.0,230500.0,cells,cells,230500 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,230500.0,True,True,False,False,False
P00210,Exponential growth,5,openai,gpt-4.1-mini,0.7,0.7,231991.48540931472,cells,success,5,231000.0,231000.0,cells,cells,231000 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,231000.0,True,True,False,False,False
P00364,Bioink weight/volume percent,1,openai,gpt-4.1-mini,0.7,0.7,5.786639999999999,g,success,1,5.79,5.79,g,g,5.79 g,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,5.79,True,True,False,False,False
P00364,Bioink weight/volume percent,2,openai,gpt-4.1-mini,0.7,0.7,5.786639999999999,g,success,2,5.78,5.78,g,g,5.78 g,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,5.78,True,True,False,False,False
P00364,Bioink weight/volume percent,3,openai,gpt-4.1-mini,0.7,0.7,5.786639999999999,g,success,3,5.78,5.78,g,g,5.78 g,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,5.78,True,True,False,False,False
P00364,Bioink weight/volume percent,4,openai,gpt-4.1-mini,0.7,0.7,5.786639999999999,g,success,4,5.79,5.79,g,g,5.79 g,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,5.79,True,True,False,False,False
P00364,Bioink weight/volume percent,5,openai,gpt-4.1-mini,0.7,0.7,5.786639999999999,g,success,5,5.785,5.785,g,g,5.785 g,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,5.785,True,True,False,False,False
P00137,Cell count,1,openai,gpt-4.1-mini,0.7,0.7,1940000.0,cells,success,1,1940000.0,1940000.0,cells,cells,1.94e6 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,1940000.0,True,True,False,False,False
P00137,Cell count,2,openai,gpt-4.1-mini,0.7,0.7,1940000.0,cells,success,2,1940000.0,1940000.0,cells,cells,1.94e6 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,1940000.0,True,True,False,False,False
P00137,Cell count,3,openai,gpt-4.1-mini,0.7,0.7,1940000.0,cells,success,3,1940000.0,1940000.0,cells,cells,1.94e+6 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,1940000.0,True,True,False,False,False
P00137,Cell count,4,openai,gpt-4.1-mini,0.7,0.7,1940000.0,cells,success,4,1940000.0,1940000.0,cells,cells,1.94e6 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,1940000.0,True,True,False,False,False
P00137,Cell count,5,openai,gpt-4.1-mini,0.7,0.7,1940000.0,cells,success,5,1940000.0,1940000.0,cells,cells,1.94e+6 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,1940000.0,True,True,False,False,False
P00293,Flow rate,1,openai,gpt-4.1-mini,0.7,0.7,134.9898,mL,success,1,134.94,134.94,mL,mL,134.94 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,134.94,True,True,False,False,False
P00293,Flow rate,2,openai,gpt-4.1-mini,0.7,0.7,134.9898,mL,success,2,134.91,134.91,mL,mL,134.91 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,134.91,True,True,False,False,False
P00293,Flow rate,3,openai,gpt-4.1-mini,0.7,0.7,134.9898,mL,success,3,134.94,134.94,mL,mL,134.94 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,134.94,True,True,False,False,False
P00293,Flow rate,4,openai,gpt-4.1-mini,0.7,0.7,134.9898,mL,success,4,134.93,134.93,mL,mL,134.93 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,134.93,True,True,False,False,False
P00293,Flow rate,5,openai,gpt-4.1-mini,0.7,0.7,134.9898,mL,success,5,134.93,134.93,mL,mL,134.93 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,134.93,True,True,False,False,False
P00129,Cell count,1,openai,gpt-4.1-mini,0.7,0.7,2400000.0,cells,success,1,2400000.0,2400000.0,cells,cells,2.4e6 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,2400000.0,True,True,False,False,False
P00129,Cell count,2,openai,gpt-4.1-mini,0.7,0.7,2400000.0,cells,success,2,2400000.0,2400000.0,cells,cells,2.4e6 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,2400000.0,True,True,False,False,False
P00129,Cell count,3,openai,gpt-4.1-mini,0.7,0.7,2400000.0,cells,success,3,2400000.0,2400000.0,cells,cells,2.4e+6 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,2400000.0,True,True,False,False,False
P00129,Cell count,4,openai,gpt-4.1-mini,0.7,0.7,2400000.0,cells,success,4,2400000.0,2400000.0,cells,cells,2.4e+6 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,2400000.0,True,True,False,False,False
P00129,Cell count,5,openai,gpt-4.1-mini,0.7,0.7,2400000.0,cells,success,5,2400000.0,2400000.0,cells,cells,2.4e6 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,2400000.0,True,True,False,False,False
P00215,Exponential growth,1,openai,gpt-4.1-mini,0.7,0.7,94454.16491479974,cells,success,1,94850.0,94850.0,cells,cells,94850 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,94850.0,True,True,False,False,False
P00215,Exponential growth,2,openai,gpt-4.1-mini,0.7,0.7,94454.16491479974,cells,success,2,94350.0,94350.0,cells,cells,94350 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,94350.0,True,True,False,False,False
P00215,Exponential growth,3,openai,gpt-4.1-mini,0.7,0.7,94454.16491479974,cells,success,3,94650.0,94650.0,cells,cells,94650 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,94650.0,True,True,False,False,False
P00215,Exponential growth,4,openai,gpt-4.1-mini,0.7,0.7,94454.16491479974,cells,success,4,94900.0,94900.0,cells,cells,94900 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,94900.0,True,True,False,False,False
P00215,Exponential growth,5,openai,gpt-4.1-mini,0.7,0.7,94454.16491479974,cells,success,5,94850.0,94850.0,cells,cells,94850 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,94850.0,True,True,False,False,False
P00003,Dosage,1,openai,gpt-4.1-mini,0.7,0.7,695.875,mg,success,1,695.875,695.875,mg,mg,695.875 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,695.875,True,True,False,False,False
P00003,Dosage,2,openai,gpt-4.1-mini,0.7,0.7,695.875,mg,success,2,695.875,695.875,mg,mg,695.875 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,695.875,True,True,False,False,False
P00003,Dosage,3,openai,gpt-4.1-mini,0.7,0.7,695.875,mg,success,3,695.875,695.875,mg,mg,695.875 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,695.875,True,True,False,False,False
P00003,Dosage,4,openai,gpt-4.1-mini,0.7,0.7,695.875,mg,success,4,695.875,695.875,mg,mg,695.875 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,695.875,True,True,False,False,False
P00003,Dosage,5,openai,gpt-4.1-mini,0.7,0.7,695.875,mg,success,5,695.875,695.875,mg,mg,695.875 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,695.875,True,True,False,False,False
P00332,Imaging scale area,1,openai,gpt-4.1-mini,0.7,0.7,1999.8144,mm^2,success,1,1999.46,1999.46,mm^2,mm^2,1999.46 mm^2,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,1999.46,True,True,False,False,False
P00332,Imaging scale area,2,openai,gpt-4.1-mini,0.7,0.7,1999.8144,mm^2,success,2,1999.34,1999.34,mm^2,mm^2,1999.34 mm^2,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,1999.34,True,True,False,False,False
P00332,Imaging scale area,3,openai,gpt-4.1-mini,0.7,0.7,1999.8144,mm^2,success,3,2000.54,2000.54,mm^2,mm^2,2000.54 mm^2,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,2000.54,True,True,False,False,False
P00332,Imaging scale area,4,openai,gpt-4.1-mini,0.7,0.7,1999.8144,mm^2,success,4,1999.42,1999.42,mm^2,mm^2,1999.42 mm^2,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,1999.42,True,True,False,False,False
P00332,Imaging scale area,5,openai,gpt-4.1-mini,0.7,0.7,1999.8144,mm^2,success,5,2000.54,2000.54,mm^2,mm^2,2000.54 mm^2,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,2000.54,True,True,False,False,False
P00390,Bioink weight/volume percent,1,openai,gpt-4.1-mini,0.7,0.7,4.236319999999999,g,success,1,4.24,4.24,g,g,4.24 g,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,4.24,True,True,False,False,False
P00390,Bioink weight/volume percent,2,openai,gpt-4.1-mini,0.7,0.7,4.236319999999999,g,success,2,4.24,4.24,g,g,4.24 g,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,4.24,True,True,False,False,False
P00390,Bioink weight/volume percent,3,openai,gpt-4.1-mini,0.7,0.7,4.236319999999999,g,success,3,4.24,4.24,g,g,4.24 g,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,4.24,True,True,False,False,False
P00390,Bioink weight/volume percent,4,openai,gpt-4.1-mini,0.7,0.7,4.236319999999999,g,success,4,4.24,4.24,g,g,4.24 g,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,4.24,True,True,False,False,False
P00390,Bioink weight/volume percent,5,openai,gpt-4.1-mini,0.7,0.7,4.236319999999999,g,success,5,4.24,4.24,g,g,4.24 g,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,4.24,True,True,False,False,False
P00381,Bioink weight/volume percent,1,openai,gpt-4.1-mini,0.7,0.7,8.73059,g,success,1,8.73,8.73,g,g,8.73 g,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,8.73,True,True,False,False,False
P00381,Bioink weight/volume percent,2,openai,gpt-4.1-mini,0.7,0.7,8.73059,g,success,2,8.73,8.73,g,g,8.73 g,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,8.73,True,True,False,False,False
P00381,Bioink weight/volume percent,3,openai,gpt-4.1-mini,0.7,0.7,8.73059,g,success,3,8.73,8.73,g,g,8.73 g,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,8.73,True,True,False,False,False
P00381,Bioink weight/volume percent,4,openai,gpt-4.1-mini,0.7,0.7,8.73059,g,success,4,8.73,8.73,g,g,8.73 g,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,8.73,True,True,False,False,False
P00381,Bioink weight/volume percent,5,openai,gpt-4.1-mini,0.7,0.7,8.73059,g,success,5,8.73,8.73,g,g,8.73 g,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,8.73,True,True,False,False,False
P00437,Molarity,1,openai,gpt-4.1-mini,0.7,0.7,74.47133850000002,mmol,success,1,74432.63,74432.63,mmol,mmol,74432.63 mmol,parsed,final_answer_line,False,False,False,False,True,False,False,False,False,False,74432.63,False,False,False,False,False
P00437,Molarity,2,openai,gpt-4.1-mini,0.7,0.7,74.47133850000002,mmol,success,2,74430.83,74430.83,mmol,mmol,74430.83 mmol,parsed,final_answer_line,False,False,False,False,True,False,False,False,False,False,74430.83,False,False,False,False,False
P00437,Molarity,3,openai,gpt-4.1-mini,0.7,0.7,74.47133850000002,mmol,success,3,74448.42,74448.42,mmol,mmol,74448.42 mmol,parsed,final_answer_line,False,False,False,False,True,False,False,False,False,False,74448.42,False,False,False,False,False
P00437,Molarity,4,openai,gpt-4.1-mini,0.7,0.7,74.47133850000002,mmol,success,4,74447.8,74447.8,mmol,mmol,74447.8 mmol,parsed,final_answer_line,False,False,False,False,True,False,False,False,False,False,74447.8,False,False,False,False,False
P00437,Molarity,5,openai,gpt-4.1-mini,0.7,0.7,74.47133850000002,mmol,success,5,74433.56,74433.56,mmol,mmol,74433.56 mmol,parsed,final_answer_line,False,False,False,False,True,False,False,False,False,False,74433.56,False,False,False,False,False
P00120,Cell count,1,openai,gpt-4.1-mini,0.7,0.7,2405000.0,cells,success,1,2405000.0,2405000.0,cells,cells,"2,405,000 cells",parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,2405000.0,True,True,False,False,False
P00120,Cell count,2,openai,gpt-4.1-mini,0.7,0.7,2405000.0,cells,success,2,2405000.0,2405000.0,cells,cells,"2,405,000 cells",parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,2405000.0,True,True,False,False,False
P00120,Cell count,3,openai,gpt-4.1-mini,0.7,0.7,2405000.0,cells,success,3,2405000.0,2405000.0,cells,cells,"2,405,000 cells",parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,2405000.0,True,True,False,False,False
P00120,Cell count,4,openai,gpt-4.1-mini,0.7,0.7,2405000.0,cells,success,4,2405000.0,2405000.0,cells,cells,"2,405,000 cells",parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,2405000.0,True,True,False,False,False
P00120,Cell count,5,openai,gpt-4.1-mini,0.7,0.7,2405000.0,cells,success,5,2405000.0,2405000.0,cells,cells,"2,405,000 cells",parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,2405000.0,True,True,False,False,False
P00326,Imaging scale area,1,openai,gpt-4.1-mini,0.7,0.7,52.1676376,mm^2,success,1,52.17,52.17,mm^2,mm^2,52.17 mm^2,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,52.17,True,True,False,False,False
P00326,Imaging scale area,2,openai,gpt-4.1-mini,0.7,0.7,52.1676376,mm^2,success,2,52.16,52.16,mm^2,mm^2,52.16 mm^2,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,52.16,True,True,False,False,False
P00326,Imaging scale area,3,openai,gpt-4.1-mini,0.7,0.7,52.1676376,mm^2,success,3,52.13,52.13,mm^2,mm^2,52.13 mm^2,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,52.13,True,True,False,False,False
P00326,Imaging scale area,4,openai,gpt-4.1-mini,0.7,0.7,52.1676376,mm^2,success,4,52.14,52.14,mm^2,mm^2,52.14 mm^2,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,52.14,True,True,False,False,False
P00326,Imaging scale area,5,openai,gpt-4.1-mini,0.7,0.7,52.1676376,mm^2,success,5,52.154,52.154,mm^2,mm^2,52.154 mm^2,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,52.154,True,True,False,False,False
P00402,Molarity,1,openai,gpt-4.1-mini,0.7,0.7,19.4687844,mmol,success,1,19478.13,19478.13,mmol,mmol,19478.13 mmol,parsed,final_answer_line,False,False,False,False,True,False,False,False,False,False,19478.13,False,False,False,False,False
P00402,Molarity,2,openai,gpt-4.1-mini,0.7,0.7,19.4687844,mmol,success,2,19473.92,19473.92,mmol,mmol,19473.92 mmol,parsed,final_answer_line,False,False,False,False,True,False,False,False,False,False,19473.92,False,False,False,False,False
P00402,Molarity,3,openai,gpt-4.1-mini,0.7,0.7,19.4687844,mmol,success,3,19477.55,19477.55,mmol,mmol,19477.55 mmol,parsed,final_answer_line,False,False,False,False,True,False,False,False,False,False,19477.55,False,False,False,False,False
P00402,Molarity,4,openai,gpt-4.1-mini,0.7,0.7,19.4687844,mmol,success,4,19.47,19.47,mmol,mmol,19.47 mmol,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,19.47,True,True,False,False,False
P00402,Molarity,5,openai,gpt-4.1-mini,0.7,0.7,19.4687844,mmol,success,5,19.47,19.47,mmol,mmol,19.47 mmol,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,19.47,True,True,False,False,False
P00159,Half-life decay,1,openai,gpt-4.1-mini,0.7,0.7,6.946792578210926,mg/L,success,1,6.93,6.93,mg/L,mg/L,6.93 mg/L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,6.93,True,True,False,False,False
P00159,Half-life decay,2,openai,gpt-4.1-mini,0.7,0.7,6.946792578210926,mg/L,success,2,6.93,6.93,mg/L,mg/L,6.93 mg/L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,6.93,True,True,False,False,False
P00159,Half-life decay,3,openai,gpt-4.1-mini,0.7,0.7,6.946792578210926,mg/L,success,3,6.94,6.94,mg/L,mg/L,6.94 mg/L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,6.94,True,True,False,False,False
P00159,Half-life decay,4,openai,gpt-4.1-mini,0.7,0.7,6.946792578210926,mg/L,success,4,6.93,6.93,mg/L,mg/L,6.93 mg/L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,6.93,True,True,False,False,False
P00159,Half-life decay,5,openai,gpt-4.1-mini,0.7,0.7,6.946792578210926,mg/L,success,5,6.47,6.47,mg/L,mg/L,6.47 mg/L,parsed,final_answer_line,False,False,False,False,True,False,False,False,False,False,6.47,False,False,False,False,False
P00498,Unit conversion,1,openai,gpt-4.1-mini,0.7,0.7,2.0,L,success,1,2.0,2.0,L,L,2 L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,2.0,True,True,False,False,False
P00498,Unit conversion,2,openai,gpt-4.1-mini,0.7,0.7,2.0,L,success,2,2.0,2.0,L,L,2 L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,2.0,True,True,False,False,False
P00498,Unit conversion,3,openai,gpt-4.1-mini,0.7,0.7,2.0,L,success,3,2.0,2.0,L,L,2 L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,2.0,True,True,False,False,False
P00498,Unit conversion,4,openai,gpt-4.1-mini,0.7,0.7,2.0,L,success,4,2.0,2.0,L,L,2 L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,2.0,True,True,False,False,False
P00498,Unit conversion,5,openai,gpt-4.1-mini,0.7,0.7,2.0,L,success,5,2.0,2.0,L,L,2 L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,2.0,True,True,False,False,False
P00196,Half-life decay,1,openai,gpt-4.1-mini,0.7,0.7,0.0345668551431944,mg/L,success,1,0.0266,0.0266,mg/L,mg/L,0.0266 mg/L,parsed,final_answer_line,False,False,False,False,True,False,False,False,False,False,0.0266,False,False,False,False,False
P00196,Half-life decay,2,openai,gpt-4.1-mini,0.7,0.7,0.0345668551431944,mg/L,success,2,0.0423,0.0423,mg/L,mg/L,0.0423 mg/L,parsed,final_answer_line,False,False,False,False,True,False,False,False,False,False,0.0423,False,False,False,False,False
P00196,Half-life decay,3,openai,gpt-4.1-mini,0.7,0.7,0.0345668551431944,mg/L,success,3,0.0347,0.0347,mg/L,mg/L,0.0347 mg/L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,0.0347,True,True,False,False,False
P00196,Half-life decay,4,openai,gpt-4.1-mini,0.7,0.7,0.0345668551431944,mg/L,success,4,0.0345,0.0345,mg/L,mg/L,0.0345 mg/L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,0.0345,True,True,False,False,False
P00196,Half-life decay,5,openai,gpt-4.1-mini,0.7,0.7,0.0345668551431944,mg/L,success,5,0.0345,0.0345,mg/L,mg/L,0.0345 mg/L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,0.0345,True,True,False,False,False
P00057,Dilution,1,openai,gpt-4.1-mini,0.7,0.7,27.58316,mL,success,1,27.59,27.59,mL,mL,27.59 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,27.59,True,True,False,False,False
P00057,Dilution,2,openai,gpt-4.1-mini,0.7,0.7,27.58316,mL,success,2,27.58,27.58,mL,mL,27.58 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,27.58,True,True,False,False,False
P00057,Dilution,3,openai,gpt-4.1-mini,0.7,0.7,27.58316,mL,success,3,27.59,27.59,mL,mL,27.59 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,27.59,True,True,False,False,False
P00057,Dilution,4,openai,gpt-4.1-mini,0.7,0.7,27.58316,mL,success,4,27.58,27.58,mL,mL,27.58 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,27.58,True,True,False,False,False
P00057,Dilution,5,openai,gpt-4.1-mini,0.7,0.7,27.58316,mL,success,5,27.59,27.59,mL,mL,27.59 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,27.59,True,True,False,False,False
P00008,Dosage,1,openai,gpt-4.1-mini,0.7,0.7,286.145,mg,success,1,286.045,286.045,mg,mg,286.045 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,286.045,True,True,False,False,False
P00008,Dosage,2,openai,gpt-4.1-mini,0.7,0.7,286.145,mg,success,2,286.15,286.15,mg,mg,286.15 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,286.15,True,True,False,False,False
P00008,Dosage,3,openai,gpt-4.1-mini,0.7,0.7,286.145,mg,success,3,286.145,286.145,mg,mg,286.145 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,286.145,True,True,False,False,False
P00008,Dosage,4,openai,gpt-4.1-mini,0.7,0.7,286.145,mg,success,4,286.345,286.345,mg,mg,286.345 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,286.345,True,True,False,False,False
P00008,Dosage,5,openai,gpt-4.1-mini,0.7,0.7,286.145,mg,success,5,286.145,286.145,mg,mg,286.145 mg,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,286.145,True,True,False,False,False
P00204,Exponential growth,1,openai,gpt-4.1-mini,0.7,0.7,60175.52523998233,cells,success,1,60400.0,60400.0,cells,cells,60400 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,60400.0,True,True,False,False,False
P00204,Exponential growth,2,openai,gpt-4.1-mini,0.7,0.7,60175.52523998233,cells,success,2,60160.0,60160.0,cells,cells,60160 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,60160.0,True,True,False,False,False
P00204,Exponential growth,3,openai,gpt-4.1-mini,0.7,0.7,60175.52523998233,cells,success,3,60400.0,60400.0,cells,cells,60400 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,60400.0,True,True,False,False,False
P00204,Exponential growth,4,openai,gpt-4.1-mini,0.7,0.7,60175.52523998233,cells,success,4,60200.0,60200.0,cells,cells,60200 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,60200.0,True,True,False,False,False
P00204,Exponential growth,5,openai,gpt-4.1-mini,0.7,0.7,60175.52523998233,cells,success,5,60400.0,60400.0,cells,cells,60400 cells,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,60400.0,True,True,False,False,False
P00154,Half-life decay,1,openai,gpt-4.1-mini,0.7,0.7,3.1694976873608303,mg/L,success,1,3.2,3.2,mg/L,mg/L,3.20 mg/L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,3.2,True,True,False,False,False
P00154,Half-life decay,2,openai,gpt-4.1-mini,0.7,0.7,3.1694976873608303,mg/L,success,2,3.2,3.2,mg/L,mg/L,3.20 mg/L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,3.2,True,True,False,False,False
P00154,Half-life decay,3,openai,gpt-4.1-mini,0.7,0.7,3.1694976873608303,mg/L,success,3,3.14,3.14,mg/L,mg/L,3.14 mg/L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,3.14,True,True,False,False,False
P00154,Half-life decay,4,openai,gpt-4.1-mini,0.7,0.7,3.1694976873608303,mg/L,success,4,3.14,3.14,mg/L,mg/L,3.14 mg/L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,3.14,True,True,False,False,False
P00154,Half-life decay,5,openai,gpt-4.1-mini,0.7,0.7,3.1694976873608303,mg/L,success,5,3.16,3.16,mg/L,mg/L,3.16 mg/L,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,3.16,True,True,False,False,False
P00088,Dilution,1,openai,gpt-4.1-mini,0.7,0.7,1.3368000000000002,mL,success,1,1.34,1.34,mL,mL,1.34 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,1.34,True,True,False,False,False
P00088,Dilution,2,openai,gpt-4.1-mini,0.7,0.7,1.3368000000000002,mL,success,2,1.34,1.34,mL,mL,1.34 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,1.34,True,True,False,False,False
P00088,Dilution,3,openai,gpt-4.1-mini,0.7,0.7,1.3368000000000002,mL,success,3,1.34,1.34,mL,mL,1.34 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,1.34,True,True,False,False,False
P00088,Dilution,4,openai,gpt-4.1-mini,0.7,0.7,1.3368000000000002,mL,success,4,1.34,1.34,mL,mL,1.34 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,1.34,True,True,False,False,False
P00088,Dilution,5,openai,gpt-4.1-mini,0.7,0.7,1.3368000000000002,mL,success,5,1.34,1.34,mL,mL,1.34 mL,parsed,final_answer_line,False,False,False,True,True,False,True,False,False,False,1.34,True,True,False,False,False
