{"trial": 22, "trial_seed": 63, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.588, "large_model_accuracy": 0.832, "small_model_cost": 3.4621799999999994e-05, "large_model_cost": 0.0006774519999999993, "hybrid_results": {"0.05": {"accuracy": 0.774, "large_model_usage": 0.8, "small_model_usage": 0.19999999999999996, "avg_lambda": 0.8600000000000094, "avg_cost": 0.0005671730000000001, "unrestricted": {"accuracy": 0.786, "large_model_usage": 0.8, "avg_cost": 0.000587377}, "random_baseline": {"accuracy": 0.78, "accuracy_std": 0.009165151389911688, "avg_cost": 0.0005713678600000003, "cost_std": 4.455228230337861e-06, "small_model_fraction": 0.17155230105866104}, "unrestricted_random_baseline": {"accuracy": 0.789, "accuracy_std": 0.0042190046219458014, "avg_cost": 0.0005903406800000002, "cost_std": 1.0742358134115667e-05, "small_model_fraction": 0.14012253935798186}}, "0.1": {"accuracy": 0.742, "large_model_usage": 0.522, "small_model_usage": 0.478, "avg_lambda": 0.7599999999999948, "avg_cost": 0.0003902490000000002, "unrestricted": {"accuracy": 0.744, "large_model_usage": 0.522, "avg_cost": 0.0003952570000000002}, "random_baseline": {"accuracy": 0.7156, "accuracy_std": 0.01519999999999999, "avg_cost": 0.0003832557400000002, "cost_std": 1.2560105225371336e-05, "small_model_fraction": 0.4467789472243206}, "unrestricted_random_baseline": {"accuracy": 0.716, "accuracy_std": 0.011207140580897529, "avg_cost": 0.0003897497000000003, "cost_std": 1.660874132624142e-05, "small_model_fraction": 0.43898839849154475}}, "0.15": {"accuracy": 0.67, "large_model_usage": 0.3, "small_model_usage": 0.7, "avg_lambda": 0.5500000000000052, "avg_cost": 0.00023576499999999987, "unrestricted": {"accuracy": 0.676, "large_model_usage": 0.3, "avg_cost": 0.0002454289999999998}, "random_baseline": {"accuracy": 0.6714, "accuracy_std": 0.008627861844049177, "avg_cost": 0.00023108665999999983, "cost_std": 1.2134471270244974e-05, "small_model_fraction": 0.6870974636848112}, "unrestricted_random_baseline": {"accuracy": 0.6682, "accuracy_std": 0.011847362575695896, "avg_cost": 0.00024404503999999987, "cost_std": 1.0912983435816246e-05, "small_model_fraction": 0.6720639447244389}}, "0.2": {"accuracy": 0.646, "large_model_usage": 0.216, "small_model_usage": 0.784, "avg_lambda": 0.4000000000000035, "avg_cost": 0.0001752609999999999, "unrestricted": {"accuracy": 0.656, "large_model_usage": 0.216, "avg_cost": 0.00018861699999999986}, "random_baseline": {"accuracy": 0.6444000000000001, "accuracy_std": 0.010836973747315263, "avg_cost": 0.00017110541999999996, "cost_std": 1.885742556945668e-05, "small_model_fraction": 0.7812187417454873}, "unrestricted_random_baseline": {"accuracy": 0.6466000000000001, "accuracy_std": 0.010547037498748177, "avg_cost": 0.0001864030199999999, "cost_std": 5.508947615978929e-06, "small_model_fraction": 0.7604418709637475}}, "0.25": {"accuracy": 0.614, "large_model_usage": 0.128, "small_model_usage": 0.872, "avg_lambda": 0.10999999999999954, "avg_cost": 0.00011158499999999997, "unrestricted": {"accuracy": 0.634, "large_model_usage": 0.128, "avg_cost": 0.00012659299999999996}, "random_baseline": {"accuracy": 0.6218, "accuracy_std": 0.004044749683231341, "avg_cost": 0.00010850859999999999, "cost_std": 1.0632654244354982e-05, "small_model_fraction": 0.880274448835789}, "unrestricted_random_baseline": {"accuracy": 0.6298, "accuracy_std": 0.007400000000000006, "avg_cost": 0.00013002281999999999, "cost_std": 6.491688889310705e-06, "small_model_fraction": 0.8569276925695153}}}}