{"trial": 26, "trial_seed": 67, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.597, "large_model_accuracy": 0.823, "small_model_cost": 3.469199999999999e-05, "large_model_cost": 0.0006775519999999998, "hybrid_results": {"0.05": {"accuracy": 0.804, "large_model_usage": 0.854, "small_model_usage": 0.14600000000000002, "avg_lambda": 0.8999999999999925, "avg_cost": 0.0006237943999999997, "unrestricted": {"accuracy": 0.804, "large_model_usage": 0.854, "avg_cost": 0.0006240343999999998}, "random_baseline": {"accuracy": 0.8168, "accuracy_std": 0.004489988864128722, "avg_cost": 0.0006276428400000002, "cost_std": 6.947143249307622e-06, "small_model_fraction": 0.0836225616775038}, "unrestricted_random_baseline": {"accuracy": 0.8151999999999999, "accuracy_std": 0.005946427498927364, "avg_cost": 0.0006304392200000003, "cost_std": 9.518777474634088e-06, "small_model_fraction": 0.08324923000342221}}, "0.1": {"accuracy": 0.758, "large_model_usage": 0.554, "small_model_usage": 0.44599999999999995, "avg_lambda": 0.7599999999999948, "avg_cost": 0.00041391839999999975, "unrestricted": {"accuracy": 0.768, "large_model_usage": 0.554, "avg_cost": 0.0004203223999999997}, "random_baseline": {"accuracy": 0.7418, "accuracy_std": 0.01063766891757777, "avg_cost": 0.00041208741999999995, "cost_std": 1.3951650089491249e-05, "small_model_fraction": 0.4100948884671626}, "unrestricted_random_baseline": {"accuracy": 0.7404, "accuracy_std": 0.01953048898517394, "avg_cost": 0.0004280350600000001, "cost_std": 1.307427943316199e-05, "small_model_fraction": 0.400133154963756}}, "0.15": {"accuracy": 0.732, "large_model_usage": 0.526, "small_model_usage": 0.474, "avg_lambda": 0.6999999999999935, "avg_cost": 0.0003767624, "unrestricted": {"accuracy": 0.758, "large_model_usage": 0.526, "avg_cost": 0.0004012103999999999}, "random_baseline": {"accuracy": 0.7356, "accuracy_std": 0.011959933110180853, "avg_cost": 0.00038198713999999985, "cost_std": 1.4892934129996072e-05, "small_model_fraction": 0.46789285380953843}, "unrestricted_random_baseline": {"accuracy": 0.7377999999999999, "accuracy_std": 0.00695413546028549, "avg_cost": 0.0004061361599999999, "cost_std": 1.4410931717220824e-05, "small_model_fraction": 0.429862800609775}}, "0.2": {"accuracy": 0.68, "large_model_usage": 0.266, "small_model_usage": 0.734, "avg_lambda": 0.5, "avg_cost": 0.00021633839999999995, "unrestricted": {"accuracy": 0.688, "large_model_usage": 0.266, "avg_cost": 0.00022322639999999998}, "random_baseline": {"accuracy": 0.6782, "accuracy_std": 0.008738420909981368, "avg_cost": 0.00022429225999999998, "cost_std": 9.394576784954181e-06, "small_model_fraction": 0.7174401891547149}, "unrestricted_random_baseline": {"accuracy": 0.673, "accuracy_std": 0.009305912099305463, "avg_cost": 0.00021713163999999996, "cost_std": 7.377991948924833e-06, "small_model_fraction": 0.7067255701085772}}, "0.25": {"accuracy": 0.65, "large_model_usage": 0.174, "small_model_usage": 0.8260000000000001, "avg_lambda": 0.31000000000000166, "avg_cost": 0.00014206239999999998, "unrestricted": {"accuracy": 0.672, "large_model_usage": 0.174, "avg_cost": 0.00015709440000000015}, "random_baseline": {"accuracy": 0.653, "accuracy_std": 0.008955445270895252, "avg_cost": 0.00014614238000000004, "cost_std": 9.419397383463574e-06, "small_model_fraction": 0.8329801200883551}, "unrestricted_random_baseline": {"accuracy": 0.6572000000000001, "accuracy_std": 0.010628264204469148, "avg_cost": 0.00015397332000000003, "cost_std": 1.2635256455236664e-05, "small_model_fraction": 0.8095971129017202}}}}