{"trial": 10, "trial_seed": 51, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.586, "large_model_accuracy": 0.846, "small_model_cost": 3.482450000000001e-05, "large_model_cost": 0.0006813299999999989, "hybrid_results": {"0.05": {"accuracy": 0.788, "large_model_usage": 0.808, "small_model_usage": 0.19199999999999995, "avg_lambda": 0.8999999999999925, "avg_cost": 0.0005917324000000003, "unrestricted": {"accuracy": 0.788, "large_model_usage": 0.808, "avg_cost": 0.0005923284000000003}, "random_baseline": {"accuracy": 0.8074, "accuracy_std": 0.008581375181169958, "avg_cost": 0.0005901812199999994, "cost_std": 9.902982396207653e-06, "small_model_fraction": 0.13858752941776784}, "unrestricted_random_baseline": {"accuracy": 0.8118000000000001, "accuracy_std": 0.0072360210060501765, "avg_cost": 0.0005913463999999993, "cost_std": 7.055481344883437e-06, "small_model_fraction": 0.13766565017621465}}, "0.1": {"accuracy": 0.734, "large_model_usage": 0.522, "small_model_usage": 0.478, "avg_lambda": 0.7599999999999948, "avg_cost": 0.0003916124000000005, "unrestricted": {"accuracy": 0.738, "large_model_usage": 0.522, "avg_cost": 0.0003998244000000007}, "random_baseline": {"accuracy": 0.7134, "accuracy_std": 0.011934822998268564, "avg_cost": 0.00038787732000000025, "cost_std": 1.1485937723042026e-05, "small_model_fraction": 0.4481285928735314}, "unrestricted_random_baseline": {"accuracy": 0.7227999999999999, "accuracy_std": 0.014675149062275326, "avg_cost": 0.0003986583200000002, "cost_std": 1.8570588585653317e-05, "small_model_fraction": 0.43542645808890823}}, "0.15": {"accuracy": 0.698, "large_model_usage": 0.362, "small_model_usage": 0.638, "avg_lambda": 0.6699999999999996, "avg_cost": 0.0002820803999999998, "unrestricted": {"accuracy": 0.698, "large_model_usage": 0.362, "avg_cost": 0.0002866683999999998}, "random_baseline": {"accuracy": 0.6732, "accuracy_std": 0.010476640682967022, "avg_cost": 0.00027268997999999994, "cost_std": 1.15210807581408e-05, "small_model_fraction": 0.617550198722207}, "unrestricted_random_baseline": {"accuracy": 0.6758, "accuracy_std": 0.00750732975164937, "avg_cost": 0.00028918912, "cost_std": 1.316901754154816e-05, "small_model_fraction": 0.610453584694948}}, "0.2": {"accuracy": 0.646, "large_model_usage": 0.248, "small_model_usage": 0.752, "avg_lambda": 0.4000000000000035, "avg_cost": 0.00019496839999999974, "unrestricted": {"accuracy": 0.654, "large_model_usage": 0.248, "avg_cost": 0.00020637239999999975}, "random_baseline": {"accuracy": 0.628, "accuracy_std": 0.00824621125123533, "avg_cost": 0.00019392963999999985, "cost_std": 1.3347028572023095e-05, "small_model_fraction": 0.7522930586050699}, "unrestricted_random_baseline": {"accuracy": 0.6474, "accuracy_std": 0.011629273408085316, "avg_cost": 0.00020938465999999987, "cost_std": 1.3141954267474907e-05, "small_model_fraction": 0.734653610835484}}, "0.25": {"accuracy": 0.584, "large_model_usage": 0.146, "small_model_usage": 0.854, "avg_lambda": 0.079999999999999, "avg_cost": 0.00011785239999999996, "unrestricted": {"accuracy": 0.612, "large_model_usage": 0.146, "avg_cost": 0.0001343564}, "random_baseline": {"accuracy": 0.6002, "accuracy_std": 0.005095095681142804, "avg_cost": 0.00011208653999999994, "cost_std": 8.823957183508998e-06, "small_model_fraction": 0.8715743330876534}, "unrestricted_random_baseline": {"accuracy": 0.6038, "accuracy_std": 0.007400000000000006, "avg_cost": 0.00013096557999999994, "cost_std": 1.0130913012932257e-05, "small_model_fraction": 0.8460463213383332}}}}