{"trial": 17, "trial_seed": 58, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.594, "large_model_accuracy": 0.826, "small_model_cost": 3.466269999999994e-05, "large_model_cost": 0.0006769980000000016, "hybrid_results": {"0.05": {"accuracy": 0.824, "large_model_usage": 1.0, "small_model_usage": 0.0, "avg_lambda": 1.0, "avg_cost": 0.0007129683999999999, "unrestricted": {"accuracy": 0.824, "large_model_usage": 1.0, "avg_cost": 0.0007129683999999999}, "random_baseline": {"accuracy": 0.8240000000000001, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.000678248, "cost_std": 1.0842021724855044e-19, "small_model_fraction": 0.0}, "unrestricted_random_baseline": {"accuracy": 0.8240000000000001, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.000678248, "cost_std": 1.0842021724855044e-19, "small_model_fraction": 0.0}}, "0.1": {"accuracy": 0.76, "large_model_usage": 0.55, "small_model_usage": 0.44999999999999996, "avg_lambda": 0.7799999999999914, "avg_cost": 0.0004114604000000002, "unrestricted": {"accuracy": 0.76, "large_model_usage": 0.55, "avg_cost": 0.00041476840000000007}, "random_baseline": {"accuracy": 0.728, "accuracy_std": 0.00885437744847147, "avg_cost": 0.0004174977199999999, "cost_std": 1.1130441309921203e-05, "small_model_fraction": 0.41339406381682703}, "unrestricted_random_baseline": {"accuracy": 0.7310000000000001, "accuracy_std": 0.009219544457292894, "avg_cost": 0.00041318137999999993, "cost_std": 7.145235520512939e-06, "small_model_fraction": 0.4082441055318007}}, "0.15": {"accuracy": 0.73, "large_model_usage": 0.534, "small_model_usage": 0.46599999999999997, "avg_lambda": 0.6999999999999935, "avg_cost": 0.0003796084000000001, "unrestricted": {"accuracy": 0.758, "large_model_usage": 0.534, "avg_cost": 0.00040273640000000014}, "random_baseline": {"accuracy": 0.72, "accuracy_std": 0.011593101396951562, "avg_cost": 0.00037737644000000004, "cost_std": 9.61689094408364e-06, "small_model_fraction": 0.4629818725516109}, "unrestricted_random_baseline": {"accuracy": 0.731, "accuracy_std": 0.01780449381476487, "avg_cost": 0.00040219306, "cost_std": 1.3968543175592745e-05, "small_model_fraction": 0.4269757554971691}}, "0.2": {"accuracy": 0.664, "large_model_usage": 0.24, "small_model_usage": 0.76, "avg_lambda": 0.4000000000000035, "avg_cost": 0.0001913203999999997, "unrestricted": {"accuracy": 0.674, "large_model_usage": 0.24, "avg_cost": 0.00020193639999999973}, "random_baseline": {"accuracy": 0.6518, "accuracy_std": 0.007871467461661778, "avg_cost": 0.00019393117999999985, "cost_std": 9.292226978049957e-06, "small_model_fraction": 0.7561122672224314}, "unrestricted_random_baseline": {"accuracy": 0.6634, "accuracy_std": 0.009961927524329825, "avg_cost": 0.0002052435799999999, "cost_std": 1.3105796507942612e-05, "small_model_fraction": 0.7395850734032533}}, "0.25": {"accuracy": 0.606, "large_model_usage": 0.046, "small_model_usage": 0.954, "avg_lambda": 0.0399999999999995, "avg_cost": 6.267239999999999e-05, "unrestricted": {"accuracy": 0.618, "large_model_usage": 0.046, "avg_cost": 6.82684e-05}, "random_baseline": {"accuracy": 0.6096, "accuracy_std": 0.004079215610874231, "avg_cost": 6.262975999999997e-05, "cost_std": 3.901184492740631e-06, "small_model_fraction": 0.9563939581087947}, "unrestricted_random_baseline": {"accuracy": 0.6098, "accuracy_std": 0.004512205669071395, "avg_cost": 6.636134e-05, "cost_std": 2.7608568656125554e-06, "small_model_fraction": 0.9476819972372685}}}}