{"trial": 18, "trial_seed": 59, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.575, "large_model_accuracy": 0.823, "small_model_cost": 3.434260000000001e-05, "large_model_cost": 0.0006703319999999992, "hybrid_results": {"0.05": {"accuracy": 0.798, "large_model_usage": 0.804, "small_model_usage": 0.19599999999999995, "avg_lambda": 0.939999999999998, "avg_cost": 0.0005785057999999999, "unrestricted": {"accuracy": 0.798, "large_model_usage": 0.804, "avg_cost": 0.0005785057999999999}, "random_baseline": {"accuracy": 0.8054, "accuracy_std": 0.0058685603004484735, "avg_cost": 0.0005793072600000001, "cost_std": 1.3218251571535352e-05, "small_model_fraction": 0.1443832239971285}, "unrestricted_random_baseline": {"accuracy": 0.8032, "accuracy_std": 0.006144916598294892, "avg_cost": 0.0005734541600000002, "cost_std": 6.947731301540077e-06, "small_model_fraction": 0.1443832239971285}}, "0.1": {"accuracy": 0.76, "large_model_usage": 0.624, "small_model_usage": 0.376, "avg_lambda": 0.800000000000007, "avg_cost": 0.0004528458000000003, "unrestricted": {"accuracy": 0.766, "large_model_usage": 0.624, "avg_cost": 0.0004572738000000002}, "random_baseline": {"accuracy": 0.7514000000000001, "accuracy_std": 0.008765842800324459, "avg_cost": 0.0004490494600000003, "cost_std": 7.487255511627801e-06, "small_model_fraction": 0.3419651333811526}, "unrestricted_random_baseline": {"accuracy": 0.7462, "accuracy_std": 0.011981652640600136, "avg_cost": 0.0004483482000000003, "cost_std": 1.3571859251849063e-05, "small_model_fraction": 0.33500275319053946}}, "0.15": {"accuracy": 0.712, "large_model_usage": 0.514, "small_model_usage": 0.486, "avg_lambda": 0.6999999999999935, "avg_cost": 0.0003634458000000003, "unrestricted": {"accuracy": 0.742, "large_model_usage": 0.514, "avg_cost": 0.00038405380000000014}, "random_baseline": {"accuracy": 0.7142, "accuracy_std": 0.0103324730824716, "avg_cost": 0.0003689364000000001, "cost_std": 1.1275495009266778e-05, "small_model_fraction": 0.4825335139233442}, "unrestricted_random_baseline": {"accuracy": 0.7252, "accuracy_std": 0.006823488843692799, "avg_cost": 0.00037914342000000024, "cost_std": 8.71989685464232e-06, "small_model_fraction": 0.4501304581491443}}, "0.2": {"accuracy": 0.672, "large_model_usage": 0.3, "small_model_usage": 0.7, "avg_lambda": 0.5, "avg_cost": 0.0002293617999999999, "unrestricted": {"accuracy": 0.686, "large_model_usage": 0.3, "avg_cost": 0.00023872979999999984}, "random_baseline": {"accuracy": 0.6656000000000002, "accuracy_std": 0.013705473359209444, "avg_cost": 0.0002282635399999999, "cost_std": 1.3871334200011227e-05, "small_model_fraction": 0.6933609270846336}, "unrestricted_random_baseline": {"accuracy": 0.6674, "accuracy_std": 0.011629273408085315, "avg_cost": 0.00023994343999999982, "cost_std": 1.2358630450596038e-05, "small_model_fraction": 0.6786311218394518}}, "0.25": {"accuracy": 0.62, "large_model_usage": 0.198, "small_model_usage": 0.802, "avg_lambda": 0.29999999999999993, "avg_cost": 0.00015213779999999994, "unrestricted": {"accuracy": 0.656, "large_model_usage": 0.198, "avg_cost": 0.00016832979999999994}, "random_baseline": {"accuracy": 0.6307999999999999, "accuracy_std": 0.008109253973085324, "avg_cost": 0.00015016721999999992, "cost_std": 1.4637985193174646e-05, "small_model_fraction": 0.8147843344558886}, "unrestricted_random_baseline": {"accuracy": 0.6364000000000001, "accuracy_std": 0.006858571279792905, "avg_cost": 0.00016875657999999995, "cost_std": 1.2633837802172392e-05, "small_model_fraction": 0.789324790633303}}}}