{"trial": 24, "trial_seed": 65, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.594, "large_model_accuracy": 0.821, "small_model_cost": 3.442690000000002e-05, "large_model_cost": 0.0006723380000000003, "hybrid_results": {"0.05": {"accuracy": 0.822, "large_model_usage": 1.0, "small_model_usage": 0.0, "avg_lambda": 1.0, "avg_cost": 0.0007076456, "unrestricted": {"accuracy": 0.822, "large_model_usage": 1.0, "avg_cost": 0.0007076456}, "random_baseline": {"accuracy": 0.8219999999999998, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.0006731199999999993, "cost_std": 0.0, "small_model_fraction": 0.0}, "unrestricted_random_baseline": {"accuracy": 0.8219999999999998, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.0006731199999999993, "cost_std": 0.0, "small_model_fraction": 0.0}}, "0.1": {"accuracy": 0.764, "large_model_usage": 0.616, "small_model_usage": 0.384, "avg_lambda": 0.800000000000007, "avg_cost": 0.0004576056000000001, "unrestricted": {"accuracy": 0.768, "large_model_usage": 0.616, "avg_cost": 0.00046116560000000013}, "random_baseline": {"accuracy": 0.7504, "accuracy_std": 0.013230268326832993, "avg_cost": 0.00045486288000000055, "cost_std": 1.0217700349178398e-05, "small_model_fraction": 0.33661806480558204}, "unrestricted_random_baseline": {"accuracy": 0.7485999999999999, "accuracy_std": 0.010584894897919401, "avg_cost": 0.00046053710000000037, "cost_std": 1.5478563318150725e-05, "small_model_fraction": 0.33103734987524136}}, "0.15": {"accuracy": 0.696, "large_model_usage": 0.36, "small_model_usage": 0.64, "avg_lambda": 0.6499999999999982, "avg_cost": 0.00028539760000000026, "unrestricted": {"accuracy": 0.704, "large_model_usage": 0.36, "avg_cost": 0.00028930160000000026}, "random_baseline": {"accuracy": 0.6952, "accuracy_std": 0.012843675486401826, "avg_cost": 0.00029122910000000014, "cost_std": 1.3354681571419091e-05, "small_model_fraction": 0.6065741762449343}, "unrestricted_random_baseline": {"accuracy": 0.6934, "accuracy_std": 0.01743674281510165, "avg_cost": 0.0002849654800000001, "cost_std": 1.0594916865818257e-05, "small_model_fraction": 0.6004542012202012}}, "0.2": {"accuracy": 0.654, "large_model_usage": 0.198, "small_model_usage": 0.802, "avg_lambda": 0.34999999999999676, "avg_cost": 0.00016398959999999998, "unrestricted": {"accuracy": 0.668, "large_model_usage": 0.198, "avg_cost": 0.00017472559999999983}, "random_baseline": {"accuracy": 0.6477999999999999, "accuracy_std": 0.009937806599043884, "avg_cost": 0.00016374053999999993, "cost_std": 1.8658892676265657e-05, "small_model_fraction": 0.7968953667681908}, "unrestricted_random_baseline": {"accuracy": 0.649, "accuracy_std": 0.007113367697511502, "avg_cost": 0.00017736911999999988, "cost_std": 1.1668623125699095e-05, "small_model_fraction": 0.7800654354501753}}, "0.25": {"accuracy": 0.608, "large_model_usage": 0.02, "small_model_usage": 0.98, "avg_lambda": 0.0, "avg_cost": 4.5421600000000006e-05, "unrestricted": {"accuracy": 0.612, "large_model_usage": 0.02, "avg_cost": 4.7777599999999995e-05}, "random_baseline": {"accuracy": 0.6052, "accuracy_std": 0.004749736834815172, "avg_cost": 4.632865999999999e-05, "cost_std": 3.1328094867706143e-06, "small_model_fraction": 0.9827645262796023}, "unrestricted_random_baseline": {"accuracy": 0.603, "accuracy_std": 0.002720294101747091, "avg_cost": 4.6956e-05, "cost_std": 5.355623426642315e-06, "small_model_fraction": 0.9790712216796353}}}}