{"trial": 4, "trial_seed": 45, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.587, "large_model_accuracy": 0.828, "small_model_cost": 3.466070000000005e-05, "large_model_cost": 0.0006758779999999994, "hybrid_results": {"0.05": {"accuracy": 0.834, "large_model_usage": 1.0, "small_model_usage": 0.0, "avg_lambda": 1.0, "avg_cost": 0.0007080287999999999, "unrestricted": {"accuracy": 0.834, "large_model_usage": 1.0, "avg_cost": 0.0007080287999999999}, "random_baseline": {"accuracy": 0.834, "accuracy_std": 0.0, "avg_cost": 0.0006734719999999997, "cost_std": 0.0, "small_model_fraction": 0.0}, "unrestricted_random_baseline": {"accuracy": 0.834, "accuracy_std": 0.0, "avg_cost": 0.0006734719999999997, "cost_std": 0.0, "small_model_fraction": 0.0}}, "0.1": {"accuracy": 0.748, "large_model_usage": 0.512, "small_model_usage": 0.488, "avg_lambda": 0.7799999999999914, "avg_cost": 0.0003872888, "unrestricted": {"accuracy": 0.754, "large_model_usage": 0.512, "avg_cost": 0.0003899687999999999}, "random_baseline": {"accuracy": 0.7234, "accuracy_std": 0.0139441744108427, "avg_cost": 0.0003832515600000001, "cost_std": 2.018956540984469e-05, "small_model_fraction": 0.450064588088936}, "unrestricted_random_baseline": {"accuracy": 0.728, "accuracy_std": 0.015491933384829681, "avg_cost": 0.00038630650000000017, "cost_std": 1.3239534336901885e-05, "small_model_fraction": 0.44588503772434057}}, "0.15": {"accuracy": 0.71, "large_model_usage": 0.474, "small_model_usage": 0.526, "avg_lambda": 0.6999999999999935, "avg_cost": 0.00034173679999999993, "unrestricted": {"accuracy": 0.744, "large_model_usage": 0.474, "avg_cost": 0.0003620167999999999}, "random_baseline": {"accuracy": 0.7108, "accuracy_std": 0.010205880657738467, "avg_cost": 0.00034088896000000003, "cost_std": 1.3269428891342695e-05, "small_model_fraction": 0.5211044680173161}, "unrestricted_random_baseline": {"accuracy": 0.72, "accuracy_std": 0.008099382692526642, "avg_cost": 0.0003604185600000001, "cost_std": 1.329204628213427e-05, "small_model_fraction": 0.4894771242135854}}, "0.2": {"accuracy": 0.658, "large_model_usage": 0.236, "small_model_usage": 0.764, "avg_lambda": 0.44999999999999624, "avg_cost": 0.0001864928000000001, "unrestricted": {"accuracy": 0.666, "large_model_usage": 0.236, "avg_cost": 0.00019516080000000005}, "random_baseline": {"accuracy": 0.6534000000000001, "accuracy_std": 0.008901685233707162, "avg_cost": 0.00018330250000000005, "cost_std": 1.12861499866872e-05, "small_model_fraction": 0.7632127205551065}, "unrestricted_random_baseline": {"accuracy": 0.6598, "accuracy_std": 0.0086925255248403, "avg_cost": 0.00018677422000000005, "cost_std": 1.3651518829624803e-05, "small_model_fraction": 0.7496946822863324}}, "0.25": {"accuracy": 0.614, "large_model_usage": 0.112, "small_model_usage": 0.888, "avg_lambda": 0.05999999999999949, "avg_cost": 9.87488e-05, "unrestricted": {"accuracy": 0.632, "large_model_usage": 0.112, "avg_cost": 0.00011232880000000004}, "random_baseline": {"accuracy": 0.6202, "accuracy_std": 0.0071805292284065035, "avg_cost": 9.698729999999997e-05, "cost_std": 8.35226589866487e-06, "small_model_fraction": 0.9000524471189408}, "unrestricted_random_baseline": {"accuracy": 0.625, "accuracy_std": 0.008497058314499208, "avg_cost": 0.00011068114000000002, "cost_std": 6.456818979063941e-06, "small_model_fraction": 0.878873979226699}}}}