{"trial": 20, "trial_seed": 61, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.573, "large_model_accuracy": 0.816, "small_model_cost": 3.486559999999997e-05, "large_model_cost": 0.0006818879999999995, "hybrid_results": {"0.05": {"accuracy": 0.776, "large_model_usage": 0.838, "small_model_usage": 0.16200000000000003, "avg_lambda": 0.8899999999999905, "avg_cost": 0.0006025840000000003, "unrestricted": {"accuracy": 0.778, "large_model_usage": 0.838, "avg_cost": 0.0006110120000000003}, "random_baseline": {"accuracy": 0.785, "accuracy_std": 0.004312771730569568, "avg_cost": 0.0005967136599999999, "cost_std": 9.29930610832884e-06, "small_model_fraction": 0.12256762671585912}, "unrestricted_random_baseline": {"accuracy": 0.79, "accuracy_std": 0.005059644256269411, "avg_cost": 0.0006063483199999998, "cost_std": 7.618368071023995e-06, "small_model_fraction": 0.10954180257128546}}, "0.1": {"accuracy": 0.736, "large_model_usage": 0.56, "small_model_usage": 0.43999999999999995, "avg_lambda": 0.7599999999999948, "avg_cost": 0.00041486399999999987, "unrestricted": {"accuracy": 0.744, "large_model_usage": 0.56, "avg_cost": 0.0004229679999999997}, "random_baseline": {"accuracy": 0.7118, "accuracy_std": 0.008364209466530602, "avg_cost": 0.00040382746000000016, "cost_std": 7.346849839244111e-06, "small_model_fraction": 0.4126966856170665}, "unrestricted_random_baseline": {"accuracy": 0.7134, "accuracy_std": 0.012745195173083868, "avg_cost": 0.00042297544000000016, "cost_std": 1.199346007315652e-05, "small_model_fraction": 0.4001716169332005}}, "0.15": {"accuracy": 0.696, "large_model_usage": 0.528, "small_model_usage": 0.472, "avg_lambda": 0.6999999999999935, "avg_cost": 0.0003714079999999997, "unrestricted": {"accuracy": 0.732, "large_model_usage": 0.528, "avg_cost": 0.0003992119999999997}, "random_baseline": {"accuracy": 0.6981999999999999, "accuracy_std": 0.013159027319676761, "avg_cost": 0.0003726028800000001, "cost_std": 1.574321774027151e-05, "small_model_fraction": 0.4798597390136725}, "unrestricted_random_baseline": {"accuracy": 0.7038, "accuracy_std": 0.01086093918590838, "avg_cost": 0.00038505514, "cost_std": 1.7492504179573624e-05, "small_model_fraction": 0.43688750188556075}}, "0.2": {"accuracy": 0.632, "large_model_usage": 0.246, "small_model_usage": 0.754, "avg_lambda": 0.44999999999999624, "avg_cost": 0.00019755999999999976, "unrestricted": {"accuracy": 0.646, "large_model_usage": 0.246, "avg_cost": 0.00020591599999999978}, "random_baseline": {"accuracy": 0.6300000000000001, "accuracy_std": 0.013145341380123998, "avg_cost": 0.00019332339999999988, "cost_std": 1.1830200316816307e-05, "small_model_fraction": 0.7485490455971852}, "unrestricted_random_baseline": {"accuracy": 0.6394000000000001, "accuracy_std": 0.005730619512757765, "avg_cost": 0.00020468689999999994, "cost_std": 1.0992653995555437e-05, "small_model_fraction": 0.7356345004438797}}, "0.25": {"accuracy": 0.606, "large_model_usage": 0.188, "small_model_usage": 0.812, "avg_lambda": 0.2699999999999993, "avg_cost": 0.00015010000000000007, "unrestricted": {"accuracy": 0.632, "large_model_usage": 0.188, "avg_cost": 0.0001687799999999999}, "random_baseline": {"accuracy": 0.6185999999999999, "accuracy_std": 0.010547037498748177, "avg_cost": 0.00015325746000000002, "cost_std": 1.0630294307139347e-05, "small_model_fraction": 0.8219004473415445}, "unrestricted_random_baseline": {"accuracy": 0.6247999999999999, "accuracy_std": 0.007600000000000007, "avg_cost": 0.00016136713999999994, "cost_std": 8.902239462539726e-06, "small_model_fraction": 0.7930297312736004}}}}