{"trial": 14, "trial_seed": 55, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.593, "large_model_accuracy": 0.834, "small_model_cost": 3.476130000000004e-05, "large_model_cost": 0.0006796259999999994, "hybrid_results": {"0.05": {"accuracy": 0.838, "large_model_usage": 1.0, "small_model_usage": 0.0, "avg_lambda": 1.0, "avg_cost": 0.0007145521999999996, "unrestricted": {"accuracy": 0.838, "large_model_usage": 1.0, "avg_cost": 0.0007145521999999996}, "random_baseline": {"accuracy": 0.8379999999999999, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.0006796839999999996, "cost_std": 0.0, "small_model_fraction": 0.0}, "unrestricted_random_baseline": {"accuracy": 0.8379999999999999, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.0006796839999999996, "cost_std": 0.0, "small_model_fraction": 0.0}}, "0.1": {"accuracy": 0.776, "large_model_usage": 0.62, "small_model_usage": 0.38, "avg_lambda": 0.800000000000007, "avg_cost": 0.00045944019999999994, "unrestricted": {"accuracy": 0.784, "large_model_usage": 0.62, "avg_cost": 0.0004648361999999999}, "random_baseline": {"accuracy": 0.7696, "accuracy_std": 0.014051334456200248, "avg_cost": 0.00045901184000000017, "cost_std": 1.904796258013973e-05, "small_model_fraction": 0.34144495736857616}, "unrestricted_random_baseline": {"accuracy": 0.7657999999999999, "accuracy_std": 0.014379151574414965, "avg_cost": 0.0004581168200000002, "cost_std": 1.4996843861679698e-05, "small_model_fraction": 0.3330773106358585}}, "0.15": {"accuracy": 0.74, "large_model_usage": 0.512, "small_model_usage": 0.488, "avg_lambda": 0.6999999999999935, "avg_cost": 0.0003657122000000003, "unrestricted": {"accuracy": 0.766, "large_model_usage": 0.512, "avg_cost": 0.0003896282000000002}, "random_baseline": {"accuracy": 0.7243999999999999, "accuracy_std": 0.012289833196589784, "avg_cost": 0.00036748504000000024, "cost_std": 1.5494209211392562e-05, "small_model_fraction": 0.486790174745182}, "unrestricted_random_baseline": {"accuracy": 0.7484, "accuracy_std": 0.012611106216347567, "avg_cost": 0.00039711880000000023, "cost_std": 1.6216719593801997e-05, "small_model_fraction": 0.4497033253642189}}, "0.2": {"accuracy": 0.71, "large_model_usage": 0.28, "small_model_usage": 0.72, "avg_lambda": 0.5, "avg_cost": 0.0002263362, "unrestricted": {"accuracy": 0.718, "large_model_usage": 0.28, "avg_cost": 0.00023455619999999998}, "random_baseline": {"accuracy": 0.6812, "accuracy_std": 0.01223764683262266, "avg_cost": 0.00022859344, "cost_std": 1.2396523784609952e-05, "small_model_fraction": 0.7029223339407473}, "unrestricted_random_baseline": {"accuracy": 0.6848, "accuracy_std": 0.009967948635501666, "avg_cost": 0.00023359004000000002, "cost_std": 1.3216775284402766e-05, "small_model_fraction": 0.6901754740180379}}, "0.25": {"accuracy": 0.662, "large_model_usage": 0.188, "small_model_usage": 0.812, "avg_lambda": 0.31000000000000166, "avg_cost": 0.00014924419999999997, "unrestricted": {"accuracy": 0.69, "large_model_usage": 0.188, "avg_cost": 0.0001650161999999998}, "random_baseline": {"accuracy": 0.6616000000000001, "accuracy_std": 0.00542586398650022, "avg_cost": 0.00014910267999999996, "cost_std": 9.647884355525824e-06, "small_model_fraction": 0.822469891746284}, "unrestricted_random_baseline": {"accuracy": 0.6608, "accuracy_std": 0.007110555533852477, "avg_cost": 0.00016130401999999992, "cost_std": 1.3484896113637694e-05, "small_model_fraction": 0.7980120481086964}}}}