{"trial": 12, "trial_seed": 53, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.578, "large_model_accuracy": 0.831, "small_model_cost": 3.448620000000002e-05, "large_model_cost": 0.00067474, "hybrid_results": {"0.05": {"accuracy": 0.766, "large_model_usage": 0.812, "small_model_usage": 0.18799999999999994, "avg_lambda": 0.8899999999999905, "avg_cost": 0.0005862446000000004, "unrestricted": {"accuracy": 0.772, "large_model_usage": 0.812, "avg_cost": 0.0005937686000000004}, "random_baseline": {"accuracy": 0.7868, "accuracy_std": 0.010127191120937738, "avg_cost": 0.00058802474, "cost_std": 9.48005337539838e-06, "small_model_fraction": 0.13821924992869947}, "unrestricted_random_baseline": {"accuracy": 0.7904, "accuracy_std": 0.008754427451295723, "avg_cost": 0.00059511654, "cost_std": 1.0791416333753385e-05, "small_model_fraction": 0.1264676601685138}}, "0.1": {"accuracy": 0.746, "large_model_usage": 0.558, "small_model_usage": 0.44199999999999995, "avg_lambda": 0.7599999999999948, "avg_cost": 0.00041418460000000015, "unrestricted": {"accuracy": 0.746, "large_model_usage": 0.558, "avg_cost": 0.0004203486000000001}, "random_baseline": {"accuracy": 0.7234, "accuracy_std": 0.010001999800039999, "avg_cost": 0.00042010782000000016, "cost_std": 1.6891241370473682e-05, "small_model_fraction": 0.4069564288411874}, "unrestricted_random_baseline": {"accuracy": 0.7188, "accuracy_std": 0.009303762679690413, "avg_cost": 0.0004248642000000001, "cost_std": 9.982838799459765e-06, "small_model_fraction": 0.3973289967197381}}, "0.15": {"accuracy": 0.682, "large_model_usage": 0.338, "small_model_usage": 0.6619999999999999, "avg_lambda": 0.5999999999999999, "avg_cost": 0.0002605006000000001, "unrestricted": {"accuracy": 0.692, "large_model_usage": 0.338, "avg_cost": 0.0002685366000000002}, "random_baseline": {"accuracy": 0.6568, "accuracy_std": 0.009846826900072944, "avg_cost": 0.0002603401800000001, "cost_std": 1.4815401738042763e-05, "small_model_fraction": 0.6469924895408663}, "unrestricted_random_baseline": {"accuracy": 0.6594000000000001, "accuracy_std": 0.010001999800039999, "avg_cost": 0.00026862880000000003, "cost_std": 1.1619468209173772e-05, "small_model_fraction": 0.6344412169049208}}, "0.2": {"accuracy": 0.646, "large_model_usage": 0.258, "small_model_usage": 0.742, "avg_lambda": 0.4000000000000035, "avg_cost": 0.00020021260000000006, "unrestricted": {"accuracy": 0.664, "large_model_usage": 0.258, "avg_cost": 0.00021372460000000008}, "random_baseline": {"accuracy": 0.6356, "accuracy_std": 0.010947145746723216, "avg_cost": 0.0002081083200000001, "cost_std": 1.208742251506081e-05, "small_model_fraction": 0.741155148161557}, "unrestricted_random_baseline": {"accuracy": 0.6432, "accuracy_std": 0.013029197979921876, "avg_cost": 0.0002186584400000001, "cost_std": 1.830464611333417e-05, "small_model_fraction": 0.7200510172684643}}, "0.25": {"accuracy": 0.59, "large_model_usage": 0.14, "small_model_usage": 0.86, "avg_lambda": 0.05999999999999949, "avg_cost": 0.00011441260000000004, "unrestricted": {"accuracy": 0.616, "large_model_usage": 0.14, "avg_cost": 0.00013173660000000007}, "random_baseline": {"accuracy": 0.6026, "accuracy_std": 0.005868560300448489, "avg_cost": 0.00011402336, "cost_std": 4.345613884182528e-06, "small_model_fraction": 0.8751645050759558}, "unrestricted_random_baseline": {"accuracy": 0.612, "accuracy_std": 0.008763560920082665, "avg_cost": 0.00013446486000000003, "cost_std": 7.868607685251552e-06, "small_model_fraction": 0.8481064852719343}}}}