{"trial": 29, "trial_seed": 70, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.597, "large_model_accuracy": 0.825, "small_model_cost": 3.478389999999999e-05, "large_model_cost": 0.0006786059999999991, "hybrid_results": {"0.05": {"accuracy": 0.792, "large_model_usage": 0.804, "small_model_usage": 0.19599999999999995, "avg_lambda": 0.8999999999999925, "avg_cost": 0.0005872314, "unrestricted": {"accuracy": 0.792, "large_model_usage": 0.804, "avg_cost": 0.0005877114000000001}, "random_baseline": {"accuracy": 0.7857999999999999, "accuracy_std": 0.00729109045342328, "avg_cost": 0.0005843124799999998, "cost_std": 7.877998952119728e-06, "small_model_fraction": 0.14192523058776518}, "unrestricted_random_baseline": {"accuracy": 0.7892000000000001, "accuracy_std": 0.012998461447417549, "avg_cost": 0.0005843374399999999, "cost_std": 1.0000151212376775e-05, "small_model_fraction": 0.14117968302113137}}, "0.1": {"accuracy": 0.738, "large_model_usage": 0.522, "small_model_usage": 0.478, "avg_lambda": 0.7599999999999948, "avg_cost": 0.0003922754000000007, "unrestricted": {"accuracy": 0.744, "large_model_usage": 0.522, "avg_cost": 0.0003978074000000008}, "random_baseline": {"accuracy": 0.7212, "accuracy_std": 0.01203993355463394, "avg_cost": 0.00038978812000000046, "cost_std": 1.9497804348223585e-05, "small_model_fraction": 0.44473558767243127}, "unrestricted_random_baseline": {"accuracy": 0.7209999999999999, "accuracy_std": 0.007000000000000006, "avg_cost": 0.0003966599000000003, "cost_std": 1.0950436951373232e-05, "small_model_fraction": 0.43614315196697767}}, "0.15": {"accuracy": 0.7, "large_model_usage": 0.36, "small_model_usage": 0.64, "avg_lambda": 0.6499999999999982, "avg_cost": 0.0002819674000000001, "unrestricted": {"accuracy": 0.706, "large_model_usage": 0.36, "avg_cost": 0.00028645940000000017}, "random_baseline": {"accuracy": 0.6838, "accuracy_std": 0.014709180806557486, "avg_cost": 0.00027795158000000014, "cost_std": 1.0950801250118744e-05, "small_model_fraction": 0.6160686313812459}, "unrestricted_random_baseline": {"accuracy": 0.683, "accuracy_std": 0.008955445270895212, "avg_cost": 0.0002857493800000002, "cost_std": 1.1109273921710682e-05, "small_model_fraction": 0.609091548736832}}, "0.2": {"accuracy": 0.666, "large_model_usage": 0.206, "small_model_usage": 0.794, "avg_lambda": 0.37000000000000255, "avg_cost": 0.00016693539999999992, "unrestricted": {"accuracy": 0.678, "large_model_usage": 0.206, "avg_cost": 0.00017864339999999998}, "random_baseline": {"accuracy": 0.6428, "accuracy_std": 0.01121427661510096, "avg_cost": 0.00016142828, "cost_std": 1.1831011873276107e-05, "small_model_fraction": 0.794739105725013}, "unrestricted_random_baseline": {"accuracy": 0.6504, "accuracy_std": 0.007031358332498784, "avg_cost": 0.00017485683999999996, "cost_std": 7.213292644444699e-06, "small_model_fraction": 0.7765539579955393}}, "0.25": {"accuracy": 0.62, "large_model_usage": 0.136, "small_model_usage": 0.864, "avg_lambda": 0.05999999999999949, "avg_cost": 0.00011478739999999997, "unrestricted": {"accuracy": 0.662, "large_model_usage": 0.136, "avg_cost": 0.00013150739999999997}, "random_baseline": {"accuracy": 0.6294, "accuracy_std": 0.006069596362197412, "avg_cost": 0.00011307771999999996, "cost_std": 1.3198644111332038e-05, "small_model_fraction": 0.8757366359433761}, "unrestricted_random_baseline": {"accuracy": 0.6344000000000001, "accuracy_std": 0.01003194896318757, "avg_cost": 0.00013049982, "cost_std": 7.155506950845602e-06, "small_model_fraction": 0.8497667290389688}}}}