{"trial": 21, "trial_seed": 62, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.593, "large_model_accuracy": 0.823, "small_model_cost": 3.4484e-05, "large_model_cost": 0.0006737199999999989, "hybrid_results": {"0.05": {"accuracy": 0.798, "large_model_usage": 0.796, "small_model_usage": 0.20399999999999996, "avg_lambda": 0.8799999999999963, "avg_cost": 0.0005618943999999998, "unrestricted": {"accuracy": 0.808, "large_model_usage": 0.796, "avg_cost": 0.0005736183999999999}, "random_baseline": {"accuracy": 0.8016, "accuracy_std": 0.011892854997854786, "avg_cost": 0.0005570530400000001, "cost_std": 1.3125401118990542e-05, "small_model_fraction": 0.17493633024422794}, "unrestricted_random_baseline": {"accuracy": 0.7974, "accuracy_std": 0.006814690014960334, "avg_cost": 0.0005676233800000002, "cost_std": 1.7192972875323234e-05, "small_model_fraction": 0.1565956861002808}}, "0.1": {"accuracy": 0.764, "large_model_usage": 0.526, "small_model_usage": 0.474, "avg_lambda": 0.7599999999999948, "avg_cost": 0.0003868144, "unrestricted": {"accuracy": 0.77, "large_model_usage": 0.526, "avg_cost": 0.0003944384000000002}, "random_baseline": {"accuracy": 0.738, "accuracy_std": 0.0065115282384398876, "avg_cost": 0.00038293206000000036, "cost_std": 1.2619855136426926e-05, "small_model_fraction": 0.448825785781776}, "unrestricted_random_baseline": {"accuracy": 0.735, "accuracy_std": 0.009348796714016203, "avg_cost": 0.0003912001200000005, "cost_std": 1.4444685944166518e-05, "small_model_fraction": 0.4368990482388338}}, "0.15": {"accuracy": 0.724, "large_model_usage": 0.496, "small_model_usage": 0.504, "avg_lambda": 0.6999999999999935, "avg_cost": 0.00034775440000000017, "unrestricted": {"accuracy": 0.766, "large_model_usage": 0.496, "avg_cost": 0.0003703264000000001}, "random_baseline": {"accuracy": 0.7196, "accuracy_std": 0.010268398122394759, "avg_cost": 0.00034086272000000026, "cost_std": 1.1016508017770428e-05, "small_model_fraction": 0.5099299789123254}, "unrestricted_random_baseline": {"accuracy": 0.7335999999999998, "accuracy_std": 0.008380930735902792, "avg_cost": 0.00036730396000000037, "cost_std": 1.0996387496555323e-05, "small_model_fraction": 0.4746190765225977}}, "0.2": {"accuracy": 0.684, "large_model_usage": 0.234, "small_model_usage": 0.766, "avg_lambda": 0.4000000000000035, "avg_cost": 0.00018441840000000005, "unrestricted": {"accuracy": 0.698, "large_model_usage": 0.234, "avg_cost": 0.00019726239999999992}, "random_baseline": {"accuracy": 0.6648000000000001, "accuracy_std": 0.009431860898041286, "avg_cost": 0.00018165088, "cost_std": 1.3260401620825775e-05, "small_model_fraction": 0.7654475029566541}, "unrestricted_random_baseline": {"accuracy": 0.6648000000000001, "accuracy_std": 0.012464349160706318, "avg_cost": 0.00019332936000000003, "cost_std": 1.7287336446266083e-05, "small_model_fraction": 0.7453547672534083}}, "0.25": {"accuracy": 0.622, "large_model_usage": 0.108, "small_model_usage": 0.892, "avg_lambda": 0.05000000000000044, "avg_cost": 9.503439999999997e-05, "unrestricted": {"accuracy": 0.644, "large_model_usage": 0.108, "avg_cost": 0.00010797040000000003}, "random_baseline": {"accuracy": 0.6308, "accuracy_std": 0.006079473661428271, "avg_cost": 9.569077999999996e-05, "cost_std": 9.602617027644104e-06, "small_model_fraction": 0.905276924328417}, "unrestricted_random_baseline": {"accuracy": 0.633, "accuracy_std": 0.006942621983083919, "avg_cost": 0.00010527953999999999, "cost_std": 1.1077286378188473e-05, "small_model_fraction": 0.8850402668185143}}}}