{"trial": 28, "trial_seed": 69, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.615, "large_model_accuracy": 0.832, "small_model_cost": 3.461159999999998e-05, "large_model_cost": 0.0006758480000000004, "hybrid_results": {"0.05": {"accuracy": 0.822, "large_model_usage": 0.83, "small_model_usage": 0.17000000000000004, "avg_lambda": 0.8600000000000094, "avg_cost": 0.0005937162, "unrestricted": {"accuracy": 0.828, "large_model_usage": 0.83, "avg_cost": 0.0006141081999999996}, "random_baseline": {"accuracy": 0.8248, "accuracy_std": 0.005878775382679633, "avg_cost": 0.0006019565, "cost_std": 1.3327257923293977e-05, "small_model_fraction": 0.12808349619578735}, "unrestricted_random_baseline": {"accuracy": 0.8309999999999998, "accuracy_std": 0.007912016177941, "avg_cost": 0.0006227279800000001, "cost_std": 8.508079915680165e-06, "small_model_fraction": 0.096282431876919}}, "0.1": {"accuracy": 0.804, "large_model_usage": 0.53, "small_model_usage": 0.47, "avg_lambda": 0.7599999999999948, "avg_cost": 0.0004026521999999997, "unrestricted": {"accuracy": 0.81, "large_model_usage": 0.53, "avg_cost": 0.00040853619999999983}, "random_baseline": {"accuracy": 0.7575999999999999, "accuracy_std": 0.0123223374405995, "avg_cost": 0.0004038992800000003, "cost_std": 1.2774704697393258e-05, "small_model_fraction": 0.4260453710987095}, "unrestricted_random_baseline": {"accuracy": 0.7607999999999999, "accuracy_std": 0.015025311976794372, "avg_cost": 0.0004202403800000003, "cost_std": 1.4396361346520845e-05, "small_model_fraction": 0.4168693480282785}}, "0.15": {"accuracy": 0.742, "large_model_usage": 0.31, "small_model_usage": 0.69, "avg_lambda": 0.5800000000000031, "avg_cost": 0.00024520020000000007, "unrestricted": {"accuracy": 0.746, "large_model_usage": 0.31, "avg_cost": 0.0002561442000000001}, "random_baseline": {"accuracy": 0.6994, "accuracy_std": 0.005517245689653494, "avg_cost": 0.00024385287999999995, "cost_std": 1.4798790667402544e-05, "small_model_fraction": 0.6715897600323376}, "unrestricted_random_baseline": {"accuracy": 0.7006, "accuracy_std": 0.008856635930193818, "avg_cost": 0.00026078506000000006, "cost_std": 1.2255047890579637e-05, "small_model_fraction": 0.65452273139828}}, "0.2": {"accuracy": 0.7, "large_model_usage": 0.218, "small_model_usage": 0.782, "avg_lambda": 0.4000000000000035, "avg_cost": 0.00017626820000000008, "unrestricted": {"accuracy": 0.726, "large_model_usage": 0.218, "avg_cost": 0.0001892322000000001}, "random_baseline": {"accuracy": 0.669, "accuracy_std": 0.013921206844235867, "avg_cost": 0.00017551138, "cost_std": 1.1180994731579093e-05, "small_model_fraction": 0.779088336220464}, "unrestricted_random_baseline": {"accuracy": 0.6788000000000001, "accuracy_std": 0.009217374897442319, "avg_cost": 0.00019599812, "cost_std": 1.3141152500203282e-05, "small_model_fraction": 0.758871143310018}}, "0.25": {"accuracy": 0.626, "large_model_usage": 0.014, "small_model_usage": 0.986, "avg_lambda": 0.0, "avg_cost": 4.3436199999999976e-05, "unrestricted": {"accuracy": 0.632, "large_model_usage": 0.014, "avg_cost": 4.511219999999999e-05}, "random_baseline": {"accuracy": 0.6279999999999999, "accuracy_std": 0.00268328157299975, "avg_cost": 4.4781059999999994e-05, "cost_std": 3.7229481157276565e-06, "small_model_fraction": 0.9862381486765257}, "unrestricted_random_baseline": {"accuracy": 0.6268, "accuracy_std": 0.0031240998703626643, "avg_cost": 4.6664660000000005e-05, "cost_std": 2.3906020514506414e-06, "small_model_fraction": 0.9836244480194825}}}}