{"trial": 9, "trial_seed": 50, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.6, "large_model_accuracy": 0.809, "small_model_cost": 3.444400000000001e-05, "large_model_cost": 0.000673792, "hybrid_results": {"0.05": {"accuracy": 0.766, "large_model_usage": 0.814, "small_model_usage": 0.18600000000000005, "avg_lambda": 0.8999999999999925, "avg_cost": 0.0005883783999999997, "unrestricted": {"accuracy": 0.766, "large_model_usage": 0.814, "avg_cost": 0.0005894223999999997}, "random_baseline": {"accuracy": 0.7849999999999999, "accuracy_std": 0.006016643582596535, "avg_cost": 0.0005873767200000001, "cost_std": 5.170324368315751e-06, "small_model_fraction": 0.13359484975318656}, "unrestricted_random_baseline": {"accuracy": 0.7882, "accuracy_std": 0.007236021006050223, "avg_cost": 0.0005946289599999999, "cost_std": 1.1255047743941372e-05, "small_model_fraction": 0.13196193622252708}}, "0.1": {"accuracy": 0.74, "large_model_usage": 0.524, "small_model_usage": 0.476, "avg_lambda": 0.7799999999999914, "avg_cost": 0.0003971784000000002, "unrestricted": {"accuracy": 0.74, "large_model_usage": 0.524, "avg_cost": 0.0004002064000000001}, "random_baseline": {"accuracy": 0.7172, "accuracy_std": 0.009431860898041286, "avg_cost": 0.00039890864000000005, "cost_std": 7.112644385768201e-06, "small_model_fraction": 0.43264951169003396}, "unrestricted_random_baseline": {"accuracy": 0.7198, "accuracy_std": 0.009651942809610932, "avg_cost": 0.00039884614, "cost_std": 1.4927440590549973e-05, "small_model_fraction": 0.4279134368137539}}, "0.15": {"accuracy": 0.712, "large_model_usage": 0.504, "small_model_usage": 0.496, "avg_lambda": 0.6999999999999935, "avg_cost": 0.0003639904000000001, "unrestricted": {"accuracy": 0.74, "large_model_usage": 0.504, "avg_cost": 0.00038530240000000005}, "random_baseline": {"accuracy": 0.7061999999999999, "accuracy_std": 0.008506468127254703, "avg_cost": 0.00036849174000000017, "cost_std": 1.3313970046548887e-05, "small_model_fraction": 0.48455864411869576}, "unrestricted_random_baseline": {"accuracy": 0.7184000000000001, "accuracy_std": 0.006800000000000006, "avg_cost": 0.0003873431800000001, "cost_std": 1.3813482087424485e-05, "small_model_fraction": 0.45122468514799446}}, "0.2": {"accuracy": 0.672, "large_model_usage": 0.248, "small_model_usage": 0.752, "avg_lambda": 0.4000000000000035, "avg_cost": 0.0001945464, "unrestricted": {"accuracy": 0.676, "large_model_usage": 0.248, "avg_cost": 0.0002056424}, "random_baseline": {"accuracy": 0.6514, "accuracy_std": 0.008765842800324459, "avg_cost": 0.00019302103999999996, "cost_std": 1.0839335792492029e-05, "small_model_fraction": 0.7495848896062864}, "unrestricted_random_baseline": {"accuracy": 0.6626, "accuracy_std": 0.005063595560468869, "avg_cost": 0.00020907153999999997, "cost_std": 1.3311264338762092e-05, "small_model_fraction": 0.7322297090160601}}, "0.25": {"accuracy": 0.622, "large_model_usage": 0.176, "small_model_usage": 0.8240000000000001, "avg_lambda": 0.10999999999999954, "avg_cost": 0.00013687840000000005, "unrestricted": {"accuracy": 0.662, "large_model_usage": 0.176, "avg_cost": 0.00015769040000000013}, "random_baseline": {"accuracy": 0.6374, "accuracy_std": 0.0078000000000000074, "avg_cost": 0.0001419558800000001, "cost_std": 1.3007624641017314e-05, "small_model_fraction": 0.8397830289607537}, "unrestricted_random_baseline": {"accuracy": 0.6406, "accuracy_std": 0.008765842800324459, "avg_cost": 0.00016222076000000006, "cost_std": 1.3892727644577159e-05, "small_model_fraction": 0.8072311167001381}}}}