{"trial": 3, "trial_seed": 44, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.596, "large_model_accuracy": 0.829, "small_model_cost": 3.436219999999997e-05, "large_model_cost": 0.0006723399999999992, "hybrid_results": {"0.05": {"accuracy": 0.762, "large_model_usage": 0.654, "small_model_usage": 0.346, "avg_lambda": 0.810000000000002, "avg_cost": 0.0004896269999999999, "unrestricted": {"accuracy": 0.768, "large_model_usage": 0.654, "avg_cost": 0.0004931149999999998}, "random_baseline": {"accuracy": 0.7476, "accuracy_std": 0.011892854997854814, "avg_cost": 0.0004956801400000001, "cost_std": 1.913354905187209e-05, "small_model_fraction": 0.2863939779722736}, "unrestricted_random_baseline": {"accuracy": 0.7567999999999999, "accuracy_std": 0.009558242516278825, "avg_cost": 0.00049836402, "cost_std": 9.080441526908177e-06, "small_model_fraction": 0.280926703092176}}, "0.1": {"accuracy": 0.706, "large_model_usage": 0.524, "small_model_usage": 0.476, "avg_lambda": 0.6999999999999935, "avg_cost": 0.00037719100000000003, "unrestricted": {"accuracy": 0.738, "large_model_usage": 0.524, "avg_cost": 0.0004014349999999999}, "random_baseline": {"accuracy": 0.7102, "accuracy_std": 0.015606408939919516, "avg_cost": 0.00038040921999999994, "cost_std": 1.204492525388175e-05, "small_model_fraction": 0.4626320853170746}, "unrestricted_random_baseline": {"accuracy": 0.7214, "accuracy_std": 0.010992724866929047, "avg_cost": 0.0004122920199999999, "cost_std": 1.0282695406341546e-05, "small_model_fraction": 0.4246307630140103}}, "0.15": {"accuracy": 0.66, "large_model_usage": 0.296, "small_model_usage": 0.704, "avg_lambda": 0.5, "avg_cost": 0.00022938299999999991, "unrestricted": {"accuracy": 0.672, "large_model_usage": 0.296, "avg_cost": 0.00023962299999999995}, "random_baseline": {"accuracy": 0.6514, "accuracy_std": 0.010734989520255723, "avg_cost": 0.0002393385199999999, "cost_std": 1.1435369458727582e-05, "small_model_fraction": 0.6943141281718578}, "unrestricted_random_baseline": {"accuracy": 0.66, "accuracy_std": 0.01019803902718558, "avg_cost": 0.00025014329999999987, "cost_std": 1.073056164578528e-05, "small_model_fraction": 0.6782634129275341}}, "0.2": {"accuracy": 0.61, "large_model_usage": 0.198, "small_model_usage": 0.802, "avg_lambda": 0.31000000000000166, "avg_cost": 0.000155087, "unrestricted": {"accuracy": 0.64, "large_model_usage": 0.198, "avg_cost": 0.00017060299999999988}, "random_baseline": {"accuracy": 0.6224, "accuracy_std": 0.009414881836751864, "avg_cost": 0.00015601678, "cost_std": 1.5110411931499403e-05, "small_model_fraction": 0.8107695910421958}, "unrestricted_random_baseline": {"accuracy": 0.6312000000000001, "accuracy_std": 0.012592060990957766, "avg_cost": 0.00017531803999999986, "cost_std": 7.295078909950199e-06, "small_model_fraction": 0.7864489955606605}}, "0.25": {"accuracy": 0.582, "large_model_usage": 0.02, "small_model_usage": 0.98, "avg_lambda": 0.0, "avg_cost": 4.500699999999997e-05, "unrestricted": {"accuracy": 0.588, "large_model_usage": 0.02, "avg_cost": 4.670299999999998e-05}, "random_baseline": {"accuracy": 0.5786, "accuracy_std": 0.004476605857119883, "avg_cost": 4.573991999999999e-05, "cost_std": 4.433926345080622e-06, "small_model_fraction": 0.9833147799186742}, "unrestricted_random_baseline": {"accuracy": 0.5818000000000001, "accuracy_std": 0.004044749683231341, "avg_cost": 4.814955999999999e-05, "cost_std": 5.053094873283108e-06, "small_model_fraction": 0.9806563802063333}}}}