{"trial": 5, "trial_seed": 46, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.589, "large_model_accuracy": 0.817, "small_model_cost": 3.444450000000002e-05, "large_model_cost": 0.0006728339999999998, "hybrid_results": {"0.05": {"accuracy": 0.764, "large_model_usage": 0.808, "small_model_usage": 0.19199999999999995, "avg_lambda": 0.8999999999999925, "avg_cost": 0.0005813904000000003, "unrestricted": {"accuracy": 0.764, "large_model_usage": 0.808, "avg_cost": 0.0005820064000000003}, "random_baseline": {"accuracy": 0.7716000000000001, "accuracy_std": 0.008138795979750328, "avg_cost": 0.0005708618199999998, "cost_std": 6.78575748134279e-06, "small_model_fraction": 0.1432410777432891}, "unrestricted_random_baseline": {"accuracy": 0.776, "accuracy_std": 0.004098780306383843, "avg_cost": 0.0005840612399999997, "cost_std": 9.361023443107026e-06, "small_model_fraction": 0.14227614959205862}}, "0.1": {"accuracy": 0.74, "large_model_usage": 0.544, "small_model_usage": 0.45599999999999996, "avg_lambda": 0.7599999999999948, "avg_cost": 0.0004073664, "unrestricted": {"accuracy": 0.744, "large_model_usage": 0.544, "avg_cost": 0.0004110024}, "random_baseline": {"accuracy": 0.7106, "accuracy_std": 0.012362847568420484, "avg_cost": 0.0004051166599999999, "cost_std": 1.6552860511718136e-05, "small_model_fraction": 0.41583954623313796}, "unrestricted_random_baseline": {"accuracy": 0.7076, "accuracy_std": 0.013469966592386175, "avg_cost": 0.00039939545999999993, "cost_std": 1.2277557075265338e-05, "small_model_fraction": 0.4101439638339914}}, "0.15": {"accuracy": 0.68, "large_model_usage": 0.37, "small_model_usage": 0.63, "avg_lambda": 0.6299999999999967, "avg_cost": 0.00028419439999999996, "unrestricted": {"accuracy": 0.684, "large_model_usage": 0.37, "avg_cost": 0.0002910103999999999}, "random_baseline": {"accuracy": 0.6676000000000001, "accuracy_std": 0.013047605144240065, "avg_cost": 0.00028103242, "cost_std": 1.0968301245480113e-05, "small_model_fraction": 0.6087813161087391}, "unrestricted_random_baseline": {"accuracy": 0.676, "accuracy_std": 0.00903327183250898, "avg_cost": 0.00029225734000000014, "cost_std": 1.4789742182485845e-05, "small_model_fraction": 0.5981044487730452}}, "0.2": {"accuracy": 0.644, "large_model_usage": 0.276, "small_model_usage": 0.724, "avg_lambda": 0.4000000000000035, "avg_cost": 0.0002090864000000001, "unrestricted": {"accuracy": 0.668, "large_model_usage": 0.276, "avg_cost": 0.00022379440000000003}, "random_baseline": {"accuracy": 0.6368, "accuracy_std": 0.009260669522232189, "avg_cost": 0.00020741375999999996, "cost_std": 1.5044545324282855e-05, "small_model_fraction": 0.7264336271194934}, "unrestricted_random_baseline": {"accuracy": 0.648, "accuracy_std": 0.011832159566199242, "avg_cost": 0.00022127912000000002, "cost_std": 1.5963101232830674e-05, "small_model_fraction": 0.7033944010670599}}, "0.25": {"accuracy": 0.602, "large_model_usage": 0.146, "small_model_usage": 0.854, "avg_lambda": 0.05999999999999949, "avg_cost": 0.00011787439999999992, "unrestricted": {"accuracy": 0.636, "large_model_usage": 0.146, "avg_cost": 0.00013556240000000002}, "random_baseline": {"accuracy": 0.6092, "accuracy_std": 0.010127191120937738, "avg_cost": 0.00011677328000000002, "cost_std": 1.2423344726505838e-05, "small_model_fraction": 0.8693119169409897}, "unrestricted_random_baseline": {"accuracy": 0.6164, "accuracy_std": 0.007364781055808794, "avg_cost": 0.00013493488000000003, "cost_std": 1.2640156793711074e-05, "small_model_fraction": 0.8416046943127979}}}}