{"trial": 2, "trial_seed": 43, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.584, "large_model_accuracy": 0.833, "small_model_cost": 3.459219999999998e-05, "large_model_cost": 0.0006757880000000007, "hybrid_results": {"0.05": {"accuracy": 0.832, "large_model_usage": 1.0, "small_model_usage": 0.0, "avg_lambda": 1.0, "avg_cost": 0.0007069269999999994, "unrestricted": {"accuracy": 0.832, "large_model_usage": 1.0, "avg_cost": 0.0007069269999999994}, "random_baseline": {"accuracy": 0.8320000000000001, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.0006725079999999998, "cost_std": 0.0, "small_model_fraction": 0.0}, "unrestricted_random_baseline": {"accuracy": 0.8320000000000001, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.0006725079999999998, "cost_std": 0.0, "small_model_fraction": 0.0}}, "0.1": {"accuracy": 0.73, "large_model_usage": 0.516, "small_model_usage": 0.484, "avg_lambda": 0.7599999999999948, "avg_cost": 0.0003822630000000002, "unrestricted": {"accuracy": 0.734, "large_model_usage": 0.516, "avg_cost": 0.00038847900000000017}, "random_baseline": {"accuracy": 0.7219999999999999, "accuracy_std": 0.012296340919151529, "avg_cost": 0.0003839882400000001, "cost_std": 1.2809270210531207e-05, "small_model_fraction": 0.4577774838824587}, "unrestricted_random_baseline": {"accuracy": 0.7181999999999998, "accuracy_std": 0.013782597723216054, "avg_cost": 0.00039514814000000026, "cost_std": 1.4269901062880546e-05, "small_model_fraction": 0.44808309723800477}}, "0.15": {"accuracy": 0.704, "large_model_usage": 0.492, "small_model_usage": 0.508, "avg_lambda": 0.6999999999999935, "avg_cost": 0.00034987100000000013, "unrestricted": {"accuracy": 0.728, "large_model_usage": 0.492, "avg_cost": 0.00036964700000000013}, "random_baseline": {"accuracy": 0.6998, "accuracy_std": 0.009357350052231653, "avg_cost": 0.00034444266, "cost_std": 1.64563086879288e-05, "small_model_fraction": 0.5082955939511772}, "unrestricted_random_baseline": {"accuracy": 0.7156, "accuracy_std": 0.013320660644277383, "avg_cost": 0.0003751863400000001, "cost_std": 1.4955090210105756e-05, "small_model_fraction": 0.4774532209973931}}, "0.2": {"accuracy": 0.66, "large_model_usage": 0.264, "small_model_usage": 0.736, "avg_lambda": 0.44999999999999624, "avg_cost": 0.00020369899999999987, "unrestricted": {"accuracy": 0.666, "large_model_usage": 0.264, "avg_cost": 0.00021323099999999984}, "random_baseline": {"accuracy": 0.6472, "accuracy_std": 0.00976524449258697, "avg_cost": 0.00019231151999999995, "cost_std": 1.169458957371315e-05, "small_model_fraction": 0.7362634003529036}, "unrestricted_random_baseline": {"accuracy": 0.6498, "accuracy_std": 0.005688585061331161, "avg_cost": 0.00021353294, "cost_std": 1.2584670699243565e-05, "small_model_fraction": 0.7213974264959322}}, "0.25": {"accuracy": 0.606, "large_model_usage": 0.132, "small_model_usage": 0.868, "avg_lambda": 0.079999999999999, "avg_cost": 0.00011013899999999997, "unrestricted": {"accuracy": 0.636, "large_model_usage": 0.132, "avg_cost": 0.00012387499999999988}, "random_baseline": {"accuracy": 0.6134000000000001, "accuracy_std": 0.00759210115844093, "avg_cost": 0.00011254231999999987, "cost_std": 8.560659901759904e-06, "small_model_fraction": 0.8821782675432374}, "unrestricted_random_baseline": {"accuracy": 0.614, "accuracy_std": 0.005865151319446077, "avg_cost": 0.0001198124199999999, "cost_std": 8.250546128081466e-06, "small_model_fraction": 0.8607557940959693}}}}