{"trial": 16, "trial_seed": 57, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.608, "large_model_accuracy": 0.831, "small_model_cost": 3.456690000000001e-05, "large_model_cost": 0.000675418, "hybrid_results": {"0.05": {"accuracy": 0.814, "large_model_usage": 1.0, "small_model_usage": 0.0, "avg_lambda": 1.0, "avg_cost": 0.0007114249999999998, "unrestricted": {"accuracy": 0.814, "large_model_usage": 1.0, "avg_cost": 0.0007114249999999998}, "random_baseline": {"accuracy": 0.8139999999999998, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.000676772, "cost_std": 0.0, "small_model_fraction": 0.0}, "unrestricted_random_baseline": {"accuracy": 0.8139999999999998, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.000676772, "cost_std": 0.0, "small_model_fraction": 0.0}}, "0.1": {"accuracy": 0.758, "large_model_usage": 0.644, "small_model_usage": 0.356, "avg_lambda": 0.800000000000007, "avg_cost": 0.0004744530000000001, "unrestricted": {"accuracy": 0.764, "large_model_usage": 0.644, "avg_cost": 0.0004784010000000002}, "random_baseline": {"accuracy": 0.7492000000000001, "accuracy_std": 0.01357055636294991, "avg_cost": 0.0004807402199999999, "cost_std": 9.172878173158039e-06, "small_model_fraction": 0.3135907857535079}, "unrestricted_random_baseline": {"accuracy": 0.7534, "accuracy_std": 0.007158212067269319, "avg_cost": 0.00048582865999999985, "cost_std": 1.07474065329456e-05, "small_model_fraction": 0.3074302283322909}}, "0.15": {"accuracy": 0.716, "large_model_usage": 0.554, "small_model_usage": 0.44599999999999995, "avg_lambda": 0.7300000000000023, "avg_cost": 0.00039318900000000006, "unrestricted": {"accuracy": 0.754, "large_model_usage": 0.554, "avg_cost": 0.00041689300000000043}, "random_baseline": {"accuracy": 0.7272000000000001, "accuracy_std": 0.005528109984434106, "avg_cost": 0.0003934219599999999, "cost_std": 9.283937081992629e-06, "small_model_fraction": 0.44039715309843414}, "unrestricted_random_baseline": {"accuracy": 0.7268, "accuracy_std": 0.010205880657738468, "avg_cost": 0.00042558323999999985, "cost_std": 1.2417501360193268e-05, "small_model_fraction": 0.4034088417730726}}, "0.2": {"accuracy": 0.676, "large_model_usage": 0.266, "small_model_usage": 0.734, "avg_lambda": 0.47999999999999593, "avg_cost": 0.00021030100000000009, "unrestricted": {"accuracy": 0.684, "large_model_usage": 0.266, "avg_cost": 0.0002176370000000001}, "random_baseline": {"accuracy": 0.6676, "accuracy_std": 0.010688311372709921, "avg_cost": 0.00021179796000000002, "cost_std": 1.3797050776974014e-05, "small_model_fraction": 0.725780138319182}, "unrestricted_random_baseline": {"accuracy": 0.6656000000000001, "accuracy_std": 0.01310877568653914, "avg_cost": 0.00021590280000000002, "cost_std": 1.2279534481404384e-05, "small_model_fraction": 0.7143328614088356}}, "0.25": {"accuracy": 0.626, "large_model_usage": 0.172, "small_model_usage": 0.8280000000000001, "avg_lambda": 0.10999999999999954, "avg_cost": 0.00013308500000000002, "unrestricted": {"accuracy": 0.664, "large_model_usage": 0.172, "avg_cost": 0.00015252100000000016}, "random_baseline": {"accuracy": 0.6378000000000001, "accuracy_std": 0.007560423268574327, "avg_cost": 0.00013171249999999998, "cost_std": 8.247104539291336e-06, "small_model_fraction": 0.8462699057550185}, "unrestricted_random_baseline": {"accuracy": 0.6426, "accuracy_std": 0.004820788317277584, "avg_cost": 0.00015278820000000004, "cost_std": 1.7072716505817126e-05, "small_model_fraction": 0.8159414878120671}}}}