{"trial": 19, "trial_seed": 60, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.578, "large_model_accuracy": 0.824, "small_model_cost": 3.501389999999993e-05, "large_model_cost": 0.0006840139999999998, "hybrid_results": {"0.05": {"accuracy": 0.826, "large_model_usage": 1.0, "small_model_usage": 0.0, "avg_lambda": 1.0, "avg_cost": 0.0007159555999999992, "unrestricted": {"accuracy": 0.826, "large_model_usage": 1.0, "avg_cost": 0.0007159555999999992}, "random_baseline": {"accuracy": 0.826, "accuracy_std": 0.0, "avg_cost": 0.0006811440000000001, "cost_std": 1.0842021724855044e-19, "small_model_fraction": 0.0}, "unrestricted_random_baseline": {"accuracy": 0.826, "accuracy_std": 0.0, "avg_cost": 0.0006811440000000001, "cost_std": 1.0842021724855044e-19, "small_model_fraction": 0.0}}, "0.1": {"accuracy": 0.732, "large_model_usage": 0.54, "small_model_usage": 0.45999999999999996, "avg_lambda": 0.7799999999999914, "avg_cost": 0.00041077560000000036, "unrestricted": {"accuracy": 0.734, "large_model_usage": 0.54, "avg_cost": 0.0004130396000000004}, "random_baseline": {"accuracy": 0.7202, "accuracy_std": 0.01521709564930182, "avg_cost": 0.00040833744000000024, "cost_std": 1.3805845474667535e-05, "small_model_fraction": 0.42101441895001174}, "unrestricted_random_baseline": {"accuracy": 0.7222, "accuracy_std": 0.009856977224281294, "avg_cost": 0.00040704416000000013, "cost_std": 1.2203586336417681e-05, "small_model_fraction": 0.417525975727892}}, "0.15": {"accuracy": 0.702, "large_model_usage": 0.512, "small_model_usage": 0.488, "avg_lambda": 0.6999999999999935, "avg_cost": 0.0003681796000000003, "unrestricted": {"accuracy": 0.728, "large_model_usage": 0.512, "avg_cost": 0.00039203160000000035}, "random_baseline": {"accuracy": 0.713, "accuracy_std": 0.013030732903409552, "avg_cost": 0.0003724332800000002, "cost_std": 1.2417879378766758e-05, "small_model_fraction": 0.486647690809292}, "unrestricted_random_baseline": {"accuracy": 0.7212, "accuracy_std": 0.018269099594670796, "avg_cost": 0.0003892150400000001, "cost_std": 1.10325587664149e-05, "small_model_fraction": 0.44989577043208395}}, "0.2": {"accuracy": 0.66, "large_model_usage": 0.28, "small_model_usage": 0.72, "avg_lambda": 0.5, "avg_cost": 0.00022114759999999981, "unrestricted": {"accuracy": 0.668, "large_model_usage": 0.28, "avg_cost": 0.00023018359999999979}, "random_baseline": {"accuracy": 0.655, "accuracy_std": 0.006526867548832294, "avg_cost": 0.00022187159999999994, "cost_std": 9.003344853997394e-06, "small_model_fraction": 0.7131992737751506}, "unrestricted_random_baseline": {"accuracy": 0.6542, "accuracy_std": 0.01086093918590838, "avg_cost": 0.00022050663999999986, "cost_std": 1.0370379547075434e-05, "small_model_fraction": 0.6992763175229096}}, "0.25": {"accuracy": 0.606, "large_model_usage": 0.18, "small_model_usage": 0.8200000000000001, "avg_lambda": 0.28000000000000097, "avg_cost": 0.00014329559999999998, "unrestricted": {"accuracy": 0.648, "large_model_usage": 0.18, "avg_cost": 0.00016021159999999983}, "random_baseline": {"accuracy": 0.6214000000000001, "accuracy_std": 0.007851114570556216, "avg_cost": 0.00014380363999999993, "cost_std": 7.598999170311843e-06, "small_model_fraction": 0.8331561119944357}, "unrestricted_random_baseline": {"accuracy": 0.6295999999999999, "accuracy_std": 0.010910545357588694, "avg_cost": 0.00015397875999999987, "cost_std": 1.6575650823252698e-05, "small_model_fraction": 0.8070914010644992}}}}