{"trial": 15, "trial_seed": 56, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.579, "large_model_accuracy": 0.821, "small_model_cost": 3.4900799999999976e-05, "large_model_cost": 0.0006828799999999996, "hybrid_results": {"0.05": {"accuracy": 0.786, "large_model_usage": 0.828, "small_model_usage": 0.17200000000000004, "avg_lambda": 0.8999999999999925, "avg_cost": 0.0006079966000000002, "unrestricted": {"accuracy": 0.786, "large_model_usage": 0.828, "avg_cost": 0.0006085326000000002}, "random_baseline": {"accuracy": 0.796, "accuracy_std": 0.009165151389911667, "avg_cost": 0.00060534692, "cost_std": 1.3197585617134662e-05, "small_model_fraction": 0.1155645119472963}, "unrestricted_random_baseline": {"accuracy": 0.7942, "accuracy_std": 0.007400000000000006, "avg_cost": 0.0006046956599999999, "cost_std": 9.181007864521228e-06, "small_model_fraction": 0.11473732490178623}}, "0.1": {"accuracy": 0.754, "large_model_usage": 0.612, "small_model_usage": 0.388, "avg_lambda": 0.800000000000007, "avg_cost": 0.0004575326, "unrestricted": {"accuracy": 0.762, "large_model_usage": 0.612, "avg_cost": 0.00046184059999999986}, "random_baseline": {"accuracy": 0.7414, "accuracy_std": 0.005370288632839025, "avg_cost": 0.00046277370000000044, "cost_std": 1.763203085846905e-05, "small_model_fraction": 0.3477694963048193}, "unrestricted_random_baseline": {"accuracy": 0.7412000000000001, "accuracy_std": 0.01590471628165685, "avg_cost": 0.00045759454000000025, "cost_std": 1.2942538608495715e-05, "small_model_fraction": 0.34112113475247335}}, "0.15": {"accuracy": 0.724, "large_model_usage": 0.502, "small_model_usage": 0.498, "avg_lambda": 0.6999999999999935, "avg_cost": 0.0003646765999999998, "unrestricted": {"accuracy": 0.74, "large_model_usage": 0.502, "avg_cost": 0.00038812859999999983}, "random_baseline": {"accuracy": 0.7188, "accuracy_std": 0.012106196760337256, "avg_cost": 0.00036881512000000026, "cost_std": 1.3268522556848503e-05, "small_model_fraction": 0.4910703923829654}, "unrestricted_random_baseline": {"accuracy": 0.7182, "accuracy_std": 0.013310146505579854, "avg_cost": 0.00038785910000000025, "cost_std": 1.866156138403222e-05, "small_model_fraction": 0.4548778726230718}}, "0.2": {"accuracy": 0.668, "large_model_usage": 0.246, "small_model_usage": 0.754, "avg_lambda": 0.44999999999999624, "avg_cost": 0.00019328860000000003, "unrestricted": {"accuracy": 0.688, "large_model_usage": 0.246, "avg_cost": 0.0002017726000000001}, "random_baseline": {"accuracy": 0.6482, "accuracy_std": 0.012975361266646885, "avg_cost": 0.00019081188, "cost_std": 1.1502907892598245e-05, "small_model_fraction": 0.7555665367036472}, "unrestricted_random_baseline": {"accuracy": 0.6531999999999999, "accuracy_std": 0.009474175425861616, "avg_cost": 0.00020345220000000003, "cost_std": 1.1800865001515793e-05, "small_model_fraction": 0.7424735238415057}}, "0.25": {"accuracy": 0.652, "large_model_usage": 0.184, "small_model_usage": 0.8160000000000001, "avg_lambda": 0.29999999999999993, "avg_cost": 0.0001430165999999999, "unrestricted": {"accuracy": 0.676, "large_model_usage": 0.184, "avg_cost": 0.0001583326}, "random_baseline": {"accuracy": 0.6325999999999999, "accuracy_std": 0.009718024490605083, "avg_cost": 0.00014697445999999997, "cost_std": 1.1104246915140224e-05, "small_model_fraction": 0.8331492739273113}, "unrestricted_random_baseline": {"accuracy": 0.6442, "accuracy_std": 0.008874683092933523, "avg_cost": 0.00016293336, "cost_std": 9.302387999132247e-06, "small_model_fraction": 0.8095127127537427}}}}