{"trial": 1, "trial_seed": 42, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.599, "large_model_accuracy": 0.837, "small_model_cost": 3.481870000000001e-05, "large_model_cost": 0.0006812459999999995, "hybrid_results": {"0.05": {"accuracy": 0.82, "large_model_usage": 0.81, "small_model_usage": 0.18999999999999995, "avg_lambda": 0.8999999999999925, "avg_cost": 0.0005947121999999996, "unrestricted": {"accuracy": 0.822, "large_model_usage": 0.81, "avg_cost": 0.0005956641999999996}, "random_baseline": {"accuracy": 0.8236000000000001, "accuracy_std": 0.007578918128598553, "avg_cost": 0.0005950598199999996, "cost_std": 9.445405880723154e-06, "small_model_fraction": 0.13386470528085678}, "unrestricted_random_baseline": {"accuracy": 0.8140000000000001, "accuracy_std": 0.007694153624668496, "avg_cost": 0.0005921676799999997, "cost_std": 6.48857909696725e-06, "small_model_fraction": 0.13239199520193526}}, "0.1": {"accuracy": 0.784, "large_model_usage": 0.542, "small_model_usage": 0.45799999999999996, "avg_lambda": 0.7599999999999948, "avg_cost": 0.0004086962, "unrestricted": {"accuracy": 0.788, "large_model_usage": 0.542, "avg_cost": 0.0004140682000000001}, "random_baseline": {"accuracy": 0.75, "accuracy_std": 0.01258570617804183, "avg_cost": 0.00040636325999999996, "cost_std": 6.9568976724112e-06, "small_model_fraction": 0.42162482927314443}, "unrestricted_random_baseline": {"accuracy": 0.7486, "accuracy_std": 0.012459534501738027, "avg_cost": 0.0004024597799999999, "cost_std": 1.7229580202071084e-05, "small_model_fraction": 0.41331453668494456}}, "0.15": {"accuracy": 0.718, "large_model_usage": 0.346, "small_model_usage": 0.654, "avg_lambda": 0.6100000000000078, "avg_cost": 0.00026924420000000004, "unrestricted": {"accuracy": 0.726, "large_model_usage": 0.346, "avg_cost": 0.00027845220000000023}, "random_baseline": {"accuracy": 0.6940000000000001, "accuracy_std": 0.012521980673998782, "avg_cost": 0.00026882512000000016, "cost_std": 1.2092850237789207e-05, "small_model_fraction": 0.6373521044052437}, "unrestricted_random_baseline": {"accuracy": 0.7022, "accuracy_std": 0.009610411021387155, "avg_cost": 0.0002841587600000001, "cost_std": 1.3857361838329767e-05, "small_model_fraction": 0.6231076564990365}}, "0.2": {"accuracy": 0.684, "large_model_usage": 0.238, "small_model_usage": 0.762, "avg_lambda": 0.4000000000000035, "avg_cost": 0.00018983219999999987, "unrestricted": {"accuracy": 0.7, "large_model_usage": 0.238, "avg_cost": 0.00020116820000000004}, "random_baseline": {"accuracy": 0.6611999999999999, "accuracy_std": 0.008400000000000008, "avg_cost": 0.00018795226000000012, "cost_std": 1.4072018773452512e-05, "small_model_fraction": 0.7601996388456986}, "unrestricted_random_baseline": {"accuracy": 0.6708000000000001, "accuracy_std": 0.008207313811473283, "avg_cost": 0.00020202626000000016, "cost_std": 6.793353972847324e-06, "small_model_fraction": 0.7426632507630787}}, "0.25": {"accuracy": 0.63, "large_model_usage": 0.12, "small_model_usage": 0.88, "avg_lambda": 0.05000000000000044, "avg_cost": 0.00010418019999999991, "unrestricted": {"accuracy": 0.666, "large_model_usage": 0.12, "avg_cost": 0.00011909219999999996}, "random_baseline": {"accuracy": 0.6306, "accuracy_std": 0.008101851640211646, "avg_cost": 9.974824e-05, "cost_std": 8.172993397672611e-06, "small_model_fraction": 0.8927002309463108}, "unrestricted_random_baseline": {"accuracy": 0.639, "accuracy_std": 0.0050000000000000044, "avg_cost": 0.00011887375999999994, "cost_std": 8.779841719666692e-06, "small_model_fraction": 0.8696318982815238}}}}