{"trial": 13, "trial_seed": 54, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.563, "large_model_accuracy": 0.826, "small_model_cost": 3.4402799999999955e-05, "large_model_cost": 0.0006713359999999993, "hybrid_results": {"0.05": {"accuracy": 0.848, "large_model_usage": 1.0, "small_model_usage": 0.0, "avg_lambda": 1.0, "avg_cost": 0.0007077655999999996, "unrestricted": {"accuracy": 0.848, "large_model_usage": 1.0, "avg_cost": 0.0007077655999999996}, "random_baseline": {"accuracy": 0.8480000000000001, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.0006732640000000005, "cost_std": 1.0842021724855044e-19, "small_model_fraction": 0.0}, "unrestricted_random_baseline": {"accuracy": 0.8480000000000001, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.0006732640000000005, "cost_std": 1.0842021724855044e-19, "small_model_fraction": 0.0}}, "0.1": {"accuracy": 0.73, "large_model_usage": 0.528, "small_model_usage": 0.472, "avg_lambda": 0.7599999999999948, "avg_cost": 0.00039556559999999995, "unrestricted": {"accuracy": 0.74, "large_model_usage": 0.528, "avg_cost": 0.00040322160000000004}, "random_baseline": {"accuracy": 0.7404, "accuracy_std": 0.011164228589562302, "avg_cost": 0.00040674640000000007, "cost_std": 1.519289725602067e-05, "small_model_fraction": 0.4329659688017513}, "unrestricted_random_baseline": {"accuracy": 0.7338, "accuracy_std": 0.008784076502399107, "avg_cost": 0.0004033206600000001, "cost_std": 1.2251401527678472e-05, "small_model_fraction": 0.4209458699907612}}, "0.15": {"accuracy": 0.678, "large_model_usage": 0.328, "small_model_usage": 0.6719999999999999, "avg_lambda": 0.639999999999992, "avg_cost": 0.00025862159999999994, "unrestricted": {"accuracy": 0.676, "large_model_usage": 0.328, "avg_cost": 0.00026476559999999986}, "random_baseline": {"accuracy": 0.6794, "accuracy_std": 0.007485986908885149, "avg_cost": 0.00025822089999999996, "cost_std": 7.010911027391489e-06, "small_model_fraction": 0.6479712472202733}, "unrestricted_random_baseline": {"accuracy": 0.6834, "accuracy_std": 0.009509994742374954, "avg_cost": 0.00026683505999999997, "cost_std": 1.7062216388511768e-05, "small_model_fraction": 0.6383250237230527}}, "0.2": {"accuracy": 0.648, "large_model_usage": 0.24, "small_model_usage": 0.76, "avg_lambda": 0.44999999999999624, "avg_cost": 0.00019140159999999997, "unrestricted": {"accuracy": 0.662, "large_model_usage": 0.24, "avg_cost": 0.00020291360000000008}, "random_baseline": {"accuracy": 0.6519999999999999, "accuracy_std": 0.00638748776906853, "avg_cost": 0.00018457930000000008, "cost_std": 7.344016139551938e-06, "small_model_fraction": 0.7535082171882386}, "unrestricted_random_baseline": {"accuracy": 0.6524, "accuracy_std": 0.01341044369139218, "avg_cost": 0.00020517818000000002, "cost_std": 1.2294098276961984e-05, "small_model_fraction": 0.7354341083177949}}, "0.25": {"accuracy": 0.606, "large_model_usage": 0.154, "small_model_usage": 0.846, "avg_lambda": 0.2699999999999993, "avg_cost": 0.0001271296, "unrestricted": {"accuracy": 0.644, "large_model_usage": 0.154, "avg_cost": 0.00014200560000000005}, "random_baseline": {"accuracy": 0.6266, "accuracy_std": 0.00938296328459193, "avg_cost": 0.00013437552000000004, "cost_std": 1.364532384465832e-05, "small_model_fraction": 0.8544167583036962}, "unrestricted_random_baseline": {"accuracy": 0.6292, "accuracy_std": 0.005810335618533585, "avg_cost": 0.00014448884000000008, "cost_std": 7.3676717140763154e-06, "small_model_fraction": 0.8310610908647874}}}}