{"trial": 30, "trial_seed": 71, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.594, "large_model_accuracy": 0.828, "small_model_cost": 3.480599999999999e-05, "large_model_cost": 0.0006797680000000004, "hybrid_results": {"0.05": {"accuracy": 0.842, "large_model_usage": 1.0, "small_model_usage": 0.0, "avg_lambda": 1.0, "avg_cost": 0.0007073146000000001, "unrestricted": {"accuracy": 0.842, "large_model_usage": 1.0, "avg_cost": 0.0007073146000000001}, "random_baseline": {"accuracy": 0.842, "accuracy_std": 0.0, "avg_cost": 0.0006728360000000005, "cost_std": 0.0, "small_model_fraction": 0.0}, "unrestricted_random_baseline": {"accuracy": 0.842, "accuracy_std": 0.0, "avg_cost": 0.0006728360000000005, "cost_std": 0.0, "small_model_fraction": 0.0}}, "0.1": {"accuracy": 0.766, "large_model_usage": 0.654, "small_model_usage": 0.346, "avg_lambda": 0.800000000000007, "avg_cost": 0.0004780226000000001, "unrestricted": {"accuracy": 0.774, "large_model_usage": 0.654, "avg_cost": 0.0004844225999999999}, "random_baseline": {"accuracy": 0.7678, "accuracy_std": 0.0124402572320672, "avg_cost": 0.0004696962000000001, "cost_std": 1.1070624801879948e-05, "small_model_fraction": 0.3128019945361125}, "unrestricted_random_baseline": {"accuracy": 0.771, "accuracy_std": 0.01230447073221763, "avg_cost": 0.00048345494000000006, "cost_std": 8.940564257383257e-06, "small_model_fraction": 0.3028789293012617}}, "0.15": {"accuracy": 0.72, "large_model_usage": 0.538, "small_model_usage": 0.46199999999999997, "avg_lambda": 0.6999999999999935, "avg_cost": 0.00038282660000000005, "unrestricted": {"accuracy": 0.75, "large_model_usage": 0.538, "avg_cost": 0.00040641859999999996}, "random_baseline": {"accuracy": 0.7354, "accuracy_std": 0.011315476127852518, "avg_cost": 0.00037389567999999995, "cost_std": 1.2434767785913843e-05, "small_model_fraction": 0.46040138798874997}, "unrestricted_random_baseline": {"accuracy": 0.7402, "accuracy_std": 0.010017983829094563, "avg_cost": 0.00040350573999999994, "cost_std": 1.1960200706024992e-05, "small_model_fraction": 0.42382248876678047}}, "0.2": {"accuracy": 0.672, "large_model_usage": 0.234, "small_model_usage": 0.766, "avg_lambda": 0.4000000000000035, "avg_cost": 0.00018571460000000003, "unrestricted": {"accuracy": 0.678, "large_model_usage": 0.234, "avg_cost": 0.00019780260000000022}, "random_baseline": {"accuracy": 0.6608, "accuracy_std": 0.01085172797300044, "avg_cost": 0.00018726742000000008, "cost_std": 1.005138935290047e-05, "small_model_fraction": 0.7660193933906184}, "unrestricted_random_baseline": {"accuracy": 0.6658000000000001, "accuracy_std": 0.00524976189936268, "avg_cost": 0.00019543830000000008, "cost_std": 1.3181617441346169e-05, "small_model_fraction": 0.7472772039282932}}, "0.25": {"accuracy": 0.608, "large_model_usage": 0.028, "small_model_usage": 0.972, "avg_lambda": 0.01999999999999975, "avg_cost": 4.913460000000001e-05, "unrestricted": {"accuracy": 0.618, "large_model_usage": 0.028, "avg_cost": 5.23186e-05}, "random_baseline": {"accuracy": 0.6044, "accuracy_std": 0.0024979991993593614, "avg_cost": 4.639247999999999e-05, "cost_std": 3.2910948818896024e-06, "small_model_fraction": 0.9777838074181113}, "unrestricted_random_baseline": {"accuracy": 0.6060000000000001, "accuracy_std": 0.0040987803063838426, "avg_cost": 5.1352599999999993e-05, "cost_std": 3.978906080821711e-06, "small_model_fraction": 0.972847082463773}}}}