{"trial": 6, "trial_seed": 47, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.596, "large_model_accuracy": 0.833, "small_model_cost": 3.464059999999996e-05, "large_model_cost": 0.0006767080000000006, "hybrid_results": {"0.05": {"accuracy": 0.776, "large_model_usage": 0.794, "small_model_usage": 0.20599999999999996, "avg_lambda": 0.8600000000000094, "avg_cost": 0.0005568913999999995, "unrestricted": {"accuracy": 0.792, "large_model_usage": 0.794, "avg_cost": 0.0005741753999999996}, "random_baseline": {"accuracy": 0.7942000000000001, "accuracy_std": 0.009357350052231676, "avg_cost": 0.0005541260399999999, "cost_std": 1.1608155156371674e-05, "small_model_fraction": 0.18661062685942467}, "unrestricted_random_baseline": {"accuracy": 0.7960000000000002, "accuracy_std": 0.006985699678629199, "avg_cost": 0.0005662482399999997, "cost_std": 1.138713261319099e-05, "small_model_fraction": 0.15969133458574736}}, "0.1": {"accuracy": 0.752, "large_model_usage": 0.522, "small_model_usage": 0.478, "avg_lambda": 0.7599999999999948, "avg_cost": 0.0003851273999999998, "unrestricted": {"accuracy": 0.756, "large_model_usage": 0.522, "avg_cost": 0.0003927313999999998}, "random_baseline": {"accuracy": 0.7308, "accuracy_std": 0.010205880657738468, "avg_cost": 0.00038428645999999975, "cost_std": 8.954430216177866e-06, "small_model_fraction": 0.45412771307186833}, "unrestricted_random_baseline": {"accuracy": 0.7390000000000001, "accuracy_std": 0.01132254388377454, "avg_cost": 0.00039320269999999996, "cost_std": 1.1233363458644035e-05, "small_model_fraction": 0.44228471964158356}}, "0.15": {"accuracy": 0.69, "large_model_usage": 0.298, "small_model_usage": 0.702, "avg_lambda": 0.5800000000000031, "avg_cost": 0.0002302834, "unrestricted": {"accuracy": 0.692, "large_model_usage": 0.298, "avg_cost": 0.00023944339999999983}, "random_baseline": {"accuracy": 0.6688000000000001, "accuracy_std": 0.012399999999999993, "avg_cost": 0.00022646939999999984, "cost_std": 1.8048285236664465e-05, "small_model_fraction": 0.6952924256861509}, "unrestricted_random_baseline": {"accuracy": 0.6744000000000001, "accuracy_std": 0.010762899237658944, "avg_cost": 0.00023655475999999977, "cost_std": 1.6286593007206927e-05, "small_model_fraction": 0.6810260106649245}}, "0.2": {"accuracy": 0.652, "large_model_usage": 0.188, "small_model_usage": 0.812, "avg_lambda": 0.3899999999999957, "avg_cost": 0.0001534114000000001, "unrestricted": {"accuracy": 0.66, "large_model_usage": 0.188, "avg_cost": 0.00016349540000000005}, "random_baseline": {"accuracy": 0.6489999999999999, "accuracy_std": 0.008306623862918082, "avg_cost": 0.0001518997800000001, "cost_std": 1.0769317712260043e-05, "small_model_fraction": 0.8150181741044631}, "unrestricted_random_baseline": {"accuracy": 0.647, "accuracy_std": 0.009808159868191392, "avg_cost": 0.00015381171999999998, "cost_std": 1.334963539403221e-05, "small_model_fraction": 0.7993126578300035}}, "0.25": {"accuracy": 0.61, "large_model_usage": 0.122, "small_model_usage": 0.878, "avg_lambda": 0.05999999999999949, "avg_cost": 0.0001022314000000001, "unrestricted": {"accuracy": 0.64, "large_model_usage": 0.122, "avg_cost": 0.00011641540000000011}, "random_baseline": {"accuracy": 0.6266, "accuracy_std": 0.007748548251124214, "avg_cost": 0.00010068308000000011, "cost_std": 1.3284523558246296e-05, "small_model_fraction": 0.8947294318322343}, "unrestricted_random_baseline": {"accuracy": 0.6314000000000001, "accuracy_std": 0.006575712889109444, "avg_cost": 0.00011525478000000014, "cost_std": 7.89489314352007e-06, "small_model_fraction": 0.8726382931137757}}}}