{"trial": 7, "trial_seed": 48, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.604, "large_model_accuracy": 0.836, "small_model_cost": 3.457159999999999e-05, "large_model_cost": 0.0006758320000000003, "hybrid_results": {"0.05": {"accuracy": 0.838, "large_model_usage": 1.0, "small_model_usage": 0.0, "avg_lambda": 1.0, "avg_cost": 0.0007152458000000006, "unrestricted": {"accuracy": 0.838, "large_model_usage": 1.0, "avg_cost": 0.0007152458000000006}, "random_baseline": {"accuracy": 0.8379999999999999, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.0006804359999999998, "cost_std": 0.0, "small_model_fraction": 0.0}, "unrestricted_random_baseline": {"accuracy": 0.8379999999999999, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.0006804359999999998, "cost_std": 0.0, "small_model_fraction": 0.0}}, "0.1": {"accuracy": 0.768, "large_model_usage": 0.576, "small_model_usage": 0.42400000000000004, "avg_lambda": 0.7900000000000055, "avg_cost": 0.0004309218, "unrestricted": {"accuracy": 0.772, "large_model_usage": 0.576, "avg_cost": 0.00043473779999999993}, "random_baseline": {"accuracy": 0.752, "accuracy_std": 0.012585706178041832, "avg_cost": 0.0004363408199999998, "cost_std": 8.119878438228048e-06, "small_model_fraction": 0.3819200437139112}, "unrestricted_random_baseline": {"accuracy": 0.7542, "accuracy_std": 0.010675204916066024, "avg_cost": 0.0004459048, "cost_std": 1.559119377648796e-05, "small_model_fraction": 0.3759692630326156}}, "0.15": {"accuracy": 0.746, "large_model_usage": 0.55, "small_model_usage": 0.44999999999999996, "avg_lambda": 0.6999999999999935, "avg_cost": 0.0003951217999999999, "unrestricted": {"accuracy": 0.768, "large_model_usage": 0.55, "avg_cost": 0.0004174497999999999}, "random_baseline": {"accuracy": 0.74, "accuracy_std": 0.010733126291999, "avg_cost": 0.00040156385999999994, "cost_std": 1.7570388247286896e-05, "small_model_fraction": 0.437747598323552}, "unrestricted_random_baseline": {"accuracy": 0.7490000000000001, "accuracy_std": 0.006884765791223408, "avg_cost": 0.00042485929999999986, "cost_std": 1.773996575131973e-05, "small_model_fraction": 0.402928669850813}}, "0.2": {"accuracy": 0.698, "large_model_usage": 0.302, "small_model_usage": 0.698, "avg_lambda": 0.44999999999999624, "avg_cost": 0.0002371858, "unrestricted": {"accuracy": 0.708, "large_model_usage": 0.302, "avg_cost": 0.0002475057999999999}, "random_baseline": {"accuracy": 0.6895999999999999, "accuracy_std": 0.008236504112789563, "avg_cost": 0.00023541144000000002, "cost_std": 1.0862698815966531e-05, "small_model_fraction": 0.6840375610282501}, "unrestricted_random_baseline": {"accuracy": 0.6904, "accuracy_std": 0.009286549412995082, "avg_cost": 0.00025214114, "cost_std": 1.1853094352125896e-05, "small_model_fraction": 0.6679442547832365}}, "0.25": {"accuracy": 0.65, "large_model_usage": 0.174, "small_model_usage": 0.8260000000000001, "avg_lambda": 0.079999999999999, "avg_cost": 0.0001362098, "unrestricted": {"accuracy": 0.676, "large_model_usage": 0.174, "avg_cost": 0.0001568577999999999}, "random_baseline": {"accuracy": 0.6504000000000001, "accuracy_std": 0.006499230723708773, "avg_cost": 0.000133117, "cost_std": 5.696465087753975e-06, "small_model_fraction": 0.8415024536054307}, "unrestricted_random_baseline": {"accuracy": 0.6557999999999999, "accuracy_std": 0.005963220606350235, "avg_cost": 0.00015904919999999996, "cost_std": 9.590881607443588e-06, "small_model_fraction": 0.8093033656842059}}}}