{"trial": 23, "trial_seed": 64, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.588, "large_model_accuracy": 0.829, "small_model_cost": 3.463520000000001e-05, "large_model_cost": 0.0006764160000000001, "hybrid_results": {"0.05": {"accuracy": 0.786, "large_model_usage": 0.818, "small_model_usage": 0.18200000000000005, "avg_lambda": 0.8600000000000094, "avg_cost": 0.0005826700000000005, "unrestricted": {"accuracy": 0.796, "large_model_usage": 0.818, "avg_cost": 0.000602274}, "random_baseline": {"accuracy": 0.8036000000000001, "accuracy_std": 0.008380930735902792, "avg_cost": 0.0005912076000000003, "cost_std": 4.848683883282127e-06, "small_model_fraction": 0.1460716805488721}, "unrestricted_random_baseline": {"accuracy": 0.8134, "accuracy_std": 0.0057306195127577175, "avg_cost": 0.0006038043200000003, "cost_std": 8.967983396594844e-06, "small_model_fraction": 0.1155254255035366}}, "0.1": {"accuracy": 0.774, "large_model_usage": 0.562, "small_model_usage": 0.43799999999999994, "avg_lambda": 0.7599999999999948, "avg_cost": 0.00042403400000000043, "unrestricted": {"accuracy": 0.78, "large_model_usage": 0.562, "avg_cost": 0.00043007800000000035}, "random_baseline": {"accuracy": 0.7426, "accuracy_std": 0.00885663593019382, "avg_cost": 0.00042568360000000026, "cost_std": 1.0098509512200286e-05, "small_model_fraction": 0.393252649502758}, "unrestricted_random_baseline": {"accuracy": 0.7444, "accuracy_std": 0.010190191362285607, "avg_cost": 0.0004378134600000002, "cost_std": 1.3123008166742928e-05, "small_model_fraction": 0.3838351038236104}}, "0.15": {"accuracy": 0.678, "large_model_usage": 0.304, "small_model_usage": 0.696, "avg_lambda": 0.5, "avg_cost": 0.0002449659999999998, "unrestricted": {"accuracy": 0.684, "large_model_usage": 0.304, "avg_cost": 0.00025373399999999976}, "random_baseline": {"accuracy": 0.6644, "accuracy_std": 0.010725670142233548, "avg_cost": 0.00024383707999999998, "cost_std": 1.0508220003483025e-05, "small_model_fraction": 0.672270033631421}, "unrestricted_random_baseline": {"accuracy": 0.6744000000000001, "accuracy_std": 0.0065604877867426956, "avg_cost": 0.00025427048000000005, "cost_std": 1.4805722007710458e-05, "small_model_fraction": 0.6586080481061451}}, "0.2": {"accuracy": 0.634, "large_model_usage": 0.214, "small_model_usage": 0.786, "avg_lambda": 0.31000000000000166, "avg_cost": 0.00016978199999999978, "unrestricted": {"accuracy": 0.666, "large_model_usage": 0.214, "avg_cost": 0.00018922199999999985}, "random_baseline": {"accuracy": 0.641, "accuracy_std": 0.010246950765959608, "avg_cost": 0.00017612885999999998, "cost_std": 1.1714746434831608e-05, "small_model_fraction": 0.7894190664476101}, "unrestricted_random_baseline": {"accuracy": 0.6529999999999999, "accuracy_std": 0.007000000000000006, "avg_cost": 0.00019616249999999995, "cost_std": 1.1399736923718906e-05, "small_model_fraction": 0.7591283503651094}}, "0.25": {"accuracy": 0.61, "large_model_usage": 0.14, "small_model_usage": 0.86, "avg_lambda": 0.05000000000000044, "avg_cost": 0.00012002200000000004, "unrestricted": {"accuracy": 0.638, "large_model_usage": 0.14, "avg_cost": 0.0001387619999999999}, "random_baseline": {"accuracy": 0.6164000000000001, "accuracy_std": 0.00902441133814279, "avg_cost": 0.00012035628000000007, "cost_std": 9.896980742509262e-06, "small_model_fraction": 0.8669533273666025}, "unrestricted_random_baseline": {"accuracy": 0.6256, "accuracy_std": 0.007631513611335572, "avg_cost": 0.000136564, "cost_std": 8.213511334867676e-06, "small_model_fraction": 0.8377533263693775}}}}