{"trial": 27, "trial_seed": 68, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.598, "large_model_accuracy": 0.824, "small_model_cost": 3.4399400000000015e-05, "large_model_cost": 0.0006710759999999995, "hybrid_results": {"0.05": {"accuracy": 0.812, "large_model_usage": 0.794, "small_model_usage": 0.20599999999999996, "avg_lambda": 0.8799999999999963, "avg_cost": 0.0005694784000000004, "unrestricted": {"accuracy": 0.822, "large_model_usage": 0.794, "avg_cost": 0.0005824824000000003}, "random_baseline": {"accuracy": 0.8102, "accuracy_std": 0.004044749683231304, "avg_cost": 0.00058120422, "cost_std": 5.343034901214926e-06, "small_model_fraction": 0.15957489249644047}, "unrestricted_random_baseline": {"accuracy": 0.807, "accuracy_std": 0.009767292357659798, "avg_cost": 0.0005922154199999999, "cost_std": 9.580509062654367e-06, "small_model_fraction": 0.1391500802762333}}, "0.1": {"accuracy": 0.78, "large_model_usage": 0.538, "small_model_usage": 0.46199999999999997, "avg_lambda": 0.7599999999999948, "avg_cost": 0.00040855439999999986, "unrestricted": {"accuracy": 0.786, "large_model_usage": 0.538, "avg_cost": 0.0004139063999999997}, "random_baseline": {"accuracy": 0.7454, "accuracy_std": 0.008765842800324459, "avg_cost": 0.00040707393999999997, "cost_std": 1.703422511323601e-05, "small_model_fraction": 0.4123311583934447}, "unrestricted_random_baseline": {"accuracy": 0.744, "accuracy_std": 0.009633275663033845, "avg_cost": 0.00041511299999999994, "cost_std": 9.576650538053467e-06, "small_model_fraction": 0.40392500682450083}}, "0.15": {"accuracy": 0.704, "large_model_usage": 0.348, "small_model_usage": 0.652, "avg_lambda": 0.6100000000000078, "avg_cost": 0.00027280240000000013, "unrestricted": {"accuracy": 0.72, "large_model_usage": 0.348, "avg_cost": 0.0002810104}, "random_baseline": {"accuracy": 0.6932, "accuracy_std": 0.008304215796810656, "avg_cost": 0.00027237805999999996, "cost_std": 1.559439532551361e-05, "small_model_fraction": 0.62555086836865}, "unrestricted_random_baseline": {"accuracy": 0.6936, "accuracy_std": 0.015067846561469863, "avg_cost": 0.00028295017999999995, "cost_std": 1.2922965437220609e-05, "small_model_fraction": 0.6126589229131395}}, "0.2": {"accuracy": 0.662, "large_model_usage": 0.198, "small_model_usage": 0.802, "avg_lambda": 0.3600000000000036, "avg_cost": 0.00015831440000000008, "unrestricted": {"accuracy": 0.676, "large_model_usage": 0.198, "avg_cost": 0.00017313840000000004}, "random_baseline": {"accuracy": 0.6446000000000001, "accuracy_std": 0.008901685233707162, "avg_cost": 0.00015852728000000001, "cost_std": 1.2000181153699282e-05, "small_model_fraction": 0.805372146549755}, "unrestricted_random_baseline": {"accuracy": 0.6588, "accuracy_std": 0.008634813257969168, "avg_cost": 0.00017588918000000002, "cost_std": 1.274537551285169e-05, "small_model_fraction": 0.7820887401861477}}, "0.25": {"accuracy": 0.616, "large_model_usage": 0.048, "small_model_usage": 0.952, "avg_lambda": 0.0399999999999995, "avg_cost": 6.128640000000004e-05, "unrestricted": {"accuracy": 0.626, "large_model_usage": 0.048, "avg_cost": 6.669840000000002e-05}, "random_baseline": {"accuracy": 0.6113999999999999, "accuracy_std": 0.004294182110716781, "avg_cost": 6.186388000000001e-05, "cost_std": 3.903400424450448e-06, "small_model_fraction": 0.9577697688276904}, "unrestricted_random_baseline": {"accuracy": 0.6122, "accuracy_std": 0.0048538644398046435, "avg_cost": 6.764524000000001e-05, "cost_std": 7.572910753891131e-06, "small_model_fraction": 0.9492693778913814}}}}