{"trial": 8, "trial_seed": 49, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.605, "large_model_accuracy": 0.835, "small_model_cost": 3.4565800000000014e-05, "large_model_cost": 0.0006763799999999996, "hybrid_results": {"0.05": {"accuracy": 0.782, "large_model_usage": 0.792, "small_model_usage": 0.20799999999999996, "avg_lambda": 0.8999999999999925, "avg_cost": 0.0005720950000000002, "unrestricted": {"accuracy": 0.782, "large_model_usage": 0.792, "avg_cost": 0.0005728710000000004}, "random_baseline": {"accuracy": 0.7904000000000001, "accuracy_std": 0.008187795796183495, "avg_cost": 0.0005662060799999996, "cost_std": 9.074434027177601e-06, "small_model_fraction": 0.16248471909783158}, "unrestricted_random_baseline": {"accuracy": 0.7902000000000001, "accuracy_std": 0.01129424632279641, "avg_cost": 0.0005697064799999996, "cost_std": 8.71383390899784e-06, "small_model_fraction": 0.16127564644097825}}, "0.1": {"accuracy": 0.75, "large_model_usage": 0.556, "small_model_usage": 0.44399999999999995, "avg_lambda": 0.7599999999999948, "avg_cost": 0.0004098110000000001, "unrestricted": {"accuracy": 0.756, "large_model_usage": 0.556, "avg_cost": 0.0004165790000000001}, "random_baseline": {"accuracy": 0.7328, "accuracy_std": 0.010476640682967045, "avg_cost": 0.0004034586199999998, "cost_std": 7.089592451163917e-06, "small_model_fraction": 0.4153367127121832}, "unrestricted_random_baseline": {"accuracy": 0.7338, "accuracy_std": 0.009357350052231676, "avg_cost": 0.00040843385999999964, "cost_std": 1.0433892528505325e-05, "small_model_fraction": 0.4047916047977744}}, "0.15": {"accuracy": 0.678, "large_model_usage": 0.31, "small_model_usage": 0.69, "avg_lambda": 0.5399999999999986, "avg_cost": 0.00023978299999999984, "unrestricted": {"accuracy": 0.682, "large_model_usage": 0.31, "avg_cost": 0.00024748299999999984}, "random_baseline": {"accuracy": 0.6668000000000001, "accuracy_std": 0.012718490476467729, "avg_cost": 0.0002337922200000001, "cost_std": 1.464879429071214e-05, "small_model_fraction": 0.6802545035619344}, "unrestricted_random_baseline": {"accuracy": 0.6764, "accuracy_std": 0.009991996797437409, "avg_cost": 0.00024152122, "cost_std": 1.1746169011707669e-05, "small_model_fraction": 0.6682572619926453}}, "0.2": {"accuracy": 0.668, "large_model_usage": 0.224, "small_model_usage": 0.776, "avg_lambda": 0.37000000000000255, "avg_cost": 0.00017666699999999995, "unrestricted": {"accuracy": 0.67, "large_model_usage": 0.224, "avg_cost": 0.00019011899999999996}, "random_baseline": {"accuracy": 0.6476, "accuracy_std": 0.009707728879609285, "avg_cost": 0.00017394283999999993, "cost_std": 1.3649710975636103e-05, "small_model_fraction": 0.7785944904304081}, "unrestricted_random_baseline": {"accuracy": 0.6578, "accuracy_std": 0.009693296652842116, "avg_cost": 0.0001887725, "cost_std": 1.4495466738880867e-05, "small_model_fraction": 0.7576351536005279}}, "0.25": {"accuracy": 0.63, "large_model_usage": 0.142, "small_model_usage": 0.858, "avg_lambda": 0.05000000000000044, "avg_cost": 0.00011571900000000004, "unrestricted": {"accuracy": 0.648, "large_model_usage": 0.142, "avg_cost": 0.00013309100000000006}, "random_baseline": {"accuracy": 0.6255999999999999, "accuracy_std": 0.007364781055808794, "avg_cost": 0.00011321641999999998, "cost_std": 1.1101949275852412e-05, "small_model_fraction": 0.8735565526596326}, "unrestricted_random_baseline": {"accuracy": 0.6277999999999999, "accuracy_std": 0.009734474818910375, "avg_cost": 0.00012600279999999995, "cost_std": 4.790000745720177e-06, "small_model_fraction": 0.8464895292126597}}}}