{"trial": 22, "trial_seed": 63, "subject": "TruthfulQA", "method": "standard_basescores", "calibration_size": 400, "test_size": 284, "small_model_accuracy": 0.554093567251462, "large_model_accuracy": 0.8157894736842105, "small_model_cost": 3.205672514619885e-05, "large_model_cost": 0.000614912280701756, "hybrid_results": {"0.05": {"accuracy": 0.7711267605633803, "large_model_usage": 0.7570422535211268, "small_model_usage": 0.24295774647887325, "avg_lambda": 0.8300000000000048, "avg_cost": 0.0004972105633802814, "unrestricted": {"accuracy": 0.7816901408450704, "large_model_usage": 0.7570422535211268, "avg_cost": 0.0005026964788732394}, "random_baseline": {"accuracy": 0.7619718309859155, "accuracy_std": 0.016074242550018752, "avg_cost": 0.0005001486267605632, "cost_std": 1.3752069510676227e-05, "small_model_fraction": 0.2019397708395959}, "unrestricted_random_baseline": {"accuracy": 0.7665492957746479, "accuracy_std": 0.009188724190633712, "avg_cost": 0.0005054066549295774, "cost_std": 1.0632746047958653e-05, "small_model_fraction": 0.19252763529302983}}, "0.1": {"accuracy": 0.7394366197183099, "large_model_usage": 0.6091549295774648, "small_model_usage": 0.39084507042253525, "avg_lambda": 0.7100000000000012, "avg_cost": 0.0003944147887323942, "unrestricted": {"accuracy": 0.7676056338028169, "large_model_usage": 0.6091549295774648, "avg_cost": 0.00040997816901408433}, "random_baseline": {"accuracy": 0.7204225352112675, "accuracy_std": 0.013193657743095224, "avg_cost": 0.00040589369718309836, "cost_std": 1.7145322354548774e-05, "small_model_fraction": 0.37830555077954336}, "unrestricted_random_baseline": {"accuracy": 0.7257042253521128, "accuracy_std": 0.010138154964005597, "avg_cost": 0.0004170373239436618, "cost_std": 1.220733509932192e-05, "small_model_fraction": 0.35160360012754066}}, "0.15": {"accuracy": 0.676056338028169, "large_model_usage": 0.4225352112676056, "small_model_usage": 0.5774647887323944, "avg_lambda": 0.5100000000000009, "avg_cost": 0.00028423169014084505, "unrestricted": {"accuracy": 0.7007042253521126, "large_model_usage": 0.4225352112676056, "avg_cost": 0.0002947176056338027}, "random_baseline": {"accuracy": 0.6566901408450705, "accuracy_std": 0.00894255288676076, "avg_cost": 0.00027458204225352104, "cost_std": 2.2775873678436778e-05, "small_model_fraction": 0.5673456955312333}, "unrestricted_random_baseline": {"accuracy": 0.6644366197183098, "accuracy_std": 0.02047407290155861, "avg_cost": 0.00029443267605633794, "cost_std": 1.8376240207017378e-05, "small_model_fraction": 0.549355105250314}}, "0.2": {"accuracy": 0.6514084507042254, "large_model_usage": 0.34507042253521125, "small_model_usage": 0.6549295774647887, "avg_lambda": 0.40000000000000185, "avg_cost": 0.00023098521126760574, "unrestricted": {"accuracy": 0.6866197183098591, "large_model_usage": 0.34507042253521125, "avg_cost": 0.0002455274647887325}, "random_baseline": {"accuracy": 0.6355633802816902, "accuracy_std": 0.019043404623929533, "avg_cost": 0.0002293516549295774, "cost_std": 1.4532815895444086e-05, "small_model_fraction": 0.6587001972867955}, "unrestricted_random_baseline": {"accuracy": 0.6355633802816902, "accuracy_std": 0.013477224795073699, "avg_cost": 0.0002428576408450704, "cost_std": 1.4837874219648636e-05, "small_model_fraction": 0.6337501845734987}}, "0.25": {"accuracy": 0.5633802816901409, "large_model_usage": 0.25, "small_model_usage": 0.75, "avg_lambda": 0.20000000000000093, "avg_cost": 0.00016710492957746492, "unrestricted": {"accuracy": 0.6443661971830986, "large_model_usage": 0.25, "avg_cost": 0.00018685140845070425}, "random_baseline": {"accuracy": 0.6014084507042254, "accuracy_std": 0.015412724386084004, "avg_cost": 0.00016365542253521124, "cost_std": 1.0608594491397986e-05, "small_model_fraction": 0.7682990182661245}, "unrestricted_random_baseline": {"accuracy": 0.6165492957746479, "accuracy_std": 0.015140845070422558, "avg_cost": 0.00018722031690140842, "cost_std": 1.3619363619128842e-05, "small_model_fraction": 0.7344201632307328}}}}