{"trial": 19, "trial_seed": 60, "subject": "TruthfulQA", "method": "standard_basescores", "calibration_size": 400, "test_size": 284, "small_model_accuracy": 0.5526315789473685, "large_model_accuracy": 0.8377192982456141, "small_model_cost": 3.2065058479532224e-05, "large_model_cost": 0.0006145643274853809, "hybrid_results": {"0.05": {"accuracy": 0.7746478873239436, "large_model_usage": 0.778169014084507, "small_model_usage": 0.221830985915493, "avg_lambda": 0.8600000000000062, "avg_cost": 0.0005025095070422535, "unrestricted": {"accuracy": 0.7852112676056338, "large_model_usage": 0.778169014084507, "avg_cost": 0.0005131714788732398}, "random_baseline": {"accuracy": 0.7577464788732394, "accuracy_std": 0.01914405242122277, "avg_cost": 0.0005004165140845068, "cost_std": 1.6187441796238684e-05, "small_model_fraction": 0.19236903186912374}, "unrestricted_random_baseline": {"accuracy": 0.7619718309859154, "accuracy_std": 0.010818515137843095, "avg_cost": 0.0005098509859154928, "cost_std": 1.655567446130417e-05, "small_model_fraction": 0.17406519459704778}}, "0.1": {"accuracy": 0.7464788732394366, "large_model_usage": 0.5985915492957746, "small_model_usage": 0.4014084507042254, "avg_lambda": 0.7100000000000012, "avg_cost": 0.00038769260563380263, "unrestricted": {"accuracy": 0.7640845070422535, "large_model_usage": 0.5985915492957746, "avg_cost": 0.00040320669014084495}, "random_baseline": {"accuracy": 0.7028169014084507, "accuracy_std": 0.012115951080341747, "avg_cost": 0.0003886156338028167, "cost_std": 1.6714838143958635e-05, "small_model_fraction": 0.3894798395863744}, "unrestricted_random_baseline": {"accuracy": 0.7095070422535212, "accuracy_std": 0.01824541747610089, "avg_cost": 0.0003962413028169012, "cost_std": 1.0794645509625841e-05, "small_model_fraction": 0.36284618469180224}}, "0.15": {"accuracy": 0.6901408450704225, "large_model_usage": 0.4119718309859155, "small_model_usage": 0.5880281690140845, "avg_lambda": 0.5300000000000009, "avg_cost": 0.00027752359154929583, "unrestricted": {"accuracy": 0.7253521126760564, "large_model_usage": 0.4119718309859155, "avg_cost": 0.0002878334507042254}, "random_baseline": {"accuracy": 0.6524647887323944, "accuracy_std": 0.018163690671167503, "avg_cost": 0.00027632214788732393, "cost_std": 1.520683457137826e-05, "small_model_fraction": 0.5786114315839613}, "unrestricted_random_baseline": {"accuracy": 0.6517605633802817, "accuracy_std": 0.010732218770512782, "avg_cost": 0.000289556514084507, "cost_std": 2.173635094653623e-05, "small_model_fraction": 0.5609120803512544}}, "0.2": {"accuracy": 0.6584507042253521, "large_model_usage": 0.3345070422535211, "small_model_usage": 0.6654929577464789, "avg_lambda": 0.40000000000000185, "avg_cost": 0.00022824190140845063, "unrestricted": {"accuracy": 0.6971830985915493, "large_model_usage": 0.3345070422535211, "avg_cost": 0.00023981936619718307}, "random_baseline": {"accuracy": 0.6158450704225352, "accuracy_std": 0.012641776810574839, "avg_cost": 0.0002198406690140845, "cost_std": 1.6696189350354824e-05, "small_model_fraction": 0.6632152976539639}, "unrestricted_random_baseline": {"accuracy": 0.6313380281690141, "accuracy_std": 0.022493910216456973, "avg_cost": 0.0002503084507042253, "cost_std": 2.1333762380394e-05, "small_model_fraction": 0.6433397966795305}}, "0.25": {"accuracy": 0.5563380281690141, "large_model_usage": 0.2640845070422535, "small_model_usage": 0.7359154929577465, "avg_lambda": 0.20000000000000093, "avg_cost": 0.00017464330985915495, "unrestricted": {"accuracy": 0.6619718309859155, "large_model_usage": 0.2640845070422535, "avg_cost": 0.00019633345070422536}, "random_baseline": {"accuracy": 0.5919014084507043, "accuracy_std": 0.011512523045261826, "avg_cost": 0.00016497767605633804, "cost_std": 1.8383046865282722e-05, "small_model_fraction": 0.7552301625666227}, "unrestricted_random_baseline": {"accuracy": 0.6049295774647886, "accuracy_std": 0.01133343446438807, "avg_cost": 0.00019442306338028167, "cost_std": 1.2574170176796639e-05, "small_model_fraction": 0.7179938225415288}}}}