{"trial": 29, "trial_seed": 70, "subject": "TruthfulQA", "method": "standard", "calibration_size": 400, "test_size": 284, "small_model_accuracy": 0.5906432748538012, "large_model_accuracy": 0.8406432748538012, "small_model_cost": 3.226681286549711e-05, "large_model_cost": 0.00062158187134503, "hybrid_results": {"0.05": {"accuracy": 0.8098591549295775, "large_model_usage": 0.8345070422535211, "small_model_usage": 0.16549295774647887, "avg_lambda": 0.900000000000004, "avg_cost": 0.0005571602112676059, "unrestricted": {"accuracy": 0.8098591549295775, "large_model_usage": 0.8345070422535211, "avg_cost": 0.0005581038732394369}, "random_baseline": {"accuracy": 0.8109154929577466, "accuracy_std": 0.0066901408450704145, "avg_cost": 0.0005610594718309859, "cost_std": 1.053557050402685e-05, "small_model_fraction": 0.10931616144959169}, "unrestricted_random_baseline": {"accuracy": 0.8123239436619718, "accuracy_std": 0.01527130171986017, "avg_cost": 0.0005620054225352114, "cost_std": 1.1894192940640717e-05, "small_model_fraction": 0.10771487541718354}}, "0.1": {"accuracy": 0.7746478873239436, "large_model_usage": 0.6267605633802817, "small_model_usage": 0.37323943661971826, "avg_lambda": 0.7599999999999976, "avg_cost": 0.00042226584507042267, "unrestricted": {"accuracy": 0.778169014084507, "large_model_usage": 0.6267605633802817, "avg_cost": 0.00042764612676056353}, "random_baseline": {"accuracy": 0.7503521126760563, "accuracy_std": 0.016624044555088535, "avg_cost": 0.0004159550352112676, "cost_std": 9.555850254101042e-06, "small_model_fraction": 0.33821641481358766}, "unrestricted_random_baseline": {"accuracy": 0.751056338028169, "accuracy_std": 0.013732394366197165, "avg_cost": 0.00042504278169014077, "cost_std": 1.7962189598261877e-05, "small_model_fraction": 0.3290866944497091}}, "0.15": {"accuracy": 0.7147887323943662, "large_model_usage": 0.397887323943662, "small_model_usage": 0.602112676056338, "avg_lambda": 0.5100000000000009, "avg_cost": 0.00027439964788732375, "unrestricted": {"accuracy": 0.7288732394366197, "large_model_usage": 0.397887323943662, "avg_cost": 0.0002842095070422534}, "random_baseline": {"accuracy": 0.6869718309859154, "accuracy_std": 0.016624044555088553, "avg_cost": 0.00027430045774647874, "cost_std": 1.8194449632983734e-05, "small_model_fraction": 0.5891283761753121}, "unrestricted_random_baseline": {"accuracy": 0.6890845070422535, "accuracy_std": 0.016594185447814318, "avg_cost": 0.00028249257042253507, "cost_std": 1.4207259389568296e-05, "small_model_fraction": 0.5724821713757272}}, "0.2": {"accuracy": 0.6795774647887324, "large_model_usage": 0.27816901408450706, "small_model_usage": 0.721830985915493, "avg_lambda": 0.35999999999999954, "avg_cost": 0.00019706866197183084, "unrestricted": {"accuracy": 0.7147887323943662, "large_model_usage": 0.27816901408450706, "avg_cost": 0.00020877288732394351}, "random_baseline": {"accuracy": 0.6640845070422535, "accuracy_std": 0.011157027829404816, "avg_cost": 0.00020362623239436606, "cost_std": 1.6802239511739166e-05, "small_model_fraction": 0.7203501815624191}, "unrestricted_random_baseline": {"accuracy": 0.6630281690140845, "accuracy_std": 0.012503718946876534, "avg_cost": 0.0002078869366197182, "cost_std": 2.024609798505775e-05, "small_model_fraction": 0.7004894548022541}}, "0.25": {"accuracy": 0.6373239436619719, "large_model_usage": 0.23943661971830985, "small_model_usage": 0.7605633802816901, "avg_lambda": 0.2700000000000005, "avg_cost": 0.00016804049295774634, "unrestricted": {"accuracy": 0.7007042253521126, "large_model_usage": 0.23943661971830985, "avg_cost": 0.0001845264084507041}, "random_baseline": {"accuracy": 0.6338028169014084, "accuracy_std": 0.01659044928083064, "avg_cost": 0.0001628509507042253, "cost_std": 1.6559783729459892e-05, "small_model_fraction": 0.7696076519026118}, "unrestricted_random_baseline": {"accuracy": 0.6461267605633803, "accuracy_std": 0.014959609708625352, "avg_cost": 0.0001805014436619717, "cost_std": 1.1139011820831484e-05, "small_model_fraction": 0.7416329459185286}}}}