{"trial": 27, "trial_seed": 68, "subject": "TruthfulQA", "method": "standard_basescores", "calibration_size": 400, "test_size": 284, "small_model_accuracy": 0.5482456140350878, "large_model_accuracy": 0.8260233918128655, "small_model_cost": 3.200994152046785e-05, "large_model_cost": 0.0006154736842105275, "hybrid_results": {"0.05": {"accuracy": 0.8133802816901409, "large_model_usage": 0.6901408450704225, "small_model_usage": 0.3098591549295775, "avg_lambda": 0.8100000000000018, "avg_cost": 0.00045687957746478854, "unrestricted": {"accuracy": 0.8309859154929577, "large_model_usage": 0.6901408450704225, "avg_cost": 0.0004623654929577463}, "random_baseline": {"accuracy": 0.7806338028169014, "accuracy_std": 0.011360750566832181, "avg_cost": 0.0004687687323943661, "cost_std": 1.356332287202842e-05, "small_model_fraction": 0.2718148449371349}, "unrestricted_random_baseline": {"accuracy": 0.7855633802816901, "accuracy_std": 0.014211574697613097, "avg_cost": 0.0004700713732394366, "cost_std": 1.1176160104342362e-05, "small_model_fraction": 0.26241252035109475}}, "0.1": {"accuracy": 0.7852112676056338, "large_model_usage": 0.5774647887323944, "small_model_usage": 0.4225352112676056, "avg_lambda": 0.7299999999999969, "avg_cost": 0.00037883028169014075, "unrestricted": {"accuracy": 0.8063380281690141, "large_model_usage": 0.5774647887323944, "avg_cost": 0.0003928161971830985}, "random_baseline": {"accuracy": 0.7559859154929578, "accuracy_std": 0.011789203551255765, "avg_cost": 0.0003901331690140843, "cost_std": 1.452482059120546e-05, "small_model_fraction": 0.4055837324686917}, "unrestricted_random_baseline": {"accuracy": 0.7426056338028169, "accuracy_std": 0.017566858708825923, "avg_cost": 0.00038765387323943645, "cost_std": 1.3604445599378773e-05, "small_model_fraction": 0.3816132361556292}}, "0.15": {"accuracy": 0.7394366197183099, "large_model_usage": 0.4471830985915493, "small_model_usage": 0.5528169014084507, "avg_lambda": 0.5800000000000017, "avg_cost": 0.00030326690140845055, "unrestricted": {"accuracy": 0.7676056338028169, "large_model_usage": 0.4471830985915493, "avg_cost": 0.0003121894366197182}, "random_baseline": {"accuracy": 0.7007042253521127, "accuracy_std": 0.023356512608136634, "avg_cost": 0.00029786450704225346, "cost_std": 1.7942949714316848e-05, "small_model_fraction": 0.5350920030825695}, "unrestricted_random_baseline": {"accuracy": 0.7098591549295775, "accuracy_std": 0.011267605633802849, "avg_cost": 0.00031486021126760555, "cost_std": 1.5805466889639297e-05, "small_model_fraction": 0.5197996471769047}}, "0.2": {"accuracy": 0.704225352112676, "large_model_usage": 0.3485915492957746, "small_model_usage": 0.6514084507042254, "avg_lambda": 0.40000000000000185, "avg_cost": 0.0002371683098591549, "unrestricted": {"accuracy": 0.7429577464788732, "large_model_usage": 0.3485915492957746, "avg_cost": 0.0002509147887323943}, "random_baseline": {"accuracy": 0.6707746478873239, "accuracy_std": 0.010235522432217333, "avg_cost": 0.00023856091549295764, "cost_std": 1.0869747670792602e-05, "small_model_fraction": 0.6483785480948578}, "unrestricted_random_baseline": {"accuracy": 0.6894366197183099, "accuracy_std": 0.01596589302641498, "avg_cost": 0.00025638612676056336, "cost_std": 1.11219119931626e-05, "small_model_fraction": 0.6248184228163594}}, "0.25": {"accuracy": 0.6197183098591549, "large_model_usage": 0.2605633802816901, "small_model_usage": 0.7394366197183099, "avg_lambda": 0.2700000000000005, "avg_cost": 0.00017718239436619718, "unrestricted": {"accuracy": 0.7077464788732394, "large_model_usage": 0.2605633802816901, "avg_cost": 0.0001963302816901408}, "random_baseline": {"accuracy": 0.6369718309859155, "accuracy_std": 0.012444082427515721, "avg_cost": 0.0001681030633802817, "cost_std": 1.3867718935454864e-05, "small_model_fraction": 0.7511885619894534}, "unrestricted_random_baseline": {"accuracy": 0.657394366197183, "accuracy_std": 0.01753860479635531, "avg_cost": 0.00019168126760563386, "cost_std": 1.735813759396455e-05, "small_model_fraction": 0.7183709489606432}}}}