{"trial": 11, "trial_seed": 52, "subject": "TruthfulQA", "method": "standard_basescores", "calibration_size": 400, "test_size": 284, "small_model_accuracy": 0.5511695906432749, "large_model_accuracy": 0.8362573099415205, "small_model_cost": 3.205467836257314e-05, "large_model_cost": 0.0006146374269005851, "hybrid_results": {"0.05": {"accuracy": 0.8415492957746479, "large_model_usage": 1.0, "small_model_usage": 0.0, "avg_lambda": 1.0, "avg_cost": 0.000646532746478873, "unrestricted": {"accuracy": 0.8415492957746479, "large_model_usage": 1.0, "avg_cost": 0.000646532746478873}, "random_baseline": {"accuracy": 0.8415492957746478, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.0006144014084507037, "cost_std": 0.0, "small_model_fraction": 0.0}, "unrestricted_random_baseline": {"accuracy": 0.8415492957746478, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.0006144014084507037, "cost_std": 0.0, "small_model_fraction": 0.0}}, "0.1": {"accuracy": 0.7816901408450704, "large_model_usage": 0.6232394366197183, "small_model_usage": 0.37676056338028174, "avg_lambda": 0.7700000000000036, "avg_cost": 0.00041471584507042235, "unrestricted": {"accuracy": 0.7922535211267606, "large_model_usage": 0.6232394366197183, "avg_cost": 0.00041779330985915484}, "random_baseline": {"accuracy": 0.745774647887324, "accuracy_std": 0.015809819944115226, "avg_cost": 0.00042232714788732376, "cost_std": 1.9562684449702827e-05, "small_model_fraction": 0.34316426693352114}, "unrestricted_random_baseline": {"accuracy": 0.7489436619718309, "accuracy_std": 0.011997693688310421, "avg_cost": 0.0004175191549295772, "cost_std": 1.3606479219862924e-05, "small_model_fraction": 0.33788181599165}}, "0.15": {"accuracy": 0.6971830985915493, "large_model_usage": 0.44366197183098594, "small_model_usage": 0.556338028169014, "avg_lambda": 0.5800000000000017, "avg_cost": 0.0002966383802816901, "unrestricted": {"accuracy": 0.7253521126760564, "large_model_usage": 0.44366197183098594, "avg_cost": 0.00030789894366197183}, "random_baseline": {"accuracy": 0.6795774647887324, "accuracy_std": 0.018764665626020013, "avg_cost": 0.00029898728873239424, "cost_std": 2.265719067895788e-05, "small_model_fraction": 0.5458435688610962}, "unrestricted_random_baseline": {"accuracy": 0.6982394366197182, "accuracy_std": 0.015108054896426025, "avg_cost": 0.000306912042253521, "cost_std": 1.4240908225596592e-05, "small_model_fraction": 0.526514875368987}}, "0.2": {"accuracy": 0.6302816901408451, "large_model_usage": 0.352112676056338, "small_model_usage": 0.647887323943662, "avg_lambda": 0.4899999999999987, "avg_cost": 0.00023849049295774635, "unrestricted": {"accuracy": 0.704225352112676, "large_model_usage": 0.352112676056338, "avg_cost": 0.0002516735915492957}, "random_baseline": {"accuracy": 0.6612676056338028, "accuracy_std": 0.016945365373966122, "avg_cost": 0.0002422526056338027, "cost_std": 2.5484938846197872e-05, "small_model_fraction": 0.6456540892890792}, "unrestricted_random_baseline": {"accuracy": 0.6647887323943661, "accuracy_std": 0.01237915199383586, "avg_cost": 0.0002513676056338027, "cost_std": 2.0926674294056408e-05, "small_model_fraction": 0.6230253749568538}}, "0.25": {"accuracy": 0.5985915492957746, "large_model_usage": 0.25704225352112675, "small_model_usage": 0.7429577464788732, "avg_lambda": 0.3100000000000012, "avg_cost": 0.00017535669014084496, "unrestricted": {"accuracy": 0.6690140845070423, "large_model_usage": 0.25704225352112675, "avg_cost": 0.0001917228873239435}, "random_baseline": {"accuracy": 0.6225352112676056, "accuracy_std": 0.012379151993835897, "avg_cost": 0.0001694100352112675, "cost_std": 1.2969944026755922e-05, "small_model_fraction": 0.7540229055221985}, "unrestricted_random_baseline": {"accuracy": 0.6383802816901408, "accuracy_std": 0.018367325853161948, "avg_cost": 0.0001904326056338027, "cost_std": 1.2194466847797954e-05, "small_model_fraction": 0.7259304204217224}}}}