{"trial": 24, "trial_seed": 65, "subject": "TruthfulQA", "method": "standard_basescores", "calibration_size": 400, "test_size": 284, "small_model_accuracy": 0.5599415204678363, "large_model_accuracy": 0.8201754385964912, "small_model_cost": 3.206885964912288e-05, "large_model_cost": 0.0006151549707602342, "hybrid_results": {"0.05": {"accuracy": 0.8204225352112676, "large_model_usage": 1.0, "small_model_usage": 0.0, "avg_lambda": 1.0, "avg_cost": 0.0006491605633802814, "unrestricted": {"accuracy": 0.8204225352112676, "large_model_usage": 1.0, "avg_cost": 0.0006491605633802814}, "random_baseline": {"accuracy": 0.8204225352112676, "accuracy_std": 0.0, "avg_cost": 0.000617070422535211, "cost_std": 0.0, "small_model_fraction": 0.0}, "unrestricted_random_baseline": {"accuracy": 0.8204225352112676, "accuracy_std": 0.0, "avg_cost": 0.000617070422535211, "cost_std": 0.0, "small_model_fraction": 0.0}}, "0.1": {"accuracy": 0.7887323943661971, "large_model_usage": 0.7394366197183099, "small_model_usage": 0.2605633802816901, "avg_lambda": 0.8000000000000037, "avg_cost": 0.00048472394366197164, "unrestricted": {"accuracy": 0.8063380281690141, "large_model_usage": 0.7394366197183099, "avg_cost": 0.0004915126760563378}, "random_baseline": {"accuracy": 0.768661971830986, "accuracy_std": 0.014350481600221723, "avg_cost": 0.00048665499999999963, "cost_std": 1.6789661299067184e-05, "small_model_fraction": 0.22369084876626041}, "unrestricted_random_baseline": {"accuracy": 0.7647887323943661, "accuracy_std": 0.009154929577464777, "avg_cost": 0.0004932394718309854, "cost_std": 1.0401463733022203e-05, "small_model_fraction": 0.21204808749137813}}, "0.15": {"accuracy": 0.75, "large_model_usage": 0.4647887323943662, "small_model_usage": 0.5352112676056338, "avg_lambda": 0.6100000000000032, "avg_cost": 0.00031480140845070406, "unrestricted": {"accuracy": 0.7640845070422535, "large_model_usage": 0.4647887323943662, "avg_cost": 0.00032317464788732383}, "random_baseline": {"accuracy": 0.6767605633802817, "accuracy_std": 0.019843665920570918, "avg_cost": 0.0003054798239436618, "cost_std": 1.2825777281926714e-05, "small_model_fraction": 0.5151101296808209}, "unrestricted_random_baseline": {"accuracy": 0.6820422535211268, "accuracy_std": 0.008487303375488685, "avg_cost": 0.000318156056338028, "cost_std": 1.320791437744598e-05, "small_model_fraction": 0.5007499189382533}}, "0.2": {"accuracy": 0.7359154929577465, "large_model_usage": 0.4014084507042254, "small_model_usage": 0.5985915492957746, "avg_lambda": 0.5, "avg_cost": 0.00027512535211267607, "unrestricted": {"accuracy": 0.7535211267605634, "large_model_usage": 0.4014084507042254, "avg_cost": 0.00028441408450704224}, "random_baseline": {"accuracy": 0.6767605633802817, "accuracy_std": 0.022699664029980554, "avg_cost": 0.0002851984859154929, "cost_std": 1.8297747463577918e-05, "small_model_fraction": 0.5831550643516579}, "unrestricted_random_baseline": {"accuracy": 0.673943661971831, "accuracy_std": 0.014189747661697083, "avg_cost": 0.00029004753521126743, "cost_std": 1.59649878876038e-05, "small_model_fraction": 0.5672247716944279}}, "0.25": {"accuracy": 0.6091549295774648, "large_model_usage": 0.23943661971830985, "small_model_usage": 0.7605633802816901, "avg_lambda": 0.20000000000000093, "avg_cost": 0.00016601267605633808, "unrestricted": {"accuracy": 0.6901408450704225, "large_model_usage": 0.23943661971830985, "avg_cost": 0.00018312535211267614}, "random_baseline": {"accuracy": 0.620774647887324, "accuracy_std": 0.008189931936347221, "avg_cost": 0.00016420038732394368, "cost_std": 1.4099845579623284e-05, "small_model_fraction": 0.7702846734730554}, "unrestricted_random_baseline": {"accuracy": 0.6253521126760564, "accuracy_std": 0.018173926620512613, "avg_cost": 0.00018142929577464787, "cost_std": 1.2854154620539157e-05, "small_model_fraction": 0.7409362192220551}}}}