{"trial": 2, "trial_seed": 43, "subject": "TruthfulQA", "method": "standard_basescores", "calibration_size": 400, "test_size": 284, "small_model_accuracy": 0.5555555555555556, "large_model_accuracy": 0.8260233918128655, "small_model_cost": 3.205482456140351e-05, "large_model_cost": 0.0006160906432748544, "hybrid_results": {"0.05": {"accuracy": 0.823943661971831, "large_model_usage": 1.0, "small_model_usage": 0.0, "avg_lambda": 1.0, "avg_cost": 0.000646777464788732, "unrestricted": {"accuracy": 0.823943661971831, "large_model_usage": 1.0, "avg_cost": 0.000646777464788732}, "random_baseline": {"accuracy": 0.823943661971831, "accuracy_std": 0.0, "avg_cost": 0.0006147605633802814, "cost_std": 0.0, "small_model_fraction": 0.0}, "unrestricted_random_baseline": {"accuracy": 0.823943661971831, "accuracy_std": 0.0, "avg_cost": 0.0006147605633802814, "cost_std": 0.0, "small_model_fraction": 0.0}}, "0.1": {"accuracy": 0.7359154929577465, "large_model_usage": 0.6161971830985915, "small_model_usage": 0.3838028169014085, "avg_lambda": 0.7599999999999976, "avg_cost": 0.00041003802816901366, "unrestricted": {"accuracy": 0.7464788732394366, "large_model_usage": 0.6161971830985915, "avg_cost": 0.000413502816901408}, "random_baseline": {"accuracy": 0.7359154929577465, "accuracy_std": 0.018431341307468162, "avg_cost": 0.0004089239436619715, "cost_std": 1.498189773344807e-05, "small_model_fraction": 0.35280818145665416}, "unrestricted_random_baseline": {"accuracy": 0.7352112676056338, "accuracy_std": 0.01841788286003099, "avg_cost": 0.00041190845070422497, "cost_std": 2.060170805002476e-05, "small_model_fraction": 0.34687568789825085}}, "0.15": {"accuracy": 0.6936619718309859, "large_model_usage": 0.4788732394366197, "small_model_usage": 0.5211267605633803, "avg_lambda": 0.6299999999999976, "avg_cost": 0.0003190309859154926, "unrestricted": {"accuracy": 0.7147887323943662, "large_model_usage": 0.4788732394366197, "avg_cost": 0.0003283056338028166}, "random_baseline": {"accuracy": 0.6887323943661972, "accuracy_std": 0.01261723441349924, "avg_cost": 0.0003208741549295772, "cost_std": 1.5402737472362816e-05, "small_model_fraction": 0.5086326006746343}, "unrestricted_random_baseline": {"accuracy": 0.702112676056338, "accuracy_std": 0.016901408450704238, "avg_cost": 0.00033673035211267575, "cost_std": 2.022886131160849e-05, "small_model_fraction": 0.49275232828354243}}, "0.2": {"accuracy": 0.647887323943662, "large_model_usage": 0.30985915492957744, "small_model_usage": 0.6901408450704225, "avg_lambda": 0.40000000000000185, "avg_cost": 0.00021392535211267594, "unrestricted": {"accuracy": 0.6654929577464789, "large_model_usage": 0.30985915492957744, "avg_cost": 0.00022449577464788724}, "random_baseline": {"accuracy": 0.6485915492957748, "accuracy_std": 0.013978474113576915, "avg_cost": 0.0002124919718309859, "cost_std": 1.1507763324719107e-05, "small_model_fraction": 0.6885969631932718}, "unrestricted_random_baseline": {"accuracy": 0.6457746478873239, "accuracy_std": 0.01104534305729444, "avg_cost": 0.00022764580985915484, "cost_std": 2.0666605204572412e-05, "small_model_fraction": 0.6704980346746469}}, "0.25": {"accuracy": 0.602112676056338, "large_model_usage": 0.2605633802816901, "small_model_usage": 0.7394366197183099, "avg_lambda": 0.25, "avg_cost": 0.00017558732394366207, "unrestricted": {"accuracy": 0.6514084507042254, "large_model_usage": 0.2605633802816901, "avg_cost": 0.00019381267605633802}, "random_baseline": {"accuracy": 0.6306338028169014, "accuracy_std": 0.013768461123955768, "avg_cost": 0.00018161830985915492, "cost_std": 1.119192526368517e-05, "small_model_fraction": 0.754240245575279}, "unrestricted_random_baseline": {"accuracy": 0.6309859154929578, "accuracy_std": 0.013156015276246087, "avg_cost": 0.00019130348591549296, "cost_std": 1.1649266834672859e-05, "small_model_fraction": 0.7230343648249787}}}}