{"trial": 9, "trial_seed": 50, "subject": "TruthfulQA", "method": "standard_basescores", "calibration_size": 400, "test_size": 284, "small_model_accuracy": 0.5526315789473685, "large_model_accuracy": 0.8230994152046783, "small_model_cost": 3.204590643274858e-05, "large_model_cost": 0.0006138538011695924, "hybrid_results": {"0.05": {"accuracy": 0.8063380281690141, "large_model_usage": 1.0, "small_model_usage": 0.0, "avg_lambda": 1.0, "avg_cost": 0.0006454306338028169, "unrestricted": {"accuracy": 0.8063380281690141, "large_model_usage": 1.0, "avg_cost": 0.0006454306338028169}, "random_baseline": {"accuracy": 0.8063380281690142, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.0006133732394366196, "cost_std": 0.0, "small_model_fraction": 0.0}, "unrestricted_random_baseline": {"accuracy": 0.8063380281690142, "accuracy_std": 1.1102230246251565e-16, "avg_cost": 0.0006133732394366196, "cost_std": 0.0, "small_model_fraction": 0.0}}, "0.1": {"accuracy": 0.7394366197183099, "large_model_usage": 0.5845070422535211, "small_model_usage": 0.4154929577464789, "avg_lambda": 0.7599999999999976, "avg_cost": 0.0003900433098591546, "unrestricted": {"accuracy": 0.75, "large_model_usage": 0.5845070422535211, "avg_cost": 0.0003929869718309856}, "random_baseline": {"accuracy": 0.7126760563380281, "accuracy_std": 0.015038138383142718, "avg_cost": 0.00039800559859154904, "cost_std": 1.4436405359495851e-05, "small_model_fraction": 0.3846810834556809}, "unrestricted_random_baseline": {"accuracy": 0.7098591549295774, "accuracy_std": 0.014449496146960023, "avg_cost": 0.000395096056338028, "cost_std": 1.3162436312726024e-05, "small_model_fraction": 0.3796215749848265}}, "0.15": {"accuracy": 0.7183098591549296, "large_model_usage": 0.46830985915492956, "small_model_usage": 0.5316901408450705, "avg_lambda": 0.6100000000000032, "avg_cost": 0.0003133672535211269, "unrestricted": {"accuracy": 0.7323943661971831, "large_model_usage": 0.46830985915492956, "avg_cost": 0.0003209658450704225}, "random_baseline": {"accuracy": 0.6679577464788732, "accuracy_std": 0.01746777007720063, "avg_cost": 0.0003160462323943661, "cost_std": 1.7682289626809905e-05, "small_model_fraction": 0.5164703854429097}, "unrestricted_random_baseline": {"accuracy": 0.6816901408450704, "accuracy_std": 0.011157027829404816, "avg_cost": 0.0003216263380281689, "cost_std": 1.6372956354360323e-05, "small_model_fraction": 0.5034100752992453}}, "0.2": {"accuracy": 0.676056338028169, "large_model_usage": 0.3767605633802817, "small_model_usage": 0.6232394366197183, "avg_lambda": 0.4800000000000006, "avg_cost": 0.0002508109154929578, "unrestricted": {"accuracy": 0.7112676056338029, "large_model_usage": 0.3767605633802817, "avg_cost": 0.0002651348591549296}, "random_baseline": {"accuracy": 0.6422535211267606, "accuracy_std": 0.01877787552953998, "avg_cost": 0.0002480050352112677, "cost_std": 1.794913827996497e-05, "small_model_fraction": 0.6239909924921897}, "unrestricted_random_baseline": {"accuracy": 0.6612676056338028, "accuracy_std": 0.012177194694218702, "avg_cost": 0.00027564665492957747, "cost_std": 1.0385423174442583e-05, "small_model_fraction": 0.5993712790239656}}, "0.25": {"accuracy": 0.6514084507042254, "large_model_usage": 0.2640845070422535, "small_model_usage": 0.7359154929577465, "avg_lambda": 0.3100000000000012, "avg_cost": 0.0001831841549295775, "unrestricted": {"accuracy": 0.6901408450704225, "large_model_usage": 0.2640845070422535, "avg_cost": 0.00019527570422535218}, "random_baseline": {"accuracy": 0.6154929577464789, "accuracy_std": 0.011442307612163345, "avg_cost": 0.00018206066901408456, "cost_std": 1.0997677669871092e-05, "small_model_fraction": 0.7402265423620801}, "unrestricted_random_baseline": {"accuracy": 0.6137323943661973, "accuracy_std": 0.014859822426767063, "avg_cost": 0.00019344823943661976, "cost_std": 1.9556322152027125e-05, "small_model_fraction": 0.7194438245523745}}}}