{"trial": 25, "trial_seed": 66, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.587, "large_model_accuracy": 0.819, "small_model_cost": 3.4313799999999936e-05, "large_model_cost": 0.0006700359999999993, "hybrid_results": {"0.05": {"accuracy": 0.788, "large_model_usage": 0.788, "small_model_usage": 0.21199999999999997, "avg_lambda": 0.8999999999999925, "avg_cost": 0.0005692657999999998, "unrestricted": {"accuracy": 0.788, "large_model_usage": 0.788, "avg_cost": 0.0005697337999999998}, "random_baseline": {"accuracy": 0.7872000000000001, "accuracy_std": 0.006079473661428271, "avg_cost": 0.0005699367200000001, "cost_std": 6.524211648436825e-06, "small_model_fraction": 0.15851294795116413}, "unrestricted_random_baseline": {"accuracy": 0.7894000000000001, "accuracy_std": 0.008345058418010034, "avg_cost": 0.00056731268, "cost_std": 7.661508570614537e-06, "small_model_fraction": 0.15777677734079376}}, "0.1": {"accuracy": 0.738, "large_model_usage": 0.532, "small_model_usage": 0.46799999999999997, "avg_lambda": 0.7599999999999948, "avg_cost": 0.00038677380000000014, "unrestricted": {"accuracy": 0.744, "large_model_usage": 0.532, "avg_cost": 0.00039466980000000005}, "random_baseline": {"accuracy": 0.7274, "accuracy_std": 0.015621779668142823, "avg_cost": 0.0003785339800000002, "cost_std": 1.0779912167527292e-05, "small_model_fraction": 0.4455754416001194}, "unrestricted_random_baseline": {"accuracy": 0.7272000000000001, "accuracy_std": 0.011872657663724674, "avg_cost": 0.00039381352000000016, "cost_std": 1.2779273123053566e-05, "small_model_fraction": 0.4331549220713065}}, "0.15": {"accuracy": 0.712, "large_model_usage": 0.498, "small_model_usage": 0.502, "avg_lambda": 0.6999999999999935, "avg_cost": 0.00035147779999999994, "unrestricted": {"accuracy": 0.742, "large_model_usage": 0.498, "avg_cost": 0.00037202580000000017}, "random_baseline": {"accuracy": 0.6981999999999999, "accuracy_std": 0.010332473082471559, "avg_cost": 0.00034802396000000005, "cost_std": 9.901419751853788e-06, "small_model_fraction": 0.501096548146344}, "unrestricted_random_baseline": {"accuracy": 0.7162000000000001, "accuracy_std": 0.01366601624468522, "avg_cost": 0.00036891476000000013, "cost_std": 1.6102277039425237e-05, "small_model_fraction": 0.46877425391153466}}, "0.2": {"accuracy": 0.664, "large_model_usage": 0.266, "small_model_usage": 0.734, "avg_lambda": 0.46000000000000196, "avg_cost": 0.00021024180000000006, "unrestricted": {"accuracy": 0.68, "large_model_usage": 0.266, "avg_cost": 0.0002194698000000001}, "random_baseline": {"accuracy": 0.6546, "accuracy_std": 0.00885663593019382, "avg_cost": 0.00021266710000000005, "cost_std": 9.969148653019487e-06, "small_model_fraction": 0.723262771065726}, "unrestricted_random_baseline": {"accuracy": 0.6524, "accuracy_std": 0.015919798993705932, "avg_cost": 0.00021642854000000002, "cost_std": 1.3073806309732412e-05, "small_model_fraction": 0.7087469967227819}}, "0.25": {"accuracy": 0.61, "large_model_usage": 0.192, "small_model_usage": 0.808, "avg_lambda": 0.25, "avg_cost": 0.0001465618, "unrestricted": {"accuracy": 0.644, "large_model_usage": 0.192, "avg_cost": 0.00016507780000000007}, "random_baseline": {"accuracy": 0.6324000000000001, "accuracy_std": 0.011723480711802285, "avg_cost": 0.00014766504, "cost_std": 8.780894736096091e-06, "small_model_fraction": 0.823432310528089}, "unrestricted_random_baseline": {"accuracy": 0.6352, "accuracy_std": 0.008255906976220124, "avg_cost": 0.00016156963999999996, "cost_std": 9.306674168487929e-06, "small_model_fraction": 0.7943063809947171}}}}