{"trial": 11, "trial_seed": 52, "subject": "MMLU", "method": "standard", "calibration_size": 500, "test_size": 500, "small_model_accuracy": 0.589, "large_model_accuracy": 0.846, "small_model_cost": 3.44052e-05, "large_model_cost": 0.0006724879999999981, "hybrid_results": {"0.05": {"accuracy": 0.778, "large_model_usage": 0.822, "small_model_usage": 0.17800000000000005, "avg_lambda": 0.8999999999999925, "avg_cost": 0.0005978198000000005, "unrestricted": {"accuracy": 0.778, "large_model_usage": 0.822, "avg_cost": 0.0005981638000000005}, "random_baseline": {"accuracy": 0.7962, "accuracy_std": 0.008553361912137245, "avg_cost": 0.0006018236199999999, "cost_std": 9.690870148526416e-06, "small_model_fraction": 0.1170196093673076}, "unrestricted_random_baseline": {"accuracy": 0.7938000000000001, "accuracy_std": 0.00922821759604747, "avg_cost": 0.0006016121799999998, "cost_std": 1.2316982692997593e-05, "small_model_fraction": 0.11648049438097668}}, "0.1": {"accuracy": 0.76, "large_model_usage": 0.62, "small_model_usage": 0.38, "avg_lambda": 0.800000000000007, "avg_cost": 0.0004586397999999996, "unrestricted": {"accuracy": 0.762, "large_model_usage": 0.62, "avg_cost": 0.0004625717999999997}, "random_baseline": {"accuracy": 0.7412, "accuracy_std": 0.01085172797300044, "avg_cost": 0.00046288599999999976, "cost_std": 1.39056348552665e-05, "small_model_fraction": 0.3351417715694564}, "unrestricted_random_baseline": {"accuracy": 0.7442, "accuracy_std": 0.011847362575695918, "avg_cost": 0.0004723587199999998, "cost_std": 1.1870634436356107e-05, "small_model_fraction": 0.32897956190011557}}, "0.15": {"accuracy": 0.712, "large_model_usage": 0.502, "small_model_usage": 0.498, "avg_lambda": 0.6999999999999935, "avg_cost": 0.00036035579999999976, "unrestricted": {"accuracy": 0.744, "large_model_usage": 0.502, "avg_cost": 0.00038052379999999985}, "random_baseline": {"accuracy": 0.7030000000000001, "accuracy_std": 0.013122499761859367, "avg_cost": 0.00035965809999999966, "cost_std": 1.4926717233270018e-05, "small_model_fraction": 0.4891719381873313}, "unrestricted_random_baseline": {"accuracy": 0.712, "accuracy_std": 0.009465727652959393, "avg_cost": 0.0003829235799999998, "cost_std": 1.4800807142706828e-05, "small_model_fraction": 0.4575647549189527}}, "0.2": {"accuracy": 0.672, "large_model_usage": 0.324, "small_model_usage": 0.6759999999999999, "avg_lambda": 0.5800000000000031, "avg_cost": 0.00024726379999999985, "unrestricted": {"accuracy": 0.678, "large_model_usage": 0.324, "avg_cost": 0.0002584477999999999}, "random_baseline": {"accuracy": 0.6534000000000001, "accuracy_std": 0.01200166655094201, "avg_cost": 0.00023956056, "cost_std": 1.69708194776328e-05, "small_model_fraction": 0.6664091243330795}, "unrestricted_random_baseline": {"accuracy": 0.6578, "accuracy_std": 0.01167732846159601, "avg_cost": 0.00025333116, "cost_std": 1.564976925984533e-05, "small_model_fraction": 0.6488816184984134}}, "0.25": {"accuracy": 0.636, "large_model_usage": 0.182, "small_model_usage": 0.8180000000000001, "avg_lambda": 0.319999999999996, "avg_cost": 0.00014551580000000003, "unrestricted": {"accuracy": 0.642, "large_model_usage": 0.182, "avg_cost": 0.0001612238}, "random_baseline": {"accuracy": 0.6217999999999999, "accuracy_std": 0.006029925372672539, "avg_cost": 0.00015312694, "cost_std": 7.82953619573985e-06, "small_model_fraction": 0.8258680534877285}, "unrestricted_random_baseline": {"accuracy": 0.6178, "accuracy_std": 0.010934349546269326, "avg_cost": 0.00016090463999999998, "cost_std": 9.201445635464003e-06, "small_model_fraction": 0.8012505587049199}}}}