[
  {
    "dataset": "227_cpu_small",
    "baseline": "mlp",
    "sample_size": 1638,
    "mse": 0.0011238315,
    "mse_std": 0.0002643466,
    "aug_mse": 0.00086904,
    "aug_mse_std": 0.0001039247,
    "delta_mse": -20.2386976005,
    "delta_mse_std": 12.4145439408,
    "p_wilcoxon": 0.0016927083,
    "p_wilcoxon_std": 0.002211626,
    "should_proceed": true
  },
  {
    "dataset": "227_cpu_small",
    "baseline": "mlp",
    "sample_size": 3276,
    "mse": 0.0009958408,
    "mse_std": 9.07444e-05,
    "aug_mse": 0.0008534927,
    "aug_mse_std": 7.29209e-05,
    "delta_mse": -14.0284755183,
    "delta_mse_std": 5.8484113591,
    "p_wilcoxon": 0.1276041667,
    "p_wilcoxon_std": 0.2293567216,
    "should_proceed": true
  },
  {
    "dataset": "227_cpu_small",
    "baseline": "mlp",
    "sample_size": 4914,
    "mse": 0.0009311602,
    "mse_std": 7.788e-05,
    "aug_mse": 0.0008221807,
    "aug_mse_std": 4.44451e-05,
    "delta_mse": -11.3115461734,
    "delta_mse_std": 5.9681090872,
    "p_wilcoxon": 0.0428385417,
    "p_wilcoxon_std": 0.059621911,
    "should_proceed": true
  },
  {
    "dataset": "227_cpu_small",
    "baseline": "mlp",
    "sample_size": 6552,
    "mse": 0.0008968359,
    "mse_std": 0.0001057707,
    "aug_mse": 0.0007947418,
    "aug_mse_std": 3.70751e-05,
    "delta_mse": -10.480211609,
    "delta_mse_std": 8.4038505176,
    "p_wilcoxon": 0.107421875,
    "p_wilcoxon_std": 0.162004233,
    "should_proceed": true
  },
  {
    "dataset": "227_cpu_small",
    "baseline": "mlp",
    "sample_size": 8190,
    "mse": 0.000869326,
    "mse_std": 4.9781e-05,
    "aug_mse": 0.0007798854,
    "aug_mse_std": 4.51722e-05,
    "delta_mse": -10.2314267728,
    "delta_mse_std": 3.1037572611,
    "p_wilcoxon": 0.0507161458,
    "p_wilcoxon_std": 0.0949839225,
    "should_proceed": true
  },
  {
    "dataset": "227_cpu_small",
    "baseline": "xgboost",
    "sample_size": 1638,
    "mse": 0.0009684102,
    "mse_std": 0.0001722803,
    "aug_mse": 0.0008946524,
    "aug_mse_std": 0.0001557418,
    "delta_mse": -6.991906628,
    "delta_mse_std": 10.378752062,
    "p_wilcoxon": 0.0083984375,
    "p_wilcoxon_std": 0.0162778645,
    "should_proceed": true
  },
  {
    "dataset": "227_cpu_small",
    "baseline": "xgboost",
    "sample_size": 3276,
    "mse": 0.0008846376,
    "mse_std": 0.0001003421,
    "aug_mse": 0.0007944689,
    "aug_mse_std": 5.97432e-05,
    "delta_mse": -9.4711121883,
    "delta_mse_std": 8.6065422986,
    "p_wilcoxon": 0.00234375,
    "p_wilcoxon_std": 0.0032053473,
    "should_proceed": true
  },
  {
    "dataset": "227_cpu_small",
    "baseline": "xgboost",
    "sample_size": 4914,
    "mse": 0.0007723208,
    "mse_std": 8.04171e-05,
    "aug_mse": 0.0007212466,
    "aug_mse_std": 4.64692e-05,
    "delta_mse": -6.1953830493,
    "delta_mse_std": 4.686618415,
    "p_wilcoxon": 0.0408854167,
    "p_wilcoxon_std": 0.1337664246,
    "should_proceed": true
  },
  {
    "dataset": "227_cpu_small",
    "baseline": "xgboost",
    "sample_size": 6552,
    "mse": 0.0007260401,
    "mse_std": 6.63902e-05,
    "aug_mse": 0.0006926907,
    "aug_mse_std": 3.60078e-05,
    "delta_mse": -4.1309136913,
    "delta_mse_std": 6.2267048271,
    "p_wilcoxon": 0.0333984375,
    "p_wilcoxon_std": 0.1143221752,
    "should_proceed": true
  },
  {
    "dataset": "227_cpu_small",
    "baseline": "xgboost",
    "sample_size": 8190,
    "mse": 0.0007381137,
    "mse_std": 7.89668e-05,
    "aug_mse": 0.0006952542,
    "aug_mse_std": 4.38917e-05,
    "delta_mse": -5.1877539359,
    "delta_mse_std": 7.4336605538,
    "p_wilcoxon": 0.05703125,
    "p_wilcoxon_std": 0.1799197435,
    "should_proceed": true
  },
  {
    "dataset": "294_satellite_image",
    "baseline": "mlp",
    "sample_size": 1287,
    "mse": 0.0203083252,
    "mse_std": 0.0038746726,
    "aug_mse": 0.0162851113,
    "aug_mse_std": 0.0022111608,
    "delta_mse": -18.3635703878,
    "delta_mse_std": 11.9428216045,
    "p_wilcoxon": 0.0011067708,
    "p_wilcoxon_std": 0.0003319674,
    "should_proceed": true
  },
  {
    "dataset": "294_satellite_image",
    "baseline": "mlp",
    "sample_size": 2574,
    "mse": 0.0174671595,
    "mse_std": 0.0014447418,
    "aug_mse": 0.0145468025,
    "aug_mse_std": 0.0015179723,
    "delta_mse": -16.6947967771,
    "delta_mse_std": 5.683456706,
    "p_wilcoxon": 0.0013671875,
    "p_wilcoxon_std": 0.0014615849,
    "should_proceed": true
  },
  {
    "dataset": "294_satellite_image",
    "baseline": "mlp",
    "sample_size": 3861,
    "mse": 0.0158484439,
    "mse_std": 0.0020361126,
    "aug_mse": 0.0121149333,
    "aug_mse_std": 0.0013722577,
    "delta_mse": -23.1436070104,
    "delta_mse_std": 6.7364512656,
    "p_wilcoxon": 0.0009765625,
    "p_wilcoxon_std": 0.0,
    "should_proceed": true
  },
  {
    "dataset": "294_satellite_image",
    "baseline": "mlp",
    "sample_size": 5148,
    "mse": 0.0141532345,
    "mse_std": 0.0016864748,
    "aug_mse": 0.010759875,
    "aug_mse_std": 0.0011387421,
    "delta_mse": -23.7181160666,
    "delta_mse_std": 4.7771560084,
    "p_wilcoxon": 0.0009765625,
    "p_wilcoxon_std": 0.0,
    "should_proceed": true
  },
  {
    "dataset": "294_satellite_image",
    "baseline": "mlp",
    "sample_size": 6435,
    "mse": 0.0123183687,
    "mse_std": 0.0013260804,
    "aug_mse": 0.0098880747,
    "aug_mse_std": 0.0010815225,
    "delta_mse": -19.6639996719,
    "delta_mse_std": 4.0427199241,
    "p_wilcoxon": 0.0011067708,
    "p_wilcoxon_std": 0.000487195,
    "should_proceed": true
  },
  {
    "dataset": "294_satellite_image",
    "baseline": "xgboost",
    "sample_size": 1287,
    "mse": 0.017775054,
    "mse_std": 0.0017662952,
    "aug_mse": 0.0169738291,
    "aug_mse_std": 0.0019912758,
    "delta_mse": -4.5373099092,
    "delta_mse_std": 5.4824151476,
    "p_wilcoxon": 0.0010416667,
    "p_wilcoxon_std": 0.0002435975,
    "should_proceed": true
  },
  {
    "dataset": "294_satellite_image",
    "baseline": "xgboost",
    "sample_size": 2574,
    "mse": 0.0163605585,
    "mse_std": 0.0013729906,
    "aug_mse": 0.0157619691,
    "aug_mse_std": 0.0015501265,
    "delta_mse": -3.7274594784,
    "delta_mse_std": 3.2660780031,
    "p_wilcoxon": 0.0009765625,
    "p_wilcoxon_std": 0.0,
    "should_proceed": true
  },
  {
    "dataset": "294_satellite_image",
    "baseline": "xgboost",
    "sample_size": 3861,
    "mse": 0.0145987487,
    "mse_std": 0.0013194619,
    "aug_mse": 0.0138981523,
    "aug_mse_std": 0.0013242837,
    "delta_mse": -4.7917811441,
    "delta_mse_std": 3.1037423988,
    "p_wilcoxon": 0.0009765625,
    "p_wilcoxon_std": 0.0,
    "should_proceed": true
  },
  {
    "dataset": "294_satellite_image",
    "baseline": "xgboost",
    "sample_size": 5148,
    "mse": 0.0136606534,
    "mse_std": 0.0012392827,
    "aug_mse": 0.0130038449,
    "aug_mse_std": 0.0011702119,
    "delta_mse": -4.7301237271,
    "delta_mse_std": 4.076376397,
    "p_wilcoxon": 0.0009765625,
    "p_wilcoxon_std": 0.0,
    "should_proceed": true
  },
  {
    "dataset": "294_satellite_image",
    "baseline": "xgboost",
    "sample_size": 6435,
    "mse": 0.0125401733,
    "mse_std": 0.0011169941,
    "aug_mse": 0.0118629113,
    "aug_mse_std": 0.0010100824,
    "delta_mse": -5.3072662084,
    "delta_mse_std": 3.173468634,
    "p_wilcoxon": 0.0009765625,
    "p_wilcoxon_std": 0.0,
    "should_proceed": true
  },
  {
    "dataset": "503_wind",
    "baseline": "mlp",
    "sample_size": 1314,
    "mse": 0.0075240459,
    "mse_std": 0.000946527,
    "aug_mse": 0.006970376,
    "aug_mse_std": 0.0009245335,
    "delta_mse": -7.2209159192,
    "delta_mse_std": 5.815153701,
    "p_wilcoxon": 0.0010416667,
    "p_wilcoxon_std": 0.0002435975,
    "should_proceed": true
  },
  {
    "dataset": "503_wind",
    "baseline": "mlp",
    "sample_size": 2628,
    "mse": 0.0062135591,
    "mse_std": 0.0006330271,
    "aug_mse": 0.0056191457,
    "aug_mse_std": 0.0004101676,
    "delta_mse": -9.1724825863,
    "delta_mse_std": 5.4931155115,
    "p_wilcoxon": 0.0118489583,
    "p_wilcoxon_std": 0.033909585,
    "should_proceed": true
  },
  {
    "dataset": "503_wind",
    "baseline": "mlp",
    "sample_size": 3942,
    "mse": 0.0059300309,
    "mse_std": 0.0002840572,
    "aug_mse": 0.0053949338,
    "aug_mse_std": 0.0003150533,
    "delta_mse": -9.0338542413,
    "delta_mse_std": 2.7328633843,
    "p_wilcoxon": 0.0080729167,
    "p_wilcoxon_std": 0.0160127136,
    "should_proceed": true
  },
  {
    "dataset": "503_wind",
    "baseline": "mlp",
    "sample_size": 5256,
    "mse": 0.0056745053,
    "mse_std": 0.0003280609,
    "aug_mse": 0.0053256498,
    "aug_mse_std": 0.0003278649,
    "delta_mse": -6.1529214923,
    "delta_mse_std": 1.6647155422,
    "p_wilcoxon": 0.0438802083,
    "p_wilcoxon_std": 0.0458360151,
    "should_proceed": true
  },
  {
    "dataset": "503_wind",
    "baseline": "mlp",
    "sample_size": 6570,
    "mse": 0.0052975485,
    "mse_std": 0.0001530845,
    "aug_mse": 0.0050024874,
    "aug_mse_std": 0.0001624734,
    "delta_mse": -5.5636481939,
    "delta_mse_std": 1.800588037,
    "p_wilcoxon": 0.0782552083,
    "p_wilcoxon_std": 0.1092574393,
    "should_proceed": false
  },
  {
    "dataset": "503_wind",
    "baseline": "xgboost",
    "sample_size": 1314,
    "mse": 0.0074243584,
    "mse_std": 0.0010826232,
    "aug_mse": 0.0072082917,
    "aug_mse_std": 0.0010732906,
    "delta_mse": -2.8220220557,
    "delta_mse_std": 4.529316541,
    "p_wilcoxon": 0.0011067708,
    "p_wilcoxon_std": 0.0003319674,
    "should_proceed": true
  },
  {
    "dataset": "503_wind",
    "baseline": "xgboost",
    "sample_size": 2628,
    "mse": 0.0060215845,
    "mse_std": 0.0004475405,
    "aug_mse": 0.0060340257,
    "aug_mse_std": 0.0004776307,
    "delta_mse": 0.2005803988,
    "delta_mse_std": 2.4198980175,
    "p_wilcoxon": 0.0011067708,
    "p_wilcoxon_std": 0.000487195,
    "should_proceed": true
  },
  {
    "dataset": "503_wind",
    "baseline": "xgboost",
    "sample_size": 3942,
    "mse": 0.0058625259,
    "mse_std": 0.0003040187,
    "aug_mse": 0.0057848993,
    "aug_mse_std": 0.0003155681,
    "delta_mse": -1.3271485141,
    "delta_mse_std": 1.3994460991,
    "p_wilcoxon": 0.0013671875,
    "p_wilcoxon_std": 0.0014615849,
    "should_proceed": true
  },
  {
    "dataset": "503_wind",
    "baseline": "xgboost",
    "sample_size": 5256,
    "mse": 0.0057021988,
    "mse_std": 0.0002322959,
    "aug_mse": 0.0056237492,
    "aug_mse_std": 0.000269723,
    "delta_mse": -1.3966173673,
    "delta_mse_std": 1.4838872487,
    "p_wilcoxon": 0.0022786458,
    "p_wilcoxon_std": 0.0033766413,
    "should_proceed": true
  },
  {
    "dataset": "503_wind",
    "baseline": "xgboost",
    "sample_size": 6570,
    "mse": 0.0052759588,
    "mse_std": 0.0001891419,
    "aug_mse": 0.0052184711,
    "aug_mse_std": 0.0001894761,
    "delta_mse": -1.0835220619,
    "delta_mse_std": 1.2515814703,
    "p_wilcoxon": 0.001171875,
    "p_wilcoxon_std": 0.000390625,
    "should_proceed": true
  },
  {
    "dataset": "623_fri_c4_1000_10",
    "baseline": "mlp",
    "sample_size": 200,
    "mse": 0.0199291561,
    "mse_std": 0.0066630714,
    "aug_mse": 0.0138739265,
    "aug_mse_std": 0.0060805084,
    "delta_mse": -28.8028005691,
    "delta_mse_std": 24.2241706578,
    "p_wilcoxon": 0.0009765625,
    "p_wilcoxon_std": 0.0,
    "should_proceed": true
  },
  {
    "dataset": "623_fri_c4_1000_10",
    "baseline": "mlp",
    "sample_size": 400,
    "mse": 0.006101159,
    "mse_std": 0.0013962993,
    "aug_mse": 0.0038394591,
    "aug_mse_std": 0.0011925659,
    "delta_mse": -36.926462981,
    "delta_mse_std": 11.5651910285,
    "p_wilcoxon": 0.0013020833,
    "p_wilcoxon_std": 0.0009873536,
    "should_proceed": true
  },
  {
    "dataset": "623_fri_c4_1000_10",
    "baseline": "mlp",
    "sample_size": 600,
    "mse": 0.0032125378,
    "mse_std": 0.0010023969,
    "aug_mse": 0.0022752395,
    "aug_mse_std": 0.0007232606,
    "delta_mse": -27.9143981261,
    "delta_mse_std": 11.7042359219,
    "p_wilcoxon": 0.00234375,
    "p_wilcoxon_std": 0.0043644078,
    "should_proceed": true
  },
  {
    "dataset": "623_fri_c4_1000_10",
    "baseline": "mlp",
    "sample_size": 800,
    "mse": 0.0022251407,
    "mse_std": 0.0006229685,
    "aug_mse": 0.0014020309,
    "aug_mse_std": 0.0003078158,
    "delta_mse": -34.1186926951,
    "delta_mse_std": 15.4988264595,
    "p_wilcoxon": 0.0015625,
    "p_wilcoxon_std": 0.0014615849,
    "should_proceed": true
  },
  {
    "dataset": "623_fri_c4_1000_10",
    "baseline": "mlp",
    "sample_size": 1000,
    "mse": 0.0022003809,
    "mse_std": 0.0005158029,
    "aug_mse": 0.0012309104,
    "aug_mse_std": 0.0002663929,
    "delta_mse": -42.3339861952,
    "delta_mse_std": 11.9953907672,
    "p_wilcoxon": 0.002734375,
    "p_wilcoxon_std": 0.0036400158,
    "should_proceed": true
  },
  {
    "dataset": "623_fri_c4_1000_10",
    "baseline": "xgboost",
    "sample_size": 200,
    "mse": 0.00652015,
    "mse_std": 0.0016466699,
    "aug_mse": 0.0056434291,
    "aug_mse_std": 0.0011948268,
    "delta_mse": -11.9977724686,
    "delta_mse_std": 14.2609257811,
    "p_wilcoxon": 0.0039713542,
    "p_wilcoxon_std": 0.0063934811,
    "should_proceed": true
  },
  {
    "dataset": "623_fri_c4_1000_10",
    "baseline": "xgboost",
    "sample_size": 400,
    "mse": 0.003267049,
    "mse_std": 0.0010226166,
    "aug_mse": 0.0031241912,
    "aug_mse_std": 0.0008666808,
    "delta_mse": -3.1596450449,
    "delta_mse_std": 9.5058582596,
    "p_wilcoxon": 0.0020182292,
    "p_wilcoxon_std": 0.0023237719,
    "should_proceed": true
  },
  {
    "dataset": "623_fri_c4_1000_10",
    "baseline": "xgboost",
    "sample_size": 600,
    "mse": 0.0026399279,
    "mse_std": 0.0003261453,
    "aug_mse": 0.002416454,
    "aug_mse_std": 0.0002755062,
    "delta_mse": -7.9354193387,
    "delta_mse_std": 8.372507012,
    "p_wilcoxon": 0.0011067708,
    "p_wilcoxon_std": 0.0003319674,
    "should_proceed": true
  },
  {
    "dataset": "623_fri_c4_1000_10",
    "baseline": "xgboost",
    "sample_size": 800,
    "mse": 0.0016464314,
    "mse_std": 0.0003041793,
    "aug_mse": 0.0016115413,
    "aug_mse_std": 0.0003416937,
    "delta_mse": -2.2333281885,
    "delta_mse_std": 7.7866457387,
    "p_wilcoxon": 0.0014322917,
    "p_wilcoxon_std": 0.0010001492,
    "should_proceed": true
  },
  {
    "dataset": "623_fri_c4_1000_10",
    "baseline": "xgboost",
    "sample_size": 1000,
    "mse": 0.0015241163,
    "mse_std": 0.0001776289,
    "aug_mse": 0.0014538046,
    "aug_mse_std": 0.0002334126,
    "delta_mse": -4.585585395,
    "delta_mse_std": 10.9878345156,
    "p_wilcoxon": 0.0012369792,
    "p_wilcoxon_std": 0.0009743899,
    "should_proceed": true
  },
  {
    "dataset": "ConcreteCompressiveStrength",
    "baseline": "mlp",
    "sample_size": 201,
    "mse": 0.0103338088,
    "mse_std": 0.0039780552,
    "aug_mse": 0.0079254576,
    "aug_mse_std": 0.0019279507,
    "delta_mse": -17.8047573958,
    "delta_mse_std": 22.0787279706,
    "p_wilcoxon": 0.0011067708,
    "p_wilcoxon_std": 0.0003319674,
    "should_proceed": true
  },
  {
    "dataset": "ConcreteCompressiveStrength",
    "baseline": "mlp",
    "sample_size": 402,
    "mse": 0.0063545642,
    "mse_std": 0.0020690332,
    "aug_mse": 0.0049555397,
    "aug_mse_std": 0.001416109,
    "delta_mse": -19.8265318401,
    "delta_mse_std": 10.8960051211,
    "p_wilcoxon": 0.001171875,
    "p_wilcoxon_std": 0.0005289088,
    "should_proceed": true
  },
  {
    "dataset": "ConcreteCompressiveStrength",
    "baseline": "mlp",
    "sample_size": 603,
    "mse": 0.0060244946,
    "mse_std": 0.0005457961,
    "aug_mse": 0.0049390007,
    "aug_mse_std": 0.0005258903,
    "delta_mse": -17.6366638937,
    "delta_mse_std": 9.52188954,
    "p_wilcoxon": 0.0027994792,
    "p_wilcoxon_std": 0.0058034075,
    "should_proceed": true
  },
  {
    "dataset": "ConcreteCompressiveStrength",
    "baseline": "mlp",
    "sample_size": 804,
    "mse": 0.0049660037,
    "mse_std": 0.0009900486,
    "aug_mse": 0.0036123173,
    "aug_mse_std": 0.0004910871,
    "delta_mse": -24.7676394811,
    "delta_mse_std": 15.760261098,
    "p_wilcoxon": 0.0018880208,
    "p_wilcoxon_std": 0.0031587264,
    "should_proceed": true
  },
  {
    "dataset": "ConcreteCompressiveStrength",
    "baseline": "mlp",
    "sample_size": 1005,
    "mse": 0.0042238942,
    "mse_std": 0.0009360391,
    "aug_mse": 0.0030642384,
    "aug_mse_std": 0.0006228361,
    "delta_mse": -26.8961526443,
    "delta_mse_std": 6.6530250668,
    "p_wilcoxon": 0.0009765625,
    "p_wilcoxon_std": 0.0,
    "should_proceed": true
  },
  {
    "dataset": "ConcreteCompressiveStrength",
    "baseline": "xgboost",
    "sample_size": 201,
    "mse": 0.0077658539,
    "mse_std": 0.00264503,
    "aug_mse": 0.0070109814,
    "aug_mse_std": 0.0024356553,
    "delta_mse": -8.0097410763,
    "delta_mse_std": 14.0935333594,
    "p_wilcoxon": 0.0027994792,
    "p_wilcoxon_std": 0.0047423369,
    "should_proceed": true
  },
  {
    "dataset": "ConcreteCompressiveStrength",
    "baseline": "xgboost",
    "sample_size": 402,
    "mse": 0.0049293004,
    "mse_std": 0.0013507416,
    "aug_mse": 0.004532391,
    "aug_mse_std": 0.001419668,
    "delta_mse": -8.4337362244,
    "delta_mse_std": 10.4214978151,
    "p_wilcoxon": 0.005859375,
    "p_wilcoxon_std": 0.0126677385,
    "should_proceed": true
  },
  {
    "dataset": "ConcreteCompressiveStrength",
    "baseline": "xgboost",
    "sample_size": 603,
    "mse": 0.0047334768,
    "mse_std": 0.0009409073,
    "aug_mse": 0.0042688907,
    "aug_mse_std": 0.0009233831,
    "delta_mse": -9.7478183288,
    "delta_mse_std": 8.3443611534,
    "p_wilcoxon": 0.0022786458,
    "p_wilcoxon_std": 0.0020690428,
    "should_proceed": true
  },
  {
    "dataset": "ConcreteCompressiveStrength",
    "baseline": "xgboost",
    "sample_size": 804,
    "mse": 0.0036512537,
    "mse_std": 0.0006456758,
    "aug_mse": 0.0030678783,
    "aug_mse_std": 0.0005603168,
    "delta_mse": -15.724966637,
    "delta_mse_std": 7.2517654379,
    "p_wilcoxon": 0.0024088542,
    "p_wilcoxon_std": 0.0043507902,
    "should_proceed": true
  },
  {
    "dataset": "ConcreteCompressiveStrength",
    "baseline": "xgboost",
    "sample_size": 1005,
    "mse": 0.0028980348,
    "mse_std": 0.0003806686,
    "aug_mse": 0.0025594153,
    "aug_mse_std": 0.0004899787,
    "delta_mse": -12.1867421997,
    "delta_mse_std": 7.7044726959,
    "p_wilcoxon": 0.0099609375,
    "p_wilcoxon_std": 0.028496444,
    "should_proceed": true
  },
  {
    "dataset": "EnergyEfficiency",
    "baseline": "mlp",
    "sample_size": 153,
    "mse": 0.0058305922,
    "mse_std": 0.0018421546,
    "aug_mse": 0.0042578125,
    "aug_mse_std": 0.0017940478,
    "delta_mse": -25.1028092847,
    "delta_mse_std": 26.3073219477,
    "p_wilcoxon": 0.0108723958,
    "p_wilcoxon_std": 0.0169666005,
    "should_proceed": true
  },
  {
    "dataset": "EnergyEfficiency",
    "baseline": "mlp",
    "sample_size": 306,
    "mse": 0.0032134679,
    "mse_std": 0.0005590257,
    "aug_mse": 0.0023321361,
    "aug_mse_std": 0.0008016627,
    "delta_mse": -28.1256615701,
    "delta_mse_std": 18.7258835212,
    "p_wilcoxon": 0.0029296875,
    "p_wilcoxon_std": 0.0044964314,
    "should_proceed": true
  },
  {
    "dataset": "EnergyEfficiency",
    "baseline": "mlp",
    "sample_size": 459,
    "mse": 0.0018839916,
    "mse_std": 0.0005838401,
    "aug_mse": 0.0010630391,
    "aug_mse_std": 0.0004880462,
    "delta_mse": -42.9840586933,
    "delta_mse_std": 18.0009202236,
    "p_wilcoxon": 0.0009765625,
    "p_wilcoxon_std": 0.0,
    "should_proceed": true
  },
  {
    "dataset": "EnergyEfficiency",
    "baseline": "mlp",
    "sample_size": 612,
    "mse": 0.0009058152,
    "mse_std": 0.0003053578,
    "aug_mse": 0.0005222332,
    "aug_mse_std": 0.0002061769,
    "delta_mse": -40.709621837,
    "delta_mse_std": 15.0553832906,
    "p_wilcoxon": 0.0014322917,
    "p_wilcoxon_std": 0.0010618168,
    "should_proceed": true
  },
  {
    "dataset": "EnergyEfficiency",
    "baseline": "mlp",
    "sample_size": 765,
    "mse": 0.0005273754,
    "mse_std": 0.0002907157,
    "aug_mse": 0.0003479677,
    "aug_mse_std": 0.0001235001,
    "delta_mse": -28.3055421836,
    "delta_mse_std": 16.5100404136,
    "p_wilcoxon": 0.0130208333,
    "p_wilcoxon_std": 0.0338115718,
    "should_proceed": true
  },
  {
    "dataset": "EnergyEfficiency",
    "baseline": "xgboost",
    "sample_size": 153,
    "mse": 0.003990888,
    "mse_std": 0.00187532,
    "aug_mse": 0.0034449311,
    "aug_mse_std": 0.0019471062,
    "delta_mse": -13.3298745849,
    "delta_mse_std": 29.8607222304,
    "p_wilcoxon": 0.0617838542,
    "p_wilcoxon_std": 0.0992925841,
    "should_proceed": true
  },
  {
    "dataset": "EnergyEfficiency",
    "baseline": "xgboost",
    "sample_size": 306,
    "mse": 0.0023316135,
    "mse_std": 0.0005595757,
    "aug_mse": 0.0020609256,
    "aug_mse_std": 0.0005739312,
    "delta_mse": -12.2044676367,
    "delta_mse_std": 12.1638607539,
    "p_wilcoxon": 0.0345703125,
    "p_wilcoxon_std": 0.0663409222,
    "should_proceed": true
  },
  {
    "dataset": "EnergyEfficiency",
    "baseline": "xgboost",
    "sample_size": 459,
    "mse": 0.0016528977,
    "mse_std": 0.0004473421,
    "aug_mse": 0.001431547,
    "aug_mse_std": 0.0004353489,
    "delta_mse": -10.5485972136,
    "delta_mse_std": 22.9523445636,
    "p_wilcoxon": 0.0253255208,
    "p_wilcoxon_std": 0.0297265939,
    "should_proceed": true
  },
  {
    "dataset": "EnergyEfficiency",
    "baseline": "xgboost",
    "sample_size": 612,
    "mse": 0.0012807429,
    "mse_std": 0.0002871458,
    "aug_mse": 0.000998468,
    "aug_mse_std": 0.0002383854,
    "delta_mse": -19.349168089,
    "delta_mse_std": 20.690718407,
    "p_wilcoxon": 0.0149088542,
    "p_wilcoxon_std": 0.0194544933,
    "should_proceed": true
  },
  {
    "dataset": "EnergyEfficiency",
    "baseline": "xgboost",
    "sample_size": 765,
    "mse": 0.0009675013,
    "mse_std": 0.0002414944,
    "aug_mse": 0.0007612863,
    "aug_mse_std": 0.0002521353,
    "delta_mse": -20.9602765222,
    "delta_mse_std": 16.9361632466,
    "p_wilcoxon": 0.0419921875,
    "p_wilcoxon_std": 0.0527295522,
    "should_proceed": true
  },
  {
    "dataset": "HousePrice",
    "baseline": "mlp",
    "sample_size": 200,
    "mse": 0.0010206246,
    "mse_std": 0.0004368029,
    "aug_mse": 0.0005719088,
    "aug_mse_std": 0.0002640565,
    "delta_mse": -40.5662148407,
    "delta_mse_std": 18.7073035656,
    "p_wilcoxon": 0.00234375,
    "p_wilcoxon_std": 0.0017758049,
    "should_proceed": true
  },
  {
    "dataset": "HousePrice",
    "baseline": "mlp",
    "sample_size": 400,
    "mse": 0.000409953,
    "mse_std": 0.0001196617,
    "aug_mse": 0.0002487452,
    "aug_mse_std": 5.53294e-05,
    "delta_mse": -37.0202349218,
    "delta_mse_std": 14.3704196136,
    "p_wilcoxon": 0.0016276042,
    "p_wilcoxon_std": 0.0014988105,
    "should_proceed": true
  },
  {
    "dataset": "HousePrice",
    "baseline": "mlp",
    "sample_size": 600,
    "mse": 0.0002904957,
    "mse_std": 9.13211e-05,
    "aug_mse": 0.0001972402,
    "aug_mse_std": 6.32397e-05,
    "delta_mse": -30.1357299355,
    "delta_mse_std": 14.6022722988,
    "p_wilcoxon": 0.0049479167,
    "p_wilcoxon_std": 0.0128544051,
    "should_proceed": true
  },
  {
    "dataset": "HousePrice",
    "baseline": "mlp",
    "sample_size": 800,
    "mse": 0.0002315977,
    "mse_std": 4.06486e-05,
    "aug_mse": 0.0001576826,
    "aug_mse_std": 2.51985e-05,
    "delta_mse": -30.3245655007,
    "delta_mse_std": 15.869294788,
    "p_wilcoxon": 0.0012369792,
    "p_wilcoxon_std": 0.0005600472,
    "should_proceed": true
  },
  {
    "dataset": "HousePrice",
    "baseline": "mlp",
    "sample_size": 1000,
    "mse": 0.0001921743,
    "mse_std": 4.61762e-05,
    "aug_mse": 0.0001377251,
    "aug_mse_std": 2.22444e-05,
    "delta_mse": -26.9662192293,
    "delta_mse_std": 9.8113535761,
    "p_wilcoxon": 0.0020182292,
    "p_wilcoxon_std": 0.0021826894,
    "should_proceed": true
  },
  {
    "dataset": "HousePrice",
    "baseline": "xgboost",
    "sample_size": 200,
    "mse": 0.0007851708,
    "mse_std": 0.0003260483,
    "aug_mse": 0.0006352743,
    "aug_mse_std": 0.0001884516,
    "delta_mse": -14.2290712263,
    "delta_mse_std": 18.4339856985,
    "p_wilcoxon": 0.0014322917,
    "p_wilcoxon_std": 0.0010001492,
    "should_proceed": true
  },
  {
    "dataset": "HousePrice",
    "baseline": "xgboost",
    "sample_size": 400,
    "mse": 0.0003274794,
    "mse_std": 7.53139e-05,
    "aug_mse": 0.0003097462,
    "aug_mse_std": 7.91191e-05,
    "delta_mse": -5.39357942,
    "delta_mse_std": 8.7667500781,
    "p_wilcoxon": 0.0037109375,
    "p_wilcoxon_std": 0.0102310944,
    "should_proceed": true
  },
  {
    "dataset": "HousePrice",
    "baseline": "xgboost",
    "sample_size": 600,
    "mse": 0.0002696999,
    "mse_std": 6.43067e-05,
    "aug_mse": 0.0002552408,
    "aug_mse_std": 6.28181e-05,
    "delta_mse": -4.8740023156,
    "delta_mse_std": 10.5395226817,
    "p_wilcoxon": 0.0015625,
    "p_wilcoxon_std": 0.0014615849,
    "should_proceed": true
  },
  {
    "dataset": "HousePrice",
    "baseline": "xgboost",
    "sample_size": 800,
    "mse": 0.0002429755,
    "mse_std": 3.67805e-05,
    "aug_mse": 0.0002198606,
    "aug_mse_std": 4.37059e-05,
    "delta_mse": -9.8565220739,
    "delta_mse_std": 7.7320904875,
    "p_wilcoxon": 0.0033854167,
    "p_wilcoxon_std": 0.0064469563,
    "should_proceed": true
  },
  {
    "dataset": "HousePrice",
    "baseline": "xgboost",
    "sample_size": 1000,
    "mse": 0.0001955208,
    "mse_std": 2.49318e-05,
    "aug_mse": 0.0001826318,
    "aug_mse_std": 2.54664e-05,
    "delta_mse": -6.4951084941,
    "delta_mse_std": 7.257858085,
    "p_wilcoxon": 0.0013020833,
    "p_wilcoxon_std": 0.0009873536,
    "should_proceed": true
  },
  {
    "dataset": "ParkinsonsTelemonitoring",
    "baseline": "mlp",
    "sample_size": 1175,
    "mse": 0.0016495489,
    "mse_std": 0.0004476986,
    "aug_mse": 0.001013234,
    "aug_mse_std": 0.0002250524,
    "delta_mse": -36.1745243434,
    "delta_mse_std": 15.111272165,
    "p_wilcoxon": 0.0011067708,
    "p_wilcoxon_std": 0.0003319674,
    "should_proceed": true
  },
  {
    "dataset": "ParkinsonsTelemonitoring",
    "baseline": "mlp",
    "sample_size": 2350,
    "mse": 0.0007987489,
    "mse_std": 0.000184423,
    "aug_mse": 0.0005371293,
    "aug_mse_std": 0.0001223468,
    "delta_mse": -31.823810337,
    "delta_mse_std": 9.8266616714,
    "p_wilcoxon": 0.0069010417,
    "p_wilcoxon_std": 0.016064247,
    "should_proceed": true
  },
  {
    "dataset": "ParkinsonsTelemonitoring",
    "baseline": "mlp",
    "sample_size": 3525,
    "mse": 0.0004837592,
    "mse_std": 0.0001110846,
    "aug_mse": 0.0002963744,
    "aug_mse_std": 6.70434e-05,
    "delta_mse": -36.5965484468,
    "delta_mse_std": 15.3367684577,
    "p_wilcoxon": 0.002734375,
    "p_wilcoxon_std": 0.0047601788,
    "should_proceed": true
  },
  {
    "dataset": "ParkinsonsTelemonitoring",
    "baseline": "mlp",
    "sample_size": 4700,
    "mse": 0.0004221506,
    "mse_std": 0.0001194898,
    "aug_mse": 0.0002129148,
    "aug_mse_std": 4.23446e-05,
    "delta_mse": -46.4030097668,
    "delta_mse_std": 15.8016505237,
    "p_wilcoxon": 0.0040364583,
    "p_wilcoxon_std": 0.0078357943,
    "should_proceed": true
  },
  {
    "dataset": "ParkinsonsTelemonitoring",
    "baseline": "mlp",
    "sample_size": 5875,
    "mse": 0.0002594705,
    "mse_std": 9.26327e-05,
    "aug_mse": 0.0001288982,
    "aug_mse_std": 4.74774e-05,
    "delta_mse": -47.2261319695,
    "delta_mse_std": 17.9080068295,
    "p_wilcoxon": 0.0110026042,
    "p_wilcoxon_std": 0.0339294535,
    "should_proceed": true
  },
  {
    "dataset": "ParkinsonsTelemonitoring",
    "baseline": "xgboost",
    "sample_size": 1175,
    "mse": 0.0007856493,
    "mse_std": 9.98688e-05,
    "aug_mse": 0.0007199847,
    "aug_mse_std": 0.0001185452,
    "delta_mse": -8.3998202644,
    "delta_mse_std": 9.2543215299,
    "p_wilcoxon": 0.0010416667,
    "p_wilcoxon_std": 0.0002435975,
    "should_proceed": true
  },
  {
    "dataset": "ParkinsonsTelemonitoring",
    "baseline": "xgboost",
    "sample_size": 2350,
    "mse": 0.0003439428,
    "mse_std": 7.03571e-05,
    "aug_mse": 0.0003171072,
    "aug_mse_std": 5.04363e-05,
    "delta_mse": -6.6000643913,
    "delta_mse_std": 10.8737466308,
    "p_wilcoxon": 0.0083984375,
    "p_wilcoxon_std": 0.015737712,
    "should_proceed": true
  },
  {
    "dataset": "ParkinsonsTelemonitoring",
    "baseline": "xgboost",
    "sample_size": 3525,
    "mse": 0.0002069116,
    "mse_std": 4.61147e-05,
    "aug_mse": 0.0001998407,
    "aug_mse_std": 4.58043e-05,
    "delta_mse": -2.7941734861,
    "delta_mse_std": 12.9941563142,
    "p_wilcoxon": 0.0434244792,
    "p_wilcoxon_std": 0.0904368351,
    "should_proceed": true
  },
  {
    "dataset": "ParkinsonsTelemonitoring",
    "baseline": "xgboost",
    "sample_size": 4700,
    "mse": 0.0001475394,
    "mse_std": 2.61846e-05,
    "aug_mse": 0.0001382391,
    "aug_mse_std": 2.83771e-05,
    "delta_mse": -6.2643529246,
    "delta_mse_std": 9.3016354935,
    "p_wilcoxon": 0.0203776042,
    "p_wilcoxon_std": 0.0233637802,
    "should_proceed": true
  },
  {
    "dataset": "ParkinsonsTelemonitoring",
    "baseline": "xgboost",
    "sample_size": 5875,
    "mse": 0.0001099403,
    "mse_std": 3.08954e-05,
    "aug_mse": 0.0001127258,
    "aug_mse_std": 4.02757e-05,
    "delta_mse": 1.6511605332,
    "delta_mse_std": 14.5483180385,
    "p_wilcoxon": 0.2736328125,
    "p_wilcoxon_std": 0.3681855963,
    "should_proceed": false
  },
  {
    "dataset": "WineQuality",
    "baseline": "mlp",
    "sample_size": 1063,
    "mse": 0.0229120147,
    "mse_std": 0.0034146243,
    "aug_mse": 0.0228354158,
    "aug_mse_std": 0.0049935338,
    "delta_mse": -0.3413939784,
    "delta_mse_std": 12.7515032794,
    "p_wilcoxon": 0.00546875,
    "p_wilcoxon_std": 0.008711317,
    "should_proceed": true
  },
  {
    "dataset": "WineQuality",
    "baseline": "mlp",
    "sample_size": 2126,
    "mse": 0.0153944315,
    "mse_std": 0.0010030055,
    "aug_mse": 0.0145811942,
    "aug_mse_std": 0.0012539086,
    "delta_mse": -5.2413869418,
    "delta_mse_std": 6.1122511469,
    "p_wilcoxon": 0.0024739583,
    "p_wilcoxon_std": 0.0032850197,
    "should_proceed": true
  },
  {
    "dataset": "WineQuality",
    "baseline": "mlp",
    "sample_size": 3189,
    "mse": 0.0147843296,
    "mse_std": 0.0008877228,
    "aug_mse": 0.0142251295,
    "aug_mse_std": 0.0009203952,
    "delta_mse": -3.6293457416,
    "delta_mse_std": 5.8469214639,
    "p_wilcoxon": 0.0108072917,
    "p_wilcoxon_std": 0.0206251808,
    "should_proceed": true
  },
  {
    "dataset": "WineQuality",
    "baseline": "mlp",
    "sample_size": 4252,
    "mse": 0.0138623337,
    "mse_std": 0.0010533247,
    "aug_mse": 0.0132346822,
    "aug_mse_std": 0.000950292,
    "delta_mse": -4.4438494342,
    "delta_mse_std": 3.1813641643,
    "p_wilcoxon": 0.0124348958,
    "p_wilcoxon_std": 0.0150507542,
    "should_proceed": true
  },
  {
    "dataset": "WineQuality",
    "baseline": "mlp",
    "sample_size": 5315,
    "mse": 0.0139716229,
    "mse_std": 0.0006308329,
    "aug_mse": 0.0132786146,
    "aug_mse_std": 0.000744678,
    "delta_mse": -4.9853832284,
    "delta_mse_std": 2.2873773018,
    "p_wilcoxon": 0.0255208333,
    "p_wilcoxon_std": 0.03103332,
    "should_proceed": true
  },
  {
    "dataset": "WineQuality",
    "baseline": "xgboost",
    "sample_size": 1063,
    "mse": 0.0205733952,
    "mse_std": 0.0021618803,
    "aug_mse": 0.0206208483,
    "aug_mse_std": 0.0020859411,
    "delta_mse": 0.3061982376,
    "delta_mse_std": 2.9212723901,
    "p_wilcoxon": 0.0013020833,
    "p_wilcoxon_std": 0.0009873536,
    "should_proceed": true
  },
  {
    "dataset": "WineQuality",
    "baseline": "xgboost",
    "sample_size": 2126,
    "mse": 0.0141569593,
    "mse_std": 0.0011108496,
    "aug_mse": 0.0142937065,
    "aug_mse_std": 0.0011145633,
    "delta_mse": 1.0137271686,
    "delta_mse_std": 2.8372576005,
    "p_wilcoxon": 0.0013020833,
    "p_wilcoxon_std": 0.0006828183,
    "should_proceed": true
  },
  {
    "dataset": "WineQuality",
    "baseline": "xgboost",
    "sample_size": 3189,
    "mse": 0.0139096469,
    "mse_std": 0.0007317437,
    "aug_mse": 0.0138572102,
    "aug_mse_std": 0.0006309803,
    "delta_mse": -0.3260641406,
    "delta_mse_std": 1.8723529469,
    "p_wilcoxon": 0.001171875,
    "p_wilcoxon_std": 0.0005289088,
    "should_proceed": true
  },
  {
    "dataset": "WineQuality",
    "baseline": "xgboost",
    "sample_size": 4252,
    "mse": 0.0133201816,
    "mse_std": 0.0009223569,
    "aug_mse": 0.0132427331,
    "aug_mse_std": 0.0010153337,
    "delta_mse": -0.6125278284,
    "delta_mse_std": 1.7255729067,
    "p_wilcoxon": 0.0011067708,
    "p_wilcoxon_std": 0.0003319674,
    "should_proceed": true
  },
  {
    "dataset": "WineQuality",
    "baseline": "xgboost",
    "sample_size": 5315,
    "mse": 0.0133190184,
    "mse_std": 0.0004769906,
    "aug_mse": 0.0131772988,
    "aug_mse_std": 0.000543836,
    "delta_mse": -1.0787359372,
    "delta_mse_std": 1.1458290831,
    "p_wilcoxon": 0.0009765625,
    "p_wilcoxon_std": 0.0,
    "should_proceed": true
  }
]