{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": {
   "name": "VAD-logistic-regression.ipynb",
   "provenance": [],
   "collapsed_sections": [],
   "authorship_tag": "ABX9TyORbN2MRJUJXbp962MJIzx+"
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "id": "XLCwlGegVT-Z",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1652963494374,
     "user_tz": -480,
     "elapsed": 25523,
     "user": {
      "displayName": "Yewen Fan",
      "userId": "08524649782816922526"
     }
    },
    "outputId": "d116d2a8-d18d-4b7d-c3f7-3fb15060f028",
    "colab": {
     "base_uri": "https://localhost:8080/"
    }
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "alpha:  0.02\n",
      "mean of total_calibration_p :  1.0075426623474724\n",
      "mean of total_calibration_y :  1.00684832888181\n",
      "mean of positive_ratio_after_selection :  0.5006666666666666\n",
      "mean of positive_ratio_train :  0.7223800000000001\n",
      "mean of positive_ratio_test :  0.27763566666666667\n",
      "mean of Ground truth :  1.0014845514782749\n",
      "mean of calibration_y_true_p :  1.0017355436161197\n",
      "mean of calibration_p_true_p :  1.0008945827106528\n",
      "mean of calibration_p_y_true_p :  1.000854484927436\n",
      "mean of Vanilla :  1.0870833403158637\n",
      "mean of Vanilla_p :  1.085150644350889\n",
      "mean of Vanilla_ECE :  0.06580099999999998\n",
      "mean of Vanilla_MCE :  0.14744999999999997\n",
      "mean of VAD :  0.9947581526963414\n",
      "mean of VAD_p :  0.9930470981433029\n",
      "mean of VAD_ECE :  0.05720600000000001\n",
      "mean of VAD_MCE :  0.12937900000000002\n",
      "mean of VAD prob :  1.0053367056113378\n",
      "mean of VAD prob_p :  1.0035978541514707\n",
      "mean of VAD prob_ECE :  0.05689999999999999\n",
      "mean of VAD prob_MCE :  0.129316\n",
      "mean of Lambda logit :  0.8393979321066222\n",
      "mean of Lambda prob :  0.8438001207738465\n",
      "mean of Mu logit :  -1.003353886934257\n",
      "mean of Mu prob :  0.27955385288476553\n",
      "mean of Log Loss Logit Improvement :  -0.4780223901090435\n",
      "mean of Log Loss Prob Improvement :  -0.4939185668278321\n",
      "\n",
      "alpha:  0.1\n",
      "mean of total_calibration_p :  1.0075426623474724\n",
      "mean of total_calibration_y :  1.00684832888181\n",
      "mean of positive_ratio_after_selection :  0.43296000000000007\n",
      "mean of positive_ratio_train :  0.7223800000000001\n",
      "mean of positive_ratio_test :  0.27763566666666667\n",
      "mean of Ground truth :  1.0001671994078054\n",
      "mean of calibration_y_true_p :  1.003596003566092\n",
      "mean of calibration_p_true_p :  1.0020859768658152\n",
      "mean of calibration_p_y_true_p :  0.9989820137607114\n",
      "mean of Vanilla :  1.0696838274979306\n",
      "mean of Vanilla_p :  1.0694648688967405\n",
      "mean of Vanilla_ECE :  0.04178199999999999\n",
      "mean of Vanilla_MCE :  0.08305099999999997\n",
      "mean of VAD :  0.9941392822186047\n",
      "mean of VAD_p :  0.9940116548764467\n",
      "mean of VAD_ECE :  0.034336000000000005\n",
      "mean of VAD_MCE :  0.07251\n",
      "mean of VAD prob :  1.0039648130036836\n",
      "mean of VAD prob_p :  1.0038133536191325\n",
      "mean of VAD prob_ECE :  0.033712000000000006\n",
      "mean of VAD prob_MCE :  0.071463\n",
      "mean of Lambda logit :  0.8416838338896833\n",
      "mean of Lambda prob :  0.8441281519568941\n",
      "mean of Mu logit :  -1.003353886934257\n",
      "mean of Mu prob :  0.27955385288476553\n",
      "mean of Log Loss Logit Improvement :  -0.24403063157619265\n",
      "mean of Log Loss Prob Improvement :  -0.26066089249243063\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import re\n",
    "\n",
    "import numpy as np\n",
    "import sklearn\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import log_loss\n",
    "from VAD_util import calculate_lambda, prediction_transformation\n",
    "\n",
    "# fix random seed so it's easier to reproduce the result\n",
    "np.random.seed(1234)\n",
    "\n",
    "num_features = 20\n",
    "sigma = 0.1\n",
    "num_examples = 30000\n",
    "num_examples_train = 3000\n",
    "num_simulation = 100\n",
    "S_GROUP = 2\n",
    "\n",
    "# scikit-learn 1.2 deprecated penalty='none' in favour of penalty=None and\n",
    "# 1.4 removed the string form, while releases before 1.2 only accept the\n",
    "# string. Pick whichever spelling the installed version understands.\n",
    "_version_match = re.match('([0-9]+)[.]([0-9]+)', sklearn.__version__)\n",
    "_sklearn_version = tuple(int(part) for part in _version_match.groups())\n",
    "NO_PENALTY = None if _sklearn_version >= (1, 2) else 'none'\n",
    "\n",
    "# Metrics recorded for every simulation, in the order they are printed.\n",
    "result_keys = [\n",
    "    'total_calibration_p',\n",
    "    'total_calibration_y',\n",
    "    'positive_ratio_after_selection',\n",
    "    'positive_ratio_train',\n",
    "    'positive_ratio_test',\n",
    "    'Ground truth',\n",
    "    'calibration_y_true_p',\n",
    "    'calibration_p_true_p',\n",
    "    'calibration_p_y_true_p',\n",
    "    'Vanilla',\n",
    "    'Vanilla_p',\n",
    "    'Vanilla_ECE',\n",
    "    'Vanilla_MCE',\n",
    "    'VAD',\n",
    "    'VAD_p',\n",
    "    'VAD_ECE',\n",
    "    'VAD_MCE',\n",
    "    'VAD prob',\n",
    "    'VAD prob_p',\n",
    "    'VAD prob_ECE',\n",
    "    'VAD prob_MCE',\n",
    "    'Lambda logit',\n",
    "    'Lambda prob',\n",
    "    'Mu logit',\n",
    "    'Mu prob',\n",
    "    'Log Loss Logit Improvement',\n",
    "    'Log Loss Prob Improvement',\n",
    "]\n",
    "\n",
    "\n",
    "def generate_data(num_examples, mu):\n",
    "    \"\"\"Simulate one data set from a logistic model whose coefficients are all 1.\n",
    "\n",
    "    Every feature is drawn from a normal distribution with mean mu and\n",
    "    standard deviation sigma; num_features and sigma are the module-level\n",
    "    settings. The true positive probability of a row is the sigmoid of the\n",
    "    sum of its features.\n",
    "\n",
    "    Args:\n",
    "        num_examples: number of rows to draw.\n",
    "        mu: mean of every feature.\n",
    "\n",
    "    Returns:\n",
    "        Tuple (x, y, p): the (num_examples, num_features) feature matrix, the\n",
    "        0/1 labels sampled with probability p, and the true probabilities p.\n",
    "    \"\"\"\n",
    "    # The order of the two random draws is part of the seeded reproducibility.\n",
    "    x = np.random.normal(mu, sigma, (num_examples, num_features))\n",
    "    p = 1 / (1 + np.exp(- np.sum(x, axis=1)))\n",
    "    y = np.random.binomial(1, p)\n",
    "    return x, y, p\n",
    "\n",
    "\n",
    "def bootstrap_data(x, y):\n",
    "    \"\"\"Resample the rows of (x, y) with replacement, keeping the sample size.\"\"\"\n",
    "    num_rows = x.shape[0]\n",
    "    ind = np.random.choice(np.arange(num_rows), num_rows, replace=True)\n",
    "    return x[ind], y[ind]\n",
    "\n",
    "\n",
    "def calculate_ECE_MCE_Brier(y_prob, y_true, n_bins):\n",
    "    \"\"\"Calibration errors over n_bins equal-frequency bins.\n",
    "\n",
    "    The predictions are sorted and split with np.array_split into n_bins\n",
    "    consecutive groups of (almost) equal size. With gap being the absolute\n",
    "    difference between the mean prediction and the mean label of a bin:\n",
    "\n",
    "        ECE   = mean of gap over the bins\n",
    "        MCE   = largest gap\n",
    "        Brier = mean of gap ** 2 over the bins (a binned squared calibration\n",
    "                error, not the per-example Brier score)\n",
    "\n",
    "    Args:\n",
    "        y_prob: 1-D array of predicted probabilities.\n",
    "        y_true: 1-D array of labels aligned with y_prob.\n",
    "        n_bins: number of bins; pass at least n_bins examples, because an\n",
    "            empty bin has a NaN mean.\n",
    "\n",
    "    Returns:\n",
    "        Tuple (ECE, MCE, Brier), each rounded to 4 decimals.\n",
    "    \"\"\"\n",
    "    order = np.argsort(y_prob)\n",
    "    binned_y_true = np.array_split(y_true[order], n_bins)\n",
    "    binned_y_prob = np.array_split(y_prob[order], n_bins)\n",
    "    ece_errors = 0.0\n",
    "    mce_errors = 0.0\n",
    "    brier_errors = 0.0\n",
    "    for bin_y_true, bin_y_prob in zip(binned_y_true, binned_y_prob):\n",
    "        avg_y_true = np.mean(bin_y_true)\n",
    "        avg_y_score = np.mean(bin_y_prob)\n",
    "        ce_error = np.abs(avg_y_score - avg_y_true)\n",
    "        ece_errors += ce_error / n_bins\n",
    "        mce_errors = max(mce_errors, ce_error)\n",
    "        brier_errors += ((avg_y_score - avg_y_true) ** 2) / n_bins\n",
    "    return round(ece_errors, 4), round(mce_errors, 4), round(brier_errors, 4)\n",
    "\n",
    "\n",
    "def fit_unpenalized_logistic_regression(x, y):\n",
    "    \"\"\"Fit a logistic regression without regularisation on (x, y).\"\"\"\n",
    "    return LogisticRegression(penalty=NO_PENALTY, random_state=0).fit(x, y)\n",
    "\n",
    "\n",
    "def summed_log_loss(y_true, p_hat):\n",
    "    \"\"\"Binary cross-entropy of p_hat against y_true, summed over the examples.\"\"\"\n",
    "    return np.sum(-(y_true * np.log(p_hat) + (1 - y_true) * np.log(1 - p_hat)))\n",
    "\n",
    "\n",
    "def VAD_method(num_simulation, num_group, method, alpha, train_data, test_data, test_val_data):\n",
    "    \"\"\"Compare vanilla and VAD-adjusted predictions on the top-alpha slice of the test set.\n",
    "\n",
    "    For each simulation k a logistic regression is fitted on the k-th training\n",
    "    set, num_group further models are fitted on bootstrap resamples of it, and\n",
    "    their predictions on the k-th validation set are passed to\n",
    "    calculate_lambda. The alpha fraction of test rows with the highest\n",
    "    predicted probability is then re-scored with prediction_transformation\n",
    "    and compared with the unadjusted predictions.\n",
    "\n",
    "    Args:\n",
    "        num_simulation: number of simulated data sets to evaluate.\n",
    "        num_group: number of bootstrap models fitted per simulation.\n",
    "        method: 'bootstrap' or 'seperate_data' (sic). The value is only\n",
    "            validated; the bootstrap procedure is always the one used.\n",
    "        alpha: fraction of the test rows (highest predictions) to select.\n",
    "        train_data, test_data, test_val_data: (x_array, y_array, p_array)\n",
    "            tuples as returned by generate_data_array. Only the features of\n",
    "            test_val_data and only the features and labels of train_data are\n",
    "            read.\n",
    "\n",
    "    Returns:\n",
    "        Dict mapping every entry of result_keys to a NumPy array holding one\n",
    "        value per simulation. The per-key means are also printed.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if alpha selects no test row or more rows than exist.\n",
    "    \"\"\"\n",
    "\n",
    "    def calculate_logit(X, clf):\n",
    "        # Linear predictor of a fitted model, flattened to one value per row.\n",
    "        return np.squeeze(X @ clf.coef_.T + clf.intercept_)\n",
    "\n",
    "    def append_result(result, curr):\n",
    "        for result_key in result_keys:\n",
    "            result[result_key].append(curr[result_key])\n",
    "        return result\n",
    "\n",
    "    # The misspelled 'seperate_data' is kept so existing callers keep working.\n",
    "    assert method in ['bootstrap', 'seperate_data']\n",
    "    x_train_array, y_train_array, _ = train_data\n",
    "    x_test_val_array, _, _ = test_val_data\n",
    "    x_test_array, y_test_array, p_test_array = test_data\n",
    "\n",
    "    result = {result_key: [] for result_key in result_keys}\n",
    "\n",
    "    for k in range(num_simulation):\n",
    "        x_train, y_train = x_train_array[k], y_train_array[k]\n",
    "        x_test, y_test, p_test = x_test_array[k], y_test_array[k], p_test_array[k]\n",
    "        x_test_val = x_test_val_array[k]\n",
    "\n",
    "        # Reference model fitted on the full training set.\n",
    "        clf = fit_unpenalized_logistic_regression(x_train, y_train)\n",
    "        p_predicted = clf.predict_proba(x_test)[:, 1]\n",
    "        p_predicted_test_val = clf.predict_proba(x_test_val)[:, 1]\n",
    "        logit_predicted = calculate_logit(x_test, clf)\n",
    "        logit_predicted_test_val = calculate_logit(x_test_val, clf)\n",
    "        num_examples_test_val = x_test_val.shape[0]\n",
    "        num_examples_test = x_test.shape[0]\n",
    "\n",
    "        # One column of validation-set predictions per bootstrap model.\n",
    "        p_predicted_subgroup = np.zeros((num_examples_test_val, num_group))\n",
    "        logit_predicted_subgroup = np.zeros((num_examples_test_val, num_group))\n",
    "        for i in range(num_group):\n",
    "            x_train_bootstrap, y_train_bootstrap = bootstrap_data(x_train, y_train)\n",
    "            clf_i = fit_unpenalized_logistic_regression(x_train_bootstrap, y_train_bootstrap)\n",
    "            p_predicted_subgroup[:, i] = clf_i.predict_proba(x_test_val)[:, 1]\n",
    "            logit_predicted_subgroup[:, i] = calculate_logit(x_test_val, clf_i)\n",
    "\n",
    "        lambda_p_logit = calculate_lambda(\n",
    "            p_predicted_subgroup, p_predicted_test_val, 'logit',\n",
    "            logit_predicted_subgroup, logit_predicted_test_val)\n",
    "        lambda_p_prob = calculate_lambda(\n",
    "            p_predicted_subgroup, p_predicted_test_val, 'probability',\n",
    "            logit_predicted_subgroup, logit_predicted_test_val)\n",
    "\n",
    "        choose_num_examples = int(num_examples_test * alpha)\n",
    "        # A count of 0 would make the [-0:] slices below select every row\n",
    "        # instead of none, so reject it explicitly.\n",
    "        if not 1 <= choose_num_examples <= num_examples_test:\n",
    "            raise ValueError(\n",
    "                'alpha=%r selects %d of %d test examples; it must select at least one and at most all of them'\n",
    "                % (alpha, choose_num_examples, num_examples_test))\n",
    "        # Unordered indices of the rows with the largest predicted / true probability.\n",
    "        ind = np.argpartition(p_predicted, -choose_num_examples)[-choose_num_examples:]\n",
    "        ind_true_p = np.argpartition(p_test, -choose_num_examples)[-choose_num_examples:]\n",
    "\n",
    "        mu_prob = np.mean(p_predicted_test_val)\n",
    "        mu_logit = np.mean(logit_predicted_test_val)\n",
    "\n",
    "        vanilla_log_loss = summed_log_loss(y_test[ind], p_predicted[ind])\n",
    "        # NOTE(review): prediction_transformation presumably returns adjusted\n",
    "        # probabilities for the rows in ind only - they are scored against\n",
    "        # y_test[ind] below. Confirm against VAD_util.\n",
    "        refined_prediction_logit = prediction_transformation(\n",
    "            p_predicted, ind, lambda_p_logit, 'logit', mu_prob, mu_logit, logit_predicted)\n",
    "        refined_prediction_prob = prediction_transformation(\n",
    "            p_predicted, ind, lambda_p_prob, 'probability', mu_prob, mu_logit, logit_predicted)\n",
    "        refined_log_loss_logit = summed_log_loss(y_test[ind], refined_prediction_logit)\n",
    "        refined_log_loss_prob = summed_log_loss(y_test[ind], refined_prediction_prob)\n",
    "\n",
    "        # The binned squared error is computed but not reported.\n",
    "        ECE, MCE, _ = calculate_ECE_MCE_Brier(p_predicted[ind], y_test[ind], 10)\n",
    "        ECE_logit, MCE_logit, _ = calculate_ECE_MCE_Brier(refined_prediction_logit, y_test[ind], 10)\n",
    "        ECE_prob, MCE_prob, _ = calculate_ECE_MCE_Brier(refined_prediction_prob, y_test[ind], 10)\n",
    "\n",
    "        # Ratios of summed predictions to summed labels (or true\n",
    "        # probabilities); 1.0 means the two totals agree.\n",
    "        curr = {\n",
    "            'total_calibration_p': np.sum(p_predicted) / np.sum(p_test),\n",
    "            'total_calibration_y': np.sum(p_predicted) / np.sum(y_test),\n",
    "            'positive_ratio_after_selection': np.sum(y_test[ind]) / len(y_test[ind]),\n",
    "            'positive_ratio_train': np.sum(y_train) / len(y_train),\n",
    "            'positive_ratio_test': np.sum(y_test) / len(y_test),\n",
    "            'Ground truth': np.sum(p_test[ind]) / np.sum(y_test[ind]),\n",
    "            'calibration_y_true_p': np.sum(p_predicted[ind_true_p]) / np.sum(y_test[ind_true_p]),\n",
    "            'calibration_p_true_p': np.sum(p_predicted[ind_true_p]) / np.sum(p_test[ind_true_p]),\n",
    "            'calibration_p_y_true_p': np.sum(y_test[ind_true_p]) / np.sum(p_test[ind_true_p]),\n",
    "            'Vanilla': np.sum(p_predicted[ind]) / np.sum(y_test[ind]),\n",
    "            'Vanilla_p': np.sum(p_predicted[ind]) / np.sum(p_test[ind]),\n",
    "            'Vanilla_ECE': ECE,\n",
    "            'Vanilla_MCE': MCE,\n",
    "            'VAD': np.sum(refined_prediction_logit) / np.sum(y_test[ind]),\n",
    "            'VAD_p': np.sum(refined_prediction_logit) / np.sum(p_test[ind]),\n",
    "            'VAD_ECE': ECE_logit,\n",
    "            'VAD_MCE': MCE_logit,\n",
    "            'VAD prob': np.sum(refined_prediction_prob) / np.sum(y_test[ind]),\n",
    "            'VAD prob_p': np.sum(refined_prediction_prob) / np.sum(p_test[ind]),\n",
    "            'VAD prob_ECE': ECE_prob,\n",
    "            'VAD prob_MCE': MCE_prob,\n",
    "            'Lambda logit': lambda_p_logit,\n",
    "            'Lambda prob': lambda_p_prob,\n",
    "            'Mu logit': mu_logit,\n",
    "            'Mu prob': mu_prob,\n",
    "            # Relative change in percent; a negative value means the adjusted\n",
    "            # predictions have the lower log loss.\n",
    "            'Log Loss Logit Improvement': (refined_log_loss_logit - vanilla_log_loss) / vanilla_log_loss * 100,\n",
    "            'Log Loss Prob Improvement': (refined_log_loss_prob - vanilla_log_loss) / vanilla_log_loss * 100,\n",
    "        }\n",
    "        result = append_result(result, curr)\n",
    "\n",
    "    print('alpha: ', alpha)\n",
    "    for result_key in result_keys:\n",
    "        result[result_key] = np.array(result[result_key])\n",
    "        print('mean of', result_key, ': ', np.mean(result[result_key]))\n",
    "    print()\n",
    "\n",
    "    return result\n",
    "\n",
    "\n",
    "def generate_data_array(num_simulation, num_examples, mu):\n",
    "    \"\"\"Call generate_data num_simulation times.\n",
    "\n",
    "    Returns:\n",
    "        Tuple (x_array, y_array, p_array) of lists with one entry per\n",
    "        simulation, in generation order.\n",
    "    \"\"\"\n",
    "    x_array = []\n",
    "    y_array = []\n",
    "    p_array = []\n",
    "    for _ in range(num_simulation):\n",
    "        x, y, p = generate_data(num_examples, mu)\n",
    "        x_array.append(x)\n",
    "        y_array.append(y)\n",
    "        p_array.append(p)\n",
    "    return (x_array, y_array, p_array)\n",
    "\n",
    "\n",
    "# The training features are centred at a different mean than the test and\n",
    "# validation features, so the two populations have different positive rates.\n",
    "mu = -0.05\n",
    "mu_train = 0.05\n",
    "train_data = generate_data_array(num_simulation, num_examples_train, mu_train)\n",
    "test_data = generate_data_array(num_simulation, num_examples, mu)\n",
    "test_val_data = generate_data_array(num_simulation, num_examples, mu)\n",
    "\n",
    "report = {}\n",
    "reported_result_keys = [\n",
    "    'Vanilla',\n",
    "    'Vanilla_ECE',\n",
    "    'Vanilla_MCE',\n",
    "    'VAD',\n",
    "    'VAD_ECE',\n",
    "    'VAD_MCE',\n",
    "    'VAD prob',\n",
    "    'VAD prob_ECE',\n",
    "    'VAD prob_MCE',\n",
    "]\n",
    "\n",
    "alphas = [0.02, 0.1]\n",
    "for alpha in alphas:\n",
    "    result = VAD_method(num_simulation, S_GROUP, 'bootstrap', alpha, train_data, test_data, test_val_data)\n",
    "    # Key the report by alpha in thousandths; round before truncating so that\n",
    "    # float error (e.g. 0.029 * 1000 == 28.999...) cannot shift the key.\n",
    "    alpha_key = int(round(alpha * 1000))\n",
    "    report[alpha_key] = {}\n",
    "    for result_key in reported_result_keys:\n",
    "        curr_result = result[result_key]\n",
    "        total_num_result = curr_result.shape[0]\n",
    "        # (mean, standard error of the mean) across the simulations.\n",
    "        report[alpha_key][result_key] = (np.mean(curr_result), np.std(curr_result) / np.sqrt(total_num_result))"
   ]
  }
 ]
}