{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "kI2GETHlZDbc"
      },
      "outputs": [],
      "source": [
        "import numpy as np\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from sklearn.metrics import log_loss\n",
        "import matplotlib.pyplot as plt\n",
        "import cvxpy as cp\n",
        "import pandas as pd\n",
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "background_save": true
        },
        "id": "l2hqktNClp5o",
        "outputId": "57586d06-fe31-4fe4-814c-14fcfdc4335b"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "93.5 0.19\n",
            "94.53 0.17\n"
          ]
        }
      ],
      "source": [
        "# Linear Regression\n",
        "\n",
        "from sklearn.linear_model import LinearRegression\n",
        "from sklearn.metrics import mean_squared_error\n",
        "\n",
        "# fix random seed so it's easier to reproduce the result\n",
        "np.random.seed(100)\n",
        "\n",
        "num_round = 100\n",
        "num_features = 20\n",
        "num_features_weak = 19\n",
        "x_sigma = 0.25\n",
        "x_mu = -0.05\n",
        "e_sigma = 2\n",
        "e_mu = 1\n",
        "\n",
        "num_train = 1000\n",
        "num_test = 10000\n",
        "num_val = 1000\n",
        "\n",
        "def generate_data(num_examples, x_mu, x_sigma, e_mu, e_sigma):\n",
        "    x = np.random.normal(x_mu, x_sigma, (num_examples, num_features))\n",
        "    y = np.sum(x, axis=1) + np.random.normal(e_mu, e_sigma, num_examples)\n",
        "    return x, y\n",
        "\n",
        "def calculate_bias_exp(p, y):\n",
        "    return np.mean(y) - np.mean(p)\n",
        "\n",
        "def apply_bias_exp(p, b):\n",
        "    return p + b\n",
        "\n",
        "\n",
        "def get_correct_ratio(weak_model_losses, strong_model_losses, pairwise=True):\n",
        "    num_model_weak = len(weak_model_losses)\n",
        "    num_model_strong = len(strong_model_losses)\n",
        "    better_model_count = 0\n",
        "\n",
        "    if pairwise:\n",
        "        for i in range(num_model_weak):\n",
        "            for j in range(num_model_strong):\n",
        "                if strong_model_losses[j] < weak_model_losses[i]:\n",
        "                    better_model_count += 1\n",
        "        return better_model_count / num_model_weak / num_model_strong\n",
        "    else:\n",
        "        assert num_model_weak == num_model_strong\n",
        "        for i in range(num_model_weak):\n",
        "            if strong_model_losses[i] < weak_model_losses[i]:\n",
        "                    better_model_count += 1\n",
        "        return better_model_count / num_model_weak\n",
        "\n",
        "x_test_val, y_test_val = generate_data(num_test + num_val, x_mu, x_sigma, e_mu, e_sigma)\n",
        "x_test, y_test = x_test_val[:num_test, :], y_test_val[:num_test]\n",
        "x_val, y_val = x_test_val[num_test:, :], y_test_val[num_test:]\n",
        "\n",
        "df = pd.DataFrame(columns=['Model A', 'Model B', 'Quadratic Loss Accuracy', 'Cali-Free Accuracy'])\n",
        "df_loss_std = pd.DataFrame(columns=['Model', 'Quadratic Loss std', 'Cali-Free std', 'Quadratic Loss mean', 'Cali-Free mean'])\n",
        "\n",
        "for _ in range(num_round):\n",
        "    calibration_array = []\n",
        "    quadratic_loss_array = []\n",
        "    quadratic_loss_array_full = []\n",
        "    quadratic_loss_array_corrected = []\n",
        "    calibration_array_weak = []\n",
        "    quadratic_loss_array_weak = []\n",
        "    quadratic_loss_array_weak_full = []\n",
        "    quadratic_loss_array_weak_corrected = []\n",
        "    for i in range(100):\n",
        "        x_train, y_train = generate_data(num_train, x_mu, x_sigma, e_mu, e_sigma)\n",
        "        clf = LinearRegression().fit(x_train, y_train)\n",
        "        x_train, y_train = generate_data(num_train, x_mu, x_sigma, e_mu, e_sigma)\n",
        "        clf_weak = LinearRegression().fit(x_train[:, :num_features_weak], y_train)\n",
        "        p_predicted_test_val = clf.predict(x_test_val)\n",
        "        p_predicted = p_predicted_test_val[:num_test]\n",
        "        p_predicted_val = p_predicted_test_val[num_test:]\n",
        "        bias = calculate_bias_exp(p_predicted_val, y_val)\n",
        "        p_corrected = apply_bias_exp(p_predicted, bias)\n",
        "        p_predicted_weak_test_val = clf_weak.predict(x_test_val[:, :num_features_weak])\n",
        "        p_predicted_weak = p_predicted_weak_test_val[:num_test]\n",
        "        p_predicted_weak_val = p_predicted_weak_test_val[num_test:]\n",
        "        bias_weak = calculate_bias_exp(p_predicted_weak_val, y_val)\n",
        "        p_corrected_weak = apply_bias_exp(p_predicted_weak, bias_weak)\n",
        "        calibration_array.append(np.sum(p_predicted) / np.sum(y_test))\n",
        "        quadratic_loss_array.append(mean_squared_error(y_test, p_predicted))\n",
        "        quadratic_loss_array_full.append(mean_squared_error(y_test_val, p_predicted_test_val))\n",
        "        quadratic_loss_array_corrected.append(mean_squared_error(y_test, p_corrected))\n",
        "        calibration_array_weak.append(np.sum(p_predicted_weak) / np.sum(y_test))\n",
        "        quadratic_loss_array_weak.append(mean_squared_error(y_test, p_predicted_weak))\n",
        "        quadratic_loss_array_weak_full.append(mean_squared_error(y_test_val, p_predicted_weak_test_val))\n",
        "        quadratic_loss_array_weak_corrected.append(mean_squared_error(y_test, p_corrected_weak))\n",
        "\n",
        "\n",
        "    quadratic_loss_result = get_correct_ratio(quadratic_loss_array_weak_full, quadratic_loss_array_full)\n",
        "    cali_free_quadratic_loss_result = get_correct_ratio(quadratic_loss_array_weak_corrected, quadratic_loss_array_corrected)\n",
        "    new_row = {\n",
        "        'Model A': \"20 features\",\n",
        "        'Model B': \"19 features\",\n",
        "        'Quadratic Loss Accuracy': f\"{round(quadratic_loss_result * 100, 1)}%\",\n",
        "        'Cali-Free Accuracy': f\"{round(cali_free_quadratic_loss_result* 100, 1)}%\",\n",
        "    }\n",
        "    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)\n",
        "    new_row = {\n",
        "        'Model': \"20 features\",\n",
        "        'Quadratic Loss std': f\"{round(np.std(np.array(quadratic_loss_array_full)), 5)}\",\n",
        "        'Cali-Free std': f\"{round(np.std(np.array(quadratic_loss_array_corrected)), 5)}\",\n",
        "        'Quadratic Loss mean': f\"{round(np.mean(np.array(quadratic_loss_array_full)), 5)}\",\n",
        "        'Cali-Free mean': f\"{round(np.mean(np.array(quadratic_loss_array_corrected)), 5)}\",\n",
        "    }\n",
        "    df_loss_std = pd.concat([df_loss_std, pd.DataFrame([new_row])], ignore_index=True)\n",
        "\n",
        "log_loss_accuracy = []\n",
        "cali_free_accuracy = []\n",
        "\n",
        "for i in range(num_round):\n",
        "    log_loss_accuracy.append(float(df[\"Quadratic Loss Accuracy\"][i][:4]))\n",
        "    cali_free_accuracy.append(float(df[\"Cali-Free Accuracy\"][i][:4]))\n",
        "\n",
        "log_loss_accuracy = np.array(log_loss_accuracy)\n",
        "cali_free_accuracy = np.array(cali_free_accuracy)\n",
        "print(round(np.mean(log_loss_accuracy), 2), round(np.std(log_loss_accuracy) / np.sqrt(len(log_loss_accuracy)), 2))\n",
        "print(round(np.mean(cali_free_accuracy), 2), round(np.std(cali_free_accuracy) / np.sqrt(len(log_loss_accuracy)), 2))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "-2-vdxh2sg_X"
      },
      "outputs": [],
      "source": [
        "# Logistic Regression, label in {0, 1} in this vanilla implementation\n",
        "\n",
        "import warnings\n",
        "warnings.filterwarnings('ignore')\n",
        "\n",
        "import numpy as np\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "from sklearn.metrics import log_loss\n",
        "\n",
        "# fix random seed so it's easier to reproduce the result\n",
        "np.random.seed(1234)\n",
        "\n",
        "num_round = 100\n",
        "num_features = 20\n",
        "num_features_weak = 19\n",
        "sigma = 0.25\n",
        "mu = -0.05\n",
        "\n",
        "num_train = 1000\n",
        "num_test = 10000\n",
        "num_val = 2000\n",
        "\n",
        "\n",
        "def generate_data(num_examples, mu):\n",
        "    x = np.random.normal(mu, sigma, (num_examples, num_features))\n",
        "    p = 1 / (1 + np.exp(- np.sum(x, axis=1)))\n",
        "    y = np.random.binomial(1, p)\n",
        "    return x, y, p\n",
        "\n",
        "\n",
        "def calculate_bias_exp(p, y):\n",
        "    num_round = 4\n",
        "    b = 1\n",
        "    for i in range(num_round):\n",
        "        curr_calibration = np.sum(p) / np.sum(y)\n",
        "        p = p / (p + (1 - p) * curr_calibration)\n",
        "        b *= curr_calibration\n",
        "    return b\n",
        "\n",
        "def apply_bias_exp(p, b):\n",
        "    return p / (p + (1 - p) * b)\n",
        "\n",
        "\n",
        "def get_correct_ratio(weak_model_losses, strong_model_losses, pairwise=True):\n",
        "    num_model_weak = len(weak_model_losses)\n",
        "    num_model_strong = len(strong_model_losses)\n",
        "    better_model_count = 0\n",
        "\n",
        "    if pairwise:\n",
        "        for i in range(num_model_weak):\n",
        "            for j in range(num_model_strong):\n",
        "                if strong_model_losses[j] < weak_model_losses[i]:\n",
        "                    better_model_count += 1\n",
        "        return better_model_count / num_model_weak / num_model_strong\n",
        "    else:\n",
        "        assert num_model_weak == num_model_strong\n",
        "        for i in range(num_model_weak):\n",
        "            if strong_model_losses[i] < weak_model_losses[i]:\n",
        "                    better_model_count += 1\n",
        "        return better_model_count / num_model_weak\n",
        "\n",
        "\n",
        "x_test_val, y_test_val, p_test_val = generate_data(num_test + num_val, mu)\n",
        "x_test, y_test, p_test = x_test_val[:num_test, :], y_test_val[:num_test], p_test_val[:num_test]\n",
        "x_val, y_val, p_val = x_test_val[num_test:, :], y_test_val[num_test:], p_test_val[num_test:]\n",
        "\n",
        "df = pd.DataFrame(columns = ['Model A', 'Model B', 'Log Loss Accuracy', 'Cali-Free Accuracy'])\n",
        "df_loss_std = pd.DataFrame(columns = ['Model', 'Log Loss std', 'Cali-Free std', 'Log Loss mean', 'Cali-Free mean'])\n",
        "\n",
        "for _ in range(num_round):\n",
        "    calibration_array = []\n",
        "    log_loss_array = []\n",
        "    log_loss_array_full = []\n",
        "    log_loss_array_corrected = []\n",
        "    calibration_array_weak = []\n",
        "    log_loss_array_weak = []\n",
        "    log_loss_array_weak_full = []\n",
        "    log_loss_array_weak_corrected = []\n",
        "    for i in range(100):\n",
        "        x_train, y_train, p_train = generate_data(num_train, mu)\n",
        "        clf = LogisticRegression(penalty='none', random_state=0).fit(x_train, y_train)\n",
        "        x_train, y_train, p_train = generate_data(num_train, mu)\n",
        "        clf_weak = LogisticRegression(penalty='none', random_state=0).fit(x_train[:, :num_features_weak], y_train)\n",
        "        p_predicted_test_val = clf.predict_proba(x_test_val)[:, 1]\n",
        "        p_predicted = p_predicted_test_val[:num_test]\n",
        "        p_predicted_val = p_predicted_test_val[num_test:]\n",
        "        bias = calculate_bias_exp(p_predicted_val, y_val)\n",
        "        p_corrected = apply_bias_exp(p_predicted, bias)\n",
        "        p_predicted_weak_test_val = clf_weak.predict_proba(x_test_val[:, :num_features_weak])[:, 1]\n",
        "        p_predicted_weak = p_predicted_weak_test_val[:num_test]\n",
        "        p_predicted_weak_val = p_predicted_weak_test_val[num_test:]\n",
        "        bias_weak = calculate_bias_exp(p_predicted_weak_val, y_val)\n",
        "        p_corrected_weak = apply_bias_exp(p_predicted_weak, bias_weak)\n",
        "        calibration_array.append(np.sum(p_predicted) / np.sum(y_test))\n",
        "        log_loss_array.append(log_loss(y_test, p_predicted))\n",
        "        log_loss_array_full.append(log_loss(y_test_val, p_predicted_test_val))\n",
        "        log_loss_array_corrected.append(log_loss(y_test, p_corrected))\n",
        "        calibration_array_weak.append(np.sum(p_predicted_weak) / np.sum(y_test))\n",
        "        log_loss_array_weak.append(log_loss(y_test, p_predicted_weak))\n",
        "        log_loss_array_weak_full.append(log_loss(y_test_val, p_predicted_weak_test_val))\n",
        "        log_loss_array_weak_corrected.append(log_loss(y_test, p_corrected_weak))\n",
        "\n",
        "\n",
        "    log_loss_result = get_correct_ratio(log_loss_array_weak_full, log_loss_array_full)\n",
        "    cali_free_log_loss_result = get_correct_ratio(log_loss_array_weak_corrected, log_loss_array_corrected)\n",
        "    new_row = {\n",
        "        'Model A': \"20 features\",\n",
        "        'Model B': \"19 features\",\n",
        "        'Log Loss Accuracy': f\"{round(log_loss_result * 100, 1)}%\",\n",
        "        'Cali-Free Accuracy': f\"{round(cali_free_log_loss_result* 100, 1)}%\",\n",
        "    }\n",
        "    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)\n",
        "    new_row = {\n",
        "        'Model': \"20 features\",\n",
        "        'Log Loss std': f\"{round(np.std(np.array(log_loss_array_full)), 5)}\",\n",
        "        'Cali-Free std': f\"{round(np.std(np.array(log_loss_array_corrected)), 5)}\",\n",
        "        'Log Loss mean': f\"{round(np.mean(np.array(log_loss_array_full)), 5)}\",\n",
        "        'Cali-Free mean': f\"{round(np.mean(np.array(log_loss_array_corrected)), 5)}\",\n",
        "    }\n",
        "    df_loss_std = pd.concat([df_loss_std, pd.DataFrame([new_row])], ignore_index=True)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "-uuyowP3Cse9",
        "outputId": "fbf97c01-0686-41ab-d6d6-6c82823a6a7b"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "85.93 0.26\n",
            "89.36 0.24\n"
          ]
        }
      ],
      "source": [
        "import numpy as np\n",
        "\n",
        "log_loss_accuracy = []\n",
        "cali_free_accuracy = []\n",
        "\n",
        "for i in range(100):\n",
        "    log_loss_accuracy.append(float(df[\"Log Loss Accuracy\"][i][:4]))\n",
        "    cali_free_accuracy.append(float(df[\"Cali-Free Accuracy\"][i][:4]))\n",
        "\n",
        "log_loss_accuracy = np.array(log_loss_accuracy)\n",
        "cali_free_accuracy = np.array(cali_free_accuracy)\n",
        "print(round(np.mean(log_loss_accuracy), 2), round(np.std(log_loss_accuracy) / np.sqrt(len(log_loss_accuracy)), 2))\n",
        "print(round(np.mean(cali_free_accuracy), 2), round(np.std(cali_free_accuracy) / np.sqrt(len(log_loss_accuracy)), 2))"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "machine_shape": "hm",
      "provenance": []
    },
    "gpuClass": "standard",
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}