{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"VAD-dlrm.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyMCmoTlbf6srfevZFBht/qb"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["# Instructions\n","\n","1. git clone the repo and upload the whole repo to google drive\n","2. use Google Colab to execute the code (including model training, inference, and applying VAD)"],"metadata":{"id":"f5ER6W4Hp_AE"}},{"cell_type":"markdown","source":["# Section 1: DLRM training && inference"],"metadata":{"id":"wPC0bWehrFGa"}},{"cell_type":"code","execution_count":null,"metadata":{"id":"KAcJ8H2lVPin"},"outputs":[],"source":["from google.colab import drive\n","drive.mount('/content/gdrive')"]},{"cell_type":"code","source":["# enter your working directory\n","\n","# %cd /content/gdrive/MyDrive/your_folder/VAD/dlrm/"],"metadata":{"id":"sT7qT0uOVgib"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# install the required package\n","\n","# !pip install \"git+https://github.com/mlperf/logging.git@beaf26d\""],"metadata":{"id":"f2ros_RKmD4c"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# command to train DLRM model && do inference on test / validation dataset\n","# to fully reproduce the result, please train the model on 15M data instead of 100k example data\n","\n","\n","# %%shell\n","\n","# for VAR in {1..1}\n","# do \n","#     python dlrm_s_pytorch.py \\\n","#         --arch-sparse-feature-size=16 \\\n","#         --arch-mlp-bot=\"13-512-256-64-16\" \\\n","#         --arch-mlp-top=\"512-256-1\" \\\n","#         --data-generation=dataset \\\n","#         --data-set=kaggle \\\n","#         --raw-data-file=./input/train_last_100k_as_example.txt \\\n","#         --processed-data-file=./input/kaggleAdDisplayChallenge_processed.npz \\\n","#         --save-model=./model/model_example.pt \\\n","#         --numpy-rand-seed=${VAR} \\\n","#         --data-randomize=total \\\n","#         --lr-num-warmup-steps=0 \\\n","#         --lr-decay-start-step=0 \\\n","#         --loss-function=bce \\\n","#         --round-targets=True \\\n","#         --mlperf-logging \\\n","#         --nepochs=2 \\\n","#         --learning-rate=0.1 \\\n","#         --mini-batch-size=128 \\\n","#         --print-freq=81920 \\\n","#         --print-time \\\n","#         --test-mini-batch-size=16384 \\\n","#         --test-num-workers=4 \\\n","#         --test-freq=655360\n","\n","#     python dlrm_s_pytorch.py \\\n","#         --arch-sparse-feature-size=16 \\\n","#         --arch-mlp-bot=\"13-512-256-64-16\" \\\n","#         --arch-mlp-top=\"512-256-1\" \\\n","#         --data-generation=dataset \\\n","#         --data-set=kaggle \\\n","#         --raw-data-file=./input/train_last_100k_as_example.txt \\\n","#         --processed-data-file=./input/kaggleAdDisplayChallenge_processed.npz \\\n","#         --load-model=./model/model_example.pt \\\n","#         --dump-json-file=./result/result_example.json \\\n","#         --test-data-split=test \\\n","#         --inference-only \\\n","#         --data-randomize=total \\\n","#         --lr-num-warmup-steps=0 \\\n","#         --lr-decay-start-step=0 \\\n","#         --loss-function=bce \\\n","#         --round-targets=True \\\n","#         --mlperf-logging \\\n","#         --nepochs=1 \\\n","#         --learning-rate=0.1 \\\n","#         --mini-batch-size=128 \\\n","#         --print-freq=81920 \\\n","#         --print-time \\\n","#         --test-mini-batch-size=16384 \\\n","#         --test-num-workers=4 \\\n","#         --test-freq=655360\n","\n","#     python dlrm_s_pytorch.py \\\n","#         --arch-sparse-feature-size=16 \\\n","#         --arch-mlp-bot=\"13-512-256-64-16\" \\\n","#         --arch-mlp-top=\"512-256-1\" \\\n","#         --data-generation=dataset \\\n","#         --data-set=kaggle \\\n","#         --raw-data-file=./input/train_last_100k_as_example.txt \\\n","#         --processed-data-file=./input/kaggleAdDisplayChallenge_processed.npz \\\n","#         --load-model=./model/model_example.pt \\\n","#         --dump-json-file=./result/result_example_val.json \\\n","#         --test-data-split=val \\\n","#         --inference-only \\\n","#         --data-randomize=total \\\n","#         --lr-num-warmup-steps=0 \\\n","#         --lr-decay-start-step=0 \\\n","#         --loss-function=bce \\\n","#         --round-targets=True \\\n","#         --mlperf-logging \\\n","#         --nepochs=1 \\\n","#         --learning-rate=0.1 \\\n","#         --mini-batch-size=128 \\\n","#         --print-freq=81920 \\\n","#         --print-time \\\n","#         --test-mini-batch-size=16384 \\\n","#         --test-num-workers=4 \\\n","#         --test-freq=655360\n","# done"],"metadata":{"id":"ih9ro_rngFB7"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["# Section 2: Apply VAD"],"metadata":{"id":"DWHEr63Eq2rq"}},{"cell_type":"code","source":["# %cd /content/gdrive/MyDrive/your_folder/VAD/"],"metadata":{"id":"Pha7Ltc_23DX"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["import codecs, json\n","import numpy as np\n","import tensorflow as tf\n","\n","from sklearn.calibration import IsotonicRegression\n","import calibration_utils\n","import calibration_calibrator\n","\n","def get_y_and_p(file_name):\n","    obj_text = codecs.open(file_name, 'r', encoding='utf-8').read()\n","    data = json.loads(obj_text)\n","    y = np.squeeze(np.array(data['targets']))\n","    p = np.squeeze(np.array(data['scores']))\n","    return y, p\n","\n","def assert_test_val_different(y_test, y_val):\n","    # make sure test data and val data are indeed different (i.e. no bug when do model inference)\n","    if len(y_test) == len(y_val):\n","        assert np.sum(y_test == y_val) < len(y_val)\n","\n","def read_data_from_json(\n","    json_name, \n","    json_name_val, \n","    num_group,\n","):\n","\n","    def get_test_and_val(y_1, p_1, y_2, p_2):\n","        assert_test_val_different(y_1, y_2)\n","        y = np.hstack((y_1, y_2))\n","        p = np.hstack((p_1, p_2))\n","        num_examples = y.shape[0]\n","        num_examples_val = num_examples // 10\n","        num_examples -= num_examples_val\n","        return y[:num_examples], p[:num_examples], y[num_examples:], p[num_examples:]\n","\n","    y_1, p_1 = get_y_and_p(json_name.format(1))\n","    y_2, p_2 = get_y_and_p(json_name_val.format(1))\n","    y, p, y_val, p_val = get_test_and_val(y_1, p_1, y_2, p_2)\n","    num_examples = y.shape[0]\n","    num_examples_val = y_val.shape[0]\n","\n","    p_predicted_subgroup = np.zeros((num_examples, num_group))\n","    p_predicted_subgroup_val = np.zeros((num_examples_val, num_group))\n","    # http://ethen8181.github.io/machine-learning/model_selection/prob_calibration/prob_calibration.html#Calibration-Model\n","    p_predicted_subgroup_calibrated = {\n","        \"hist_bin\": np.zeros((num_examples, num_group)),\n","        \"platt\": np.zeros((num_examples, num_group)),\n","        \"platt_hist\": np.zeros((num_examples, num_group)),\n","        \"isotonic\": np.zeros((num_examples, num_group)),\n","    }\n","\n","    y_predicted_subgroup = np.zeros((num_examples, num_group))\n","    y_predicted_subgroup_val = np.zeros((num_examples_val, num_group))\n","    \n","    for i in range(num_group):\n","        y_1, p_1 = get_y_and_p(json_name.format(i+1))\n","        y_2, p_2 = get_y_and_p(json_name_val.format(i+1))\n","        y, p, y_val, p_val = get_test_and_val(y_1, p_1, y_2, p_2)\n","        y_predicted_subgroup[:, i] = y\n","        p_predicted_subgroup[:, i] = p\n","        y_predicted_subgroup_val[:, i] = y_val\n","        p_predicted_subgroup_val[:, i] = p_val\n","\n","        histogram = calibration_calibrator.HistogramCalibrator(n_bins=50)\n","        histogram.fit(p_val, y_val)\n","        histogram_probs = histogram.predict(p)\n","        p_predicted_subgroup_calibrated[\"hist_bin\"][:, i] = histogram_probs\n","\n","        platt = calibration_calibrator.PlattCalibrator(log_odds=True)\n","        platt.fit(p_val, y_val)\n","        platt_probs = platt.predict(p)\n","        p_predicted_subgroup_calibrated[\"platt\"][:, i] = platt_probs\n","\n","        platt_histogram = calibration_calibrator.PlattHistogramCalibrator(n_bins=50, log_odds=True)\n","        platt_histogram.fit(p_val, y_val)\n","        platt_histogram_probs = platt_histogram.predict(p)\n","        p_predicted_subgroup_calibrated[\"platt_hist\"][:, i] = platt_histogram_probs\n","\n","        isotonic = IsotonicRegression(\n","            out_of_bounds='clip',\n","            y_min=p_val.min(),\n","            y_max=p_val.max(),\n","        )\n","        isotonic.fit(p_val, y_val)\n","        isotonic_probs = isotonic.predict(p)\n","        p_predicted_subgroup_calibrated[\"isotonic\"][:, i] = isotonic_probs\n","\n","    # make sure data is correct\n","    for i in range(num_group):\n","        assert np.sum(y_predicted_subgroup[:,0] == y_predicted_subgroup[:,i]) == num_examples\n","        assert np.sum(y_predicted_subgroup_val[:,0] == y_predicted_subgroup_val[:,i]) == num_examples_val\n","\n","    return p_predicted_subgroup, p_predicted_subgroup_calibrated, p_predicted_subgroup_val, y_predicted_subgroup[:, 0], y_predicted_subgroup_val[:, 0]\n","\n","def get_ood_construct_model_predictions():\n","    json_name_nn_select = './dlrm/result/result_model_select.json'\n","    json_name_nn_select_val = './dlrm/result/result_model_select_val.json'\n","    y_1, p_1 = get_y_and_p(json_name_nn_select)\n","    y_2, p_2 = get_y_and_p(json_name_nn_select_val)\n","    assert_test_val_different(y_1, y_2)\n","    y = np.hstack((y_1, y_2))\n","    p = np.hstack((p_1, p_2))\n","    return y, p\n","\n","json_name = \"./dlrm/result/result_model_{}.json\"\n","json_name_val = \"./dlrm/result/result_model_{}_val.json\"\n","\n","# The paper replicates the experiment 40 times, so in order to reproduce the results,\n","# you should train 40 * 2 = 80 models and set num_group = 80\n","num_group = 4\n","\n","bce = tf.keras.losses.BinaryCrossentropy(from_logits=False)\n","ood_y, ood_p = get_ood_construct_model_predictions()\n","\n","p_predicted_subgroup_original, p_predicted_subgroup_calibrated_original, p_predicted_subgroup_original_val, y_original, y_original_val = read_data_from_json(\n","    json_name, \n","    json_name_val, \n","    num_group,\n",")"],"metadata":{"id":"AD2z1ydMq-oL"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["def preprocess_test_data(\n","    p_predicted_subgroup, \n","    y,\n","    do_p_selection,\n","    ood_p,\n","    ood_y,\n","):\n","    num_examples = p_predicted_subgroup.shape[0]\n","    assert np.sum(ood_y[:num_examples] == y) == len(y)\n","    selected_ind = list(range(num_examples))\n","    if do_p_selection:\n","        selected_ind = []\n","        for i in range(num_examples):\n","            select = np.random.binomial(1, 1 - ood_p[i])\n","            if select > 0.5:\n","                selected_ind.append(i)\n","    selected_ind = np.array(selected_ind)\n","    print(f\"Selected {len(selected_ind)} samples after test pre-process ({int(100 * len(selected_ind) // num_examples)}% data)\")\n","    print(\"After pre-process test positive ratio: \", np.sum(y[selected_ind]) / len(selected_ind))\n","    print()\n","    return selected_ind\n","\n","do_p_selection = True\n","num_group_bootstrap = 2\n","np.random.seed(1)\n","\n","selected_ind = preprocess_test_data(\n","    p_predicted_subgroup_original, \n","    y_original,\n","    do_p_selection,\n","    ood_p,\n","    ood_y,\n",")\n","\n","p_predicted_subgroup = p_predicted_subgroup_original[selected_ind]\n","y = y_original[selected_ind]\n","\n","p_predicted_subgroup_calibrated = {}\n","for calibration_method in [\"hist_bin\", \"platt\", \"platt_hist\", \"isotonic\"]:\n","    p_predicted_subgroup_calibrated[calibration_method] = p_predicted_subgroup_calibrated_original[calibration_method][selected_ind]"],"metadata":{"id":"WrAYU62YxuTA"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["from VAD_util import calculate_lambda, prediction_transformation\n","\n","def generate_report(\n","    y, \n","    p_predicted_subgroup, \n","    p_predicted_subgroup_pre_cali,\n","    alpha, \n","    print_individual_result,\n","    num_group_bootstrap,\n","    p_predicted_subgroup_val=None,\n","):\n","    def append_result(result, curr):\n","        for result_key in result_keys:\n","            result[result_key].append(curr[result_key])\n","        return result\n","\n","    def calculate_ECE_MCE_Brier(y_prob, y_true, n_bins):\n","        sorted_indices = np.argsort(y_prob)\n","        sorted_y_true = y_true[sorted_indices]\n","        sorted_y_prob = y_prob[sorted_indices]\n","        binned_y_true = np.array_split(sorted_y_true, n_bins)\n","        binned_y_prob = np.array_split(sorted_y_prob, n_bins)\n","        ece_errors = 0.0\n","        mce_errors = 0.0\n","        brier_errors = 0.0\n","        for bin_y_true, bin_y_prob in zip(binned_y_true, binned_y_prob):\n","            avg_y_true = np.mean(bin_y_true)\n","            avg_y_score = np.mean(bin_y_prob)\n","            ce_error = np.abs(avg_y_score - avg_y_true)\n","            ece_errors += ce_error / n_bins\n","            mce_errors = max(mce_errors, ce_error)\n","            brier_errors += ((avg_y_score - avg_y_true) ** 2) / n_bins\n","        return round(ece_errors, 4), round(mce_errors, 4), round(brier_errors, 4)\n","\n","    num_examples, num_group = p_predicted_subgroup.shape\n","    num_examples_test_val = num_examples // 10\n","    num_examples -= num_examples_test_val\n","    p_predicted_subgroup_test_val = p_predicted_subgroup[:num_examples_test_val, :]\n","    p_predicted_subgroup = p_predicted_subgroup[num_examples_test_val:, :]\n","    y = y[num_examples_test_val:]\n","    assert num_examples == p_predicted_subgroup.shape[0]\n","    p_predicted_subgroup_pre_cali_test_val = p_predicted_subgroup_pre_cali[:num_examples_test_val, :]\n","\n","    result = {}\n","    for result_key in result_keys:\n","        result[result_key] = []\n","\n","    for i in range(num_group):\n","        if i >= num_group // num_group_bootstrap:\n","            break\n","        p = p_predicted_subgroup[:, i]\n","        ind = np.argpartition(p, -int(num_examples * alpha))[-int(num_examples * alpha):]\n","        pos_rate = np.sum(y) / num_examples\n","\n","        p_predicted_subgroup_test_val_choosen = np.zeros((num_examples_test_val, num_group_bootstrap))\n","        for j in range(num_group_bootstrap):\n","            p_predicted_subgroup_test_val_choosen[:, j] = p_predicted_subgroup_pre_cali_test_val[:, (i + j * num_group // num_group_bootstrap) % num_group]\n","        lambda_p_logit = calculate_lambda(p_predicted_subgroup_test_val_choosen, p_predicted_subgroup_pre_cali_test_val[:, i], 'logit')\n","        lambda_p_prob = calculate_lambda(p_predicted_subgroup_test_val_choosen, p_predicted_subgroup_pre_cali_test_val[:, i], 'probability')\n","        if p_predicted_subgroup_val is not None:\n","            num_examples_val = p_predicted_subgroup_val.shape[0]\n","            p_predicted_subgroup_val_choosen = np.zeros((num_examples_val, num_group_bootstrap))\n","            for j in range(num_group_bootstrap):\n","                p_predicted_subgroup_val_choosen[:, j] = p_predicted_subgroup_val[:, (i + j * num_group // num_group_bootstrap) % num_group]\n","            lambda_p_logit_in_distribution = calculate_lambda(p_predicted_subgroup_val_choosen, p_predicted_subgroup_val[:, i], 'logit')\n","            lambda_p_prob_in_distribution = calculate_lambda(p_predicted_subgroup_val_choosen, p_predicted_subgroup_val[:, i], 'probability')\n","            lambda_p_logit /= lambda_p_logit_in_distribution\n","            lambda_p_prob /= lambda_p_prob_in_distribution\n","\n","        p_mean_test_val = np.mean(p_predicted_subgroup_test_val[:, i])\n","        p_mean_logit_test_val = np.mean(np.log(p_predicted_subgroup_test_val[:, i]/(1-p_predicted_subgroup_test_val[:, i])))\n","        refined_prediction_logit = prediction_transformation(p, ind, lambda_p_logit, 'logit', p_mean_test_val, p_mean_logit_test_val)\n","        refined_prediction_prob = prediction_transformation(p, ind, lambda_p_prob, 'probability', p_mean_test_val, p_mean_logit_test_val)\n","        log_loss_original = bce(y[ind], p[ind]).numpy()\n","        log_loss_logit = bce(y[ind], refined_prediction_logit).numpy()\n","        log_loss_prob = bce(y[ind], refined_prediction_prob).numpy()\n","        log_loss_logit_improve = (log_loss_logit - log_loss_original) / log_loss_original * 100\n","        log_loss_prob_improve = (log_loss_prob - log_loss_original) / log_loss_original * 100\n","\n","        n_bins = 50\n","        Vanilla_ECE, Vanilla_MCE, Vanilla_Brier = calculate_ECE_MCE_Brier(p[ind], y[ind], n_bins)\n","        VAD_ECE, VAD_MCE, VAD_Brier = calculate_ECE_MCE_Brier(refined_prediction_logit, y[ind], n_bins)\n","        VAD_Prob_ECE, VAD_Prob_MCE, VAD_Prob_Brier = calculate_ECE_MCE_Brier(refined_prediction_prob, y[ind], n_bins)\n","\n","        curr = {\n","            'total_calibration': np.sum(p) / np.sum(y),\n","            'positive_ratio_after_selection': np.sum(y[ind]) / len(y[ind]),\n","            'Vanilla': np.sum(p[ind]) / np.sum(y[ind]),\n","            'Vanilla ECE': Vanilla_ECE,\n","            'Vanilla MCE': Vanilla_MCE,\n","            'Vanilla Brier': Vanilla_Brier,\n","            'VAD': np.sum(refined_prediction_logit) / np.sum(y[ind]),\n","            'VAD ECE': VAD_ECE,\n","            'VAD MCE': VAD_MCE,\n","            'VAD Brier': VAD_Brier,\n","            'VAD prob': np.sum(refined_prediction_prob) / np.sum(y[ind]),\n","            'VAD prob ECE': VAD_Prob_ECE,\n","            'VAD prob MCE': VAD_Prob_MCE,\n","            'VAD prob Brier': VAD_Prob_Brier,\n","            'Lambda logit': lambda_p_logit,\n","            'Lambda prob': lambda_p_prob,\n","            'Mu logit': p_mean_logit_test_val,\n","            'Mu prob': p_mean_test_val,\n","            'Log Loss Logit Improvement': log_loss_logit_improve,\n","            'Log Loss Prob Improvement': log_loss_prob_improve,\n","        }\n","        result = append_result(result, curr)\n","        \n","        log_loss = bce(y, p).numpy()\n","        if print_individual_result:\n","            print(\"LogLoss: \", log_loss)\n","            for result_key in result_keys:\n","                print(result_key, \": \", curr[result_key])\n","            print()\n","\n","    print(\"Num Group: \", num_group)\n","    print(\"Alpha: \", alpha)\n","    for result_key in result_keys:\n","        result[result_key] = np.array(result[result_key])\n","        print(\"mean of \", result_key, \": \", np.mean(result[result_key]))\n","    print()\n","\n","    return result\n","\n","def generate_multiple_report(\n","    p_predicted_subgroup, \n","    p_predicted_subgroup_pre_cali,\n","    y, \n","    p_predicted_subgroup_val=None,\n","):\n","    num_group = p_predicted_subgroup.shape[1]\n","    report = {}\n","    alphas = [0.02, 0.1]\n","    for alpha in alphas:\n","        result = generate_report(\n","            y, \n","            p_predicted_subgroup, \n","            p_predicted_subgroup_pre_cali,\n","            alpha, \n","            False,\n","            num_group_bootstrap,\n","            p_predicted_subgroup_val,\n","        )\n","        alpha_key = int(1000 * alpha)\n","        report[alpha_key] = {}\n","        for result_key in reported_result_keys:\n","            curr_result = result[result_key]\n","            total_num_result = curr_result.shape[0]\n","            report[alpha_key][result_key] = (np.mean(curr_result), np.std(curr_result) / np.sqrt(total_num_result))\n","    return report\n","\n","\n","result_keys = [\n","    'total_calibration',\n","    'positive_ratio_after_selection',\n","    'Vanilla',\n","    'Vanilla ECE',\n","    'Vanilla MCE',\n","    'Vanilla Brier',\n","    'VAD',\n","    'VAD ECE',\n","    'VAD MCE',\n","    'VAD Brier',\n","    'VAD prob',\n","    'VAD prob ECE',\n","    'VAD prob MCE',\n","    'VAD prob Brier',\n","    'Lambda logit',\n","    'Lambda prob',\n","    'Mu logit',\n","    'Mu prob',\n","    'Log Loss Logit Improvement',\n","    'Log Loss Prob Improvement',\n","]\n","\n","reported_result_keys = [\n","    'Vanilla',\n","    'Vanilla ECE',\n","    'Vanilla MCE',\n","    'VAD',\n","    'VAD ECE',\n","    'VAD MCE',\n","]\n","\n","final_report = {}\n","final_report[\"none\"] = generate_multiple_report(\n","    p_predicted_subgroup, \n","    p_predicted_subgroup,\n","    y,\n",")\n","\n","print(\"Calibration Result (calibrated on all data)\\n\")\n","\n","for calibration_method in [\"hist_bin\", \"platt\", \"platt_hist\", \"isotonic\"]:\n","    print(f\"Calibration Method: {calibration_method}\")\n","    final_report[calibration_method] = generate_multiple_report(\n","        p_predicted_subgroup_calibrated[calibration_method], \n","        p_predicted_subgroup,\n","        y, \n","        p_predicted_subgroup_original_val,\n","    )"],"metadata":{"id":"RaSRfHBqy9Jc"},"execution_count":null,"outputs":[]}]}