from fairlib.src import analysis
from fairlib.src.analysis.utils import auc_performance_fairness_tradeoff
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import matplotlib.colors as mcolors
import torch
c:\Users\emano\.conda\envs\py38\lib\site-packages\tqdm\auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm
import yaml
# Read the per-dataset hyperparameters (e.g. the "num_groups" entry used when
# normalising the fairness gap) from a YAML file resolved relative to the
# current working directory.
# NOTE(review): yaml.full_load accepts a richer YAML subset than
# yaml.safe_load; if this file only contains plain mappings and scalars,
# safe_load would presumably be sufficient -- confirm before switching.
with open("dataset_specific_hyperparameters.yaml", mode='r') as hyperparameter_file:
    dataset_specific_hyperparameters = yaml.full_load(hyperparameter_file)
# Minimum accuracy of each dataset, keyed by dataset name.
# Consistent with Table 8 (Appendix B) in the paper.
# The value is used both as a performance cut-off and as the x-coordinate of
# the synthetic "perfectly fair" anchor point added to each model's results.
datasets = {
    # Moji
    "Moji": 0.5,
    # Bios
    "Bios_gender": 0.3,
    "Bios_economy": 0.3,
    "Bios_intersection": 0.3,
    # Trustpilot
    "Trustpilot_gender": 0.68,
    "Trustpilot_age": 0.68,
    "Trustpilot_country": 0.68,
    "Trustpilot_intersection": 0.68,
    # Adult
    "Adult_gender": 0.76,
    "Adult_race": 0.76,
    "Adult_intersection": 0.76,
    # COMPAS
    "COMPAS_gender": 0.56,
    "COMPAS_race": 0.56,
    "COMPAS_intersection": 0.56,
}
def corrected_retrive_results(dataset, log_dir, fairness_metric = "TPR_GAP"):
    """Load the results of ``dataset`` and attach normalised fairness columns.

    The raw results come from ``analysis.tables_and_figures.retrive_results``.
    For every entry of the returned mapping, the columns ``dev_fairness`` and
    ``test_fairness`` are (over)written with
    ``1 - (<split>_<fairness_metric> / n_groups * 2)``, where ``n_groups`` is
    the ``"num_groups"`` hyperparameter of the dataset.

    Args:
        dataset: Dataset name; must be a key of
            ``dataset_specific_hyperparameters``.
        log_dir: Forwarded to ``retrive_results`` as ``log_dir``.
        fairness_metric: Suffix of the gap columns to read, i.e. the columns
            ``"dev_" + fairness_metric`` and ``"test_" + fairness_metric``.

    Returns:
        The mapping returned by ``retrive_results``, with its entries modified
        in place.
    """
    results_by_model = analysis.tables_and_figures.retrive_results(dataset, log_dir=log_dir)
    n_groups = dataset_specific_hyperparameters[dataset]["num_groups"]

    # Normalize the sum of gap by the number of groups.
    # The expression is evaluated left to right: (gap / n_groups) * 2.
    for model_results in results_by_model.values():
        for split in ("dev", "test"):
            gap = model_results[split + "_" + fairness_metric]
            model_results[split + "_fairness"] = 1 - (gap / n_groups * 2)
    return results_by_model
def make_plot(
    plot_df, figure_name=None,
    xlim=None, ylim=None,
    figsize=(7.5, 6)
):
    """Draw one performance-fairness curve per model and optionally save it.

    Args:
        plot_df: DataFrame providing the columns ``"test_performance mean"``,
            ``"test_fairness mean"`` and ``"Models"``. It is not modified.
        figure_name: When not ``None``, the figure is written to
            ``full_results_figures/appendix_PFC_<figure_name>.pdf``; the
            output directory is created if it does not exist yet.
        xlim: Optional x-axis limits passed to ``ax.set_xlim``.
        ylim: Optional y-axis limits passed to ``ax.set_ylim``.
        figsize: Figure size passed to ``plt.subplots``.
    """
    import os

    # Derive the plotting columns on a new frame instead of writing them into
    # the caller's DataFrame.
    curve_df = plot_df.assign(
        Fairness=plot_df["test_fairness mean"],
        Performance=plot_df["test_performance mean"],
    )

    fig, ax = plt.subplots(figsize=figsize)
    # NOTE(review): the axes already exist when this style context is entered,
    # so style settings that are applied at axes-creation time presumably do
    # not take effect here. The order is kept as-is to leave the rendered
    # figures unchanged -- confirm whether the "white" style was intended.
    with sns.axes_style("white"):
        sns.lineplot(
            data=curve_df,
            x="Performance",
            y="Fairness",
            hue="Models",
            markers=True,
            style="Models",
            ax=ax
        )

    if xlim is not None:
        ax.set_xlim(xlim)
    if ylim is not None:
        ax.set_ylim(ylim)

    sns.move_legend(ax, "lower left")

    if figure_name is not None:
        output_path = "full_results_figures/appendix_PFC_{}.pdf".format(figure_name)
        # savefig does not create missing directories by itself.
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        fig.savefig(output_path, format="pdf", dpi=960, bbox_inches="tight")
# Maps the model identifiers found in the "Models" column of the results to
# the labels shown in figures and tables (some use "$...$" math markup).
# The insertion order of this dict is the order in which models are emitted
# by the result-building functions that iterate over it.
_model_order = {
    "Vanilla": "Vanilla",
    "EOCla": "GD$_{CLA}$",
    "EOGlb": "GD$_{GLB}$",
    "FairBatch": "FairBatch",
    "Adv": "Adv",
    "DecoupledAdv": "SemiAdv",
    "ARL": "ARL",
    "UEOCla": "ULPL+GD$_{CLA}$",
    "UEOGlb": "ULPL+GD$_{GLB}$",
    "UAdv": "ULPL+Adv",
}
def get_renamed_main_results(_dataset):
    """Build the per-model result frame used for the trade-off plots.

    Results are loaded with ``corrected_retrive_results`` (``log_dir='bs1024'``)
    and reduced by ``analysis.final_results_df`` with ``pareto=True`` and
    ``pareto_selection="test"``. Then, for each model in ``_model_order``:

    1. the model's rows are selected,
    2. a synthetic anchor row is added with performance equal to the
       dataset's minimum accuracy (``datasets[_dataset]``), fairness 1 and
       zero standard deviations,
    3. rows whose ``"test_performance mean"`` is below the minimum accuracy
       are dropped,
    4. the ``"Models"`` column is set to the display name.

    Args:
        _dataset: Dataset name; must be a key of ``datasets``.

    Returns:
        A single DataFrame with the rows of all models concatenated in
        ``_model_order`` order and a fresh 0..n-1 index.
    """
    _results = corrected_retrive_results(_dataset, log_dir='bs1024')
    _main_results = analysis.final_results_df(
        results_dict = _results,
        pareto = True,
        pareto_selection = "test",
        selection_criterion = None,
        return_dev = True,
        return_conf=True,
    )

    min_performance = datasets[_dataset]
    # The anchor row is identical for every model, so build it once.
    # Float literals keep the numeric columns floating point after concat.
    anchor_row = pd.DataFrame([{
        'test_performance mean': min_performance,
        'test_performance std': 0.0,
        'test_fairness mean': 1.0,
        'test_fairness std': 0.0,
        'dev_performance mean': min_performance,
        'dev_performance std': 0.0,
        'dev_fairness mean': 1.0,
        'dev_fairness std': 0.0,
    }])

    _renamed_dfs = []
    for original_model_name, display_name in _model_order.items():
        _model_df = _main_results[_main_results["Models"] == original_model_name]
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # documented replacement. Columns missing from the anchor row
        # (e.g. "Models") are filled with NaN, as before.
        _model_df = pd.concat([_model_df, anchor_row], ignore_index=True)
        _model_df = _model_df[_model_df["test_performance mean"] >= min_performance]
        # assign() returns a new frame, avoiding assignment on a filtered slice.
        _renamed_dfs.append(_model_df.assign(Models=display_name))

    renamed_main_results = pd.concat(_renamed_dfs).reset_index(drop=True)
    return renamed_main_results
def exam_selection_criteria(
    results_dict,
    selection_criterion = "DTO",
    Fairness_threshold = 0.0,
    Performance_threshold = 0.0
    ):
    """Select one result per model and express the values in percent.

    The selection itself is delegated to ``analysis.final_results_df`` with
    ``pareto=True``, ``pareto_selection="test"`` and ``return_dev=True``.

    Args:
        results_dict: Forwarded to ``final_results_df`` as ``results_dict``.
        selection_criterion: Forwarded to ``final_results_df``.
        Fairness_threshold: Forwarded to ``final_results_df``.
        Performance_threshold: Forwarded to ``final_results_df``.

    Returns:
        The selected DataFrame multiplied by 100, with the original
        ``"Models"`` column restored, the ``"is_pareto"`` column dropped and
        missing values replaced by 0.
    """
    selection = analysis.final_results_df(
        results_dict=results_dict,
        pareto=True,
        pareto_selection="test",
        selection_criterion=selection_criterion,
        Fairness_threshold=Fairness_threshold,
        Performance_threshold=Performance_threshold,
        return_dev=True,
    )

    # The multiplication is applied to every column, including "Models";
    # that column is therefore put back from the unscaled frame afterwards.
    # NOTE(review): any other non-numeric column would be scaled as well --
    # confirm that "Models" is the only one.
    percent_df = selection * 100
    percent_df["Models"] = selection["Models"]

    return percent_df.drop(columns=['is_pareto']).fillna(0)
def one_particular_selection_results(_results, _dataset, Performance_threshold, selection_name = "F@P-5%"):
    """Tabulate, per model, the result selected by fairness under a
    performance threshold.

    The selection is done by ``exam_selection_criteria`` with
    ``selection_criterion="fairness"``, which also converts the values to
    percent. A model without any selected row gets a fallback row whose
    performance is the dataset's minimum accuracy (in percent), whose
    fairness is 100 and whose standard deviations are 0.

    Args:
        _results: Results mapping forwarded to ``exam_selection_criteria``.
        _dataset: Dataset name; looked up in ``datasets`` when a fallback
            row is needed.
        Performance_threshold: Forwarded to ``exam_selection_criteria``.
        selection_name: Label written to the ``"Selection"`` column.

    Returns:
        A DataFrame with the rows of all models in ``_model_order`` order,
        display names in ``"Models"``, a fresh 0..n-1 index, and
        ``"Selection"`` inserted as the first column.
    """
    _selected_results = exam_selection_criteria(
        _results,
        selection_criterion = "fairness",
        Performance_threshold=Performance_threshold,
    )

    _renamed_dfs = []
    for original_model_name, display_name in _model_order.items():
        _model_df = _selected_results[_selected_results["Models"] == original_model_name]
        if _model_df.empty:
            # Float literals keep the numeric columns floating point.
            fallback_row = pd.DataFrame([{
                'test_performance mean': datasets[_dataset] * 100,
                'test_performance std': 0.0,
                'test_fairness mean': 100.0,
                'test_fairness std': 0.0,
                'dev_performance mean': datasets[_dataset] * 100,
                'dev_performance std': 0.0,
                'dev_fairness mean': 100.0,
                'dev_fairness std': 0.0,
            }])
            # DataFrame.append was removed in pandas 2.0; concatenating with
            # the empty selection keeps its columns (unset ones become NaN).
            _model_df = pd.concat([_model_df, fallback_row], ignore_index=True)
        # assign() returns a new frame, avoiding assignment on a filtered slice.
        _renamed_dfs.append(_model_df.assign(Models=display_name))

    renamed_main_results = pd.concat(_renamed_dfs).reset_index(drop=True)
    renamed_main_results.insert(0, "Selection", [selection_name] * len(renamed_main_results))
    return renamed_main_results
def dataset_specific_selected_results(_dataset, vanilla_performance):
    """Return the F@P-5% and F@P-10% selections of a dataset as one table.

    Results are loaded once with ``corrected_retrive_results``
    (``log_dir='bs1024'``). ``one_particular_selection_results`` is then called
    twice, with performance thresholds ``vanilla_performance - 0.05`` and
    ``vanilla_performance - 0.1``.

    Args:
        _dataset: Dataset name.
        vanilla_performance: Reference performance from which the two
            thresholds are derived.

    Returns:
        The two selection tables stacked (F@P-5% first, each keeping its own
        index) with the ``"DTO"`` column removed.
    """
    loaded_results = corrected_retrive_results(_dataset, log_dir='bs1024')

    tradeoff_tables = []
    for allowed_drop, label in ((0.05, "F@P-5%"), (0.1, "F@P-10%")):
        tradeoff_tables.append(
            one_particular_selection_results(
                loaded_results,
                _dataset,
                Performance_threshold=vanilla_performance - allowed_drop,
                selection_name=label,
            )
        )

    combined_result_df = pd.concat(tradeoff_tables).drop(columns="DTO")

    # Disabled helper that prints the table body as LaTeX rows:
    # for i, _line in enumerate(combined_result_df.to_latex().split("\n")[4:26]):
    #     _line = _line[2:]
    #     if i == 0:
    #         _line = '\\multirow{4}{*}{\\bf F@P-5\\%} ' + _line
    #     elif i == 10:
    #         _line = _line + ' \midrule'
    #     elif i == 11:
    #         _line = '\\multirow{4}{*}{\\bf F@P-10\\%} ' + _line
    #     print(_line)

    return combined_result_df
# Moji: save the trade-off figure, then display the F@P-5% / F@P-10% table.
_dataset = "Moji"
make_plot(
    plot_df=get_renamed_main_results(_dataset),
    figure_name=_dataset,
    figsize=(7.5, 4),
)
# Kept as the last expression so the notebook displays the returned table.
dataset_specific_selected_results(_dataset, vanilla_performance=0.71087772)
Selection | Models | test_performance mean | test_performance std | test_fairness mean | test_fairness std | dev_performance mean | dev_performance std | dev_fairness mean | dev_fairness std | |
---|---|---|---|---|---|---|---|---|---|---|
0 | F@P-5% | Vanilla | 71.087772 | 1.104763 | 63.580370 | 1.330876 | 71.3700 | 1.209616 | 65.887082 | 1.173040 |
1 | F@P-5% | GD$_{CLA}$ | 74.201050 | 0.254340 | 92.942429 | 1.363938 | 73.7300 | 0.276925 | 94.455617 | 1.033091 |
2 | F@P-5% | GD$_{GLB}$ | 75.888972 | 0.364697 | 77.630441 | 1.255683 | 75.2375 | 0.206345 | 79.664837 | 1.569792 |
3 | F@P-5% | FairBatch | 75.428857 | 0.396903 | 90.337381 | 0.821882 | 74.7650 | 0.207553 | 90.875510 | 0.741353 |
4 | F@P-5% | Adv | 75.286322 | 0.448076 | 89.690495 | 1.557143 | 74.8950 | 0.319863 | 91.017178 | 0.804605 |
5 | F@P-5% | SemiAdv | 75.671418 | 0.214092 | 90.066419 | 0.536499 | 74.8725 | 0.430969 | 91.059004 | 0.463462 |
6 | F@P-5% | ARL | 70.717679 | 1.013717 | 78.480301 | 5.537474 | 69.1825 | 0.986811 | 78.070555 | 5.742734 |
7 | F@P-5% | ULPL+GD$_{CLA}$ | 66.726682 | 2.569649 | 87.954626 | 6.960715 | 66.1525 | 2.375977 | 88.763821 | 6.769928 |
8 | F@P-5% | ULPL+GD$_{GLB}$ | 72.605651 | 0.434742 | 65.149981 | 1.553866 | 72.4525 | 0.643562 | 66.902756 | 1.303676 |
9 | F@P-5% | ULPL+Adv | 67.791948 | 5.618712 | 69.418234 | 9.154473 | 68.2225 | 5.485545 | 71.186743 | 8.932834 |
0 | F@P-10% | Vanilla | 71.087772 | 1.104763 | 63.580370 | 1.330876 | 71.3700 | 1.209616 | 65.887082 | 1.173040 |
1 | F@P-10% | GD$_{CLA}$ | 74.201050 | 0.254340 | 92.942429 | 1.363938 | 73.7300 | 0.276925 | 94.455617 | 1.033091 |
2 | F@P-10% | GD$_{GLB}$ | 75.888972 | 0.364697 | 77.630441 | 1.255683 | 75.2375 | 0.206345 | 79.664837 | 1.569792 |
3 | F@P-10% | FairBatch | 75.428857 | 0.396903 | 90.337381 | 0.821882 | 74.7650 | 0.207553 | 90.875510 | 0.741353 |
4 | F@P-10% | Adv | 75.286322 | 0.448076 | 89.690495 | 1.557143 | 74.8950 | 0.319863 | 91.017178 | 0.804605 |
5 | F@P-10% | SemiAdv | 75.671418 | 0.214092 | 90.066419 | 0.536499 | 74.8725 | 0.430969 | 91.059004 | 0.463462 |
6 | F@P-10% | ARL | 63.683421 | 3.815591 | 84.635011 | 4.182664 | 62.9975 | 3.946185 | 85.894781 | 3.526161 |
7 | F@P-10% | ULPL+GD$_{CLA}$ | 64.636159 | 3.270525 | 92.506395 | 3.022296 | 64.7725 | 2.213266 | 92.635257 | 2.433825 |
8 | F@P-10% | ULPL+GD$_{GLB}$ | 72.605651 | 0.434742 | 65.149981 | 1.553866 | 72.4525 | 0.643562 | 66.902756 | 1.303676 |
9 | F@P-10% | ULPL+Adv | 67.791948 | 5.618712 | 69.418234 | 9.154473 | 68.2225 | 5.485545 | 71.186743 | 8.932834 |
# Bios (gender): save the trade-off figure, then display the F@P-5% / F@P-10% table.
_dataset = "Bios_gender"
make_plot(
    plot_df=get_renamed_main_results(_dataset),
    figure_name=_dataset,
    figsize=(7.5, 4),
)
# Kept as the last expression so the notebook displays the returned table.
dataset_specific_selected_results(_dataset, vanilla_performance=0.81319881)
Selection | Models | test_performance mean | test_performance std | test_fairness mean | test_fairness std | dev_performance mean | dev_performance std | dev_fairness mean | dev_fairness std | |
---|---|---|---|---|---|---|---|---|---|---|
0 | F@P-5% | Vanilla | 81.319881 | 0.372712 | 83.706045 | 0.263336 | 80.589631 | 0.398200 | 84.194283 | 0.748917 |
1 | F@P-5% | GD$_{CLA}$ | 77.623784 | 0.672984 | 90.246326 | 0.959518 | 77.075575 | 0.578372 | 91.649234 | 1.135619 |
2 | F@P-5% | GD$_{GLB}$ | 77.932570 | 1.060546 | 87.715281 | 1.380087 | 77.422417 | 0.840729 | 88.853155 | 0.863438 |
3 | F@P-5% | FairBatch | 81.169796 | 0.299047 | 87.475654 | 0.809134 | 80.470975 | 0.304957 | 87.622501 | 0.605840 |
4 | F@P-5% | Adv | 80.940720 | 0.247101 | 88.456727 | 0.629951 | 80.147864 | 0.255323 | 86.921011 | 1.141478 |
5 | F@P-5% | SemiAdv | 80.877527 | 0.245439 | 88.060186 | 0.546211 | 80.111355 | 0.172216 | 87.189869 | 0.735913 |
6 | F@P-5% | ARL | 81.425443 | 0.287146 | 83.424525 | 0.786107 | 80.516612 | 0.374202 | 83.732107 | 0.790809 |
7 | F@P-5% | ULPL+GD$_{CLA}$ | 77.809055 | 0.379949 | 85.688689 | 1.182147 | 77.259949 | 0.442402 | 87.604539 | 1.428555 |
8 | F@P-5% | ULPL+GD$_{GLB}$ | 79.304154 | 0.550067 | 85.338175 | 1.027661 | 78.492150 | 0.696219 | 85.895512 | 0.504734 |
9 | F@P-5% | ULPL+Adv | 81.085060 | 0.459097 | 83.990832 | 0.543222 | 80.197152 | 0.484295 | 84.557463 | 0.853407 |
0 | F@P-10% | Vanilla | 81.319881 | 0.372712 | 83.706045 | 0.263336 | 80.589631 | 0.398200 | 84.194283 | 0.748917 |
1 | F@P-10% | GD$_{CLA}$ | 73.873110 | 0.570164 | 93.345429 | 1.421908 | 73.848120 | 0.464082 | 94.413195 | 0.940014 |
2 | F@P-10% | GD$_{GLB}$ | 77.932570 | 1.060546 | 87.715281 | 1.380087 | 77.422417 | 0.840729 | 88.853155 | 0.863438 |
3 | F@P-10% | FairBatch | 81.169796 | 0.299047 | 87.475654 | 0.809134 | 80.470975 | 0.304957 | 87.622501 | 0.605840 |
4 | F@P-10% | Adv | 80.940720 | 0.247101 | 88.456727 | 0.629951 | 80.147864 | 0.255323 | 86.921011 | 1.141478 |
5 | F@P-10% | SemiAdv | 80.877527 | 0.245439 | 88.060186 | 0.546211 | 80.111355 | 0.172216 | 87.189869 | 0.735913 |
6 | F@P-10% | ARL | 81.425443 | 0.287146 | 83.424525 | 0.786107 | 80.516612 | 0.374202 | 83.732107 | 0.790809 |
7 | F@P-10% | ULPL+GD$_{CLA}$ | 74.013141 | 0.900708 | 87.207503 | 1.688826 | 73.563344 | 1.126758 | 89.003631 | 1.718150 |
8 | F@P-10% | ULPL+GD$_{GLB}$ | 75.980755 | 1.146609 | 86.175286 | 2.144775 | 75.304856 | 1.145220 | 86.694062 | 2.137639 |
9 | F@P-10% | ULPL+Adv | 81.085060 | 0.459097 | 83.990832 | 0.543222 | 80.197152 | 0.484295 | 84.557463 | 0.853407 |
# Bios (economy): save the trade-off figure, then display the F@P-5% / F@P-10% table.
_dataset = "Bios_economy"
make_plot(
    plot_df=get_renamed_main_results(_dataset),
    figure_name=_dataset,
    figsize=(7.5, 4),
)
# NOTE(review): 0.81319881 is the value also used for Bios_gender, while the
# recorded output of this cell lists Vanilla test performance as 81.210729 --
# confirm that sharing the baseline across Bios variants is intended.
# Kept as the last expression so the notebook displays the returned table.
dataset_specific_selected_results(_dataset, vanilla_performance=0.81319881)
Selection | Models | test_performance mean | test_performance std | test_fairness mean | test_fairness std | dev_performance mean | dev_performance std | dev_fairness mean | dev_fairness std | |
---|---|---|---|---|---|---|---|---|---|---|
0 | F@P-5% | Vanilla | 81.210729 | 0.167701 | 91.453007 | 0.569017 | 80.470975 | 0.206792 | 91.309847 | 0.949070 |
1 | F@P-5% | GD$_{CLA}$ | 77.618039 | 0.283852 | 94.568235 | 1.740487 | 77.185104 | 0.243259 | 94.542126 | 0.680414 |
2 | F@P-5% | GD$_{GLB}$ | 81.386665 | 0.192739 | 91.515599 | 0.423581 | 80.509310 | 0.337443 | 92.022176 | 0.789046 |
3 | F@P-5% | FairBatch | 81.519515 | 0.175808 | 92.153284 | 0.412501 | 80.542169 | 0.166360 | 91.306913 | 0.343108 |
4 | F@P-5% | Adv | 81.459912 | 0.251858 | 91.949759 | 1.492307 | 80.722892 | 0.236610 | 90.765038 | 0.863258 |
5 | F@P-5% | SemiAdv | 81.495817 | 0.189426 | 91.216804 | 1.188129 | 80.759401 | 0.294421 | 90.947824 | 1.153236 |
6 | F@P-5% | ARL | 81.227245 | 0.160926 | 90.778799 | 0.700788 | 80.587806 | 0.204951 | 90.849908 | 1.237927 |
7 | F@P-5% | ULPL+GD$_{CLA}$ | 78.855337 | 0.423402 | 93.722928 | 1.036452 | 78.382621 | 0.256105 | 93.489210 | 0.795169 |
8 | F@P-5% | ULPL+GD$_{GLB}$ | 77.746580 | 0.284734 | 92.289495 | 1.395198 | 77.046367 | 0.220328 | 92.406722 | 0.352765 |
9 | F@P-5% | ULPL+Adv | 79.647409 | 0.408648 | 92.168986 | 1.165822 | 79.070829 | 0.328207 | 90.879209 | 0.958528 |
0 | F@P-10% | Vanilla | 81.210729 | 0.167701 | 91.453007 | 0.569017 | 80.470975 | 0.206792 | 91.309847 | 0.949070 |
1 | F@P-10% | GD$_{CLA}$ | 76.224911 | 0.583109 | 95.713807 | 0.942884 | 75.819642 | 0.336689 | 95.639929 | 0.952554 |
2 | F@P-10% | GD$_{GLB}$ | 81.386665 | 0.192739 | 91.515599 | 0.423581 | 80.509310 | 0.337443 | 92.022176 | 0.789046 |
3 | F@P-10% | FairBatch | 81.519515 | 0.175808 | 92.153284 | 0.412501 | 80.542169 | 0.166360 | 91.306913 | 0.343108 |
4 | F@P-10% | Adv | 81.459912 | 0.251858 | 91.949759 | 1.492307 | 80.722892 | 0.236610 | 90.765038 | 0.863258 |
5 | F@P-10% | SemiAdv | 81.495817 | 0.189426 | 91.216804 | 1.188129 | 80.759401 | 0.294421 | 90.947824 | 1.153236 |
6 | F@P-10% | ARL | 81.227245 | 0.160926 | 90.778799 | 0.700788 | 80.587806 | 0.204951 | 90.849908 | 1.237927 |
7 | F@P-10% | ULPL+GD$_{CLA}$ | 72.298302 | 0.695014 | 95.160000 | 2.109071 | 71.944140 | 0.580521 | 94.616393 | 1.402168 |
8 | F@P-10% | ULPL+GD$_{GLB}$ | 77.746580 | 0.284734 | 92.289495 | 1.395198 | 77.046367 | 0.220328 | 92.406722 | 0.352765 |
9 | F@P-10% | ULPL+Adv | 79.647409 | 0.408648 | 92.168986 | 1.165822 | 79.070829 | 0.328207 | 90.879209 | 0.958528 |
# Bios (intersection): save the trade-off figure, then display the F@P-5% / F@P-10% table.
_dataset = "Bios_intersection"
make_plot(
    plot_df=get_renamed_main_results(_dataset),
    figure_name=_dataset,
    figsize=(7.5, 4),
)
# NOTE(review): 0.81319881 is the value also used for Bios_gender, while the
# recorded output of this cell lists Vanilla test performance as 81.208574 --
# confirm that sharing the baseline across Bios variants is intended.
# Kept as the last expression so the notebook displays the returned table.
dataset_specific_selected_results(_dataset, vanilla_performance=0.81319881)
Selection | Models | test_performance mean | test_performance std | test_fairness mean | test_fairness std | dev_performance mean | dev_performance std | dev_fairness mean | dev_fairness std | |
---|---|---|---|---|---|---|---|---|---|---|
0 | F@P-5% | Vanilla | 81.208574 | 0.278291 | 77.742934 | 1.393501 | 80.518437 | 0.347777 | 77.171141 | 0.554821 |
1 | F@P-5% | GD$_{CLA}$ | 78.120714 | 0.451592 | 84.031245 | 1.965084 | 77.342096 | 0.416604 | 84.130875 | 0.507210 |
2 | F@P-5% | GD$_{GLB}$ | 79.472909 | 0.603012 | 80.397254 | 0.951479 | 78.742242 | 0.487484 | 78.107341 | 0.891311 |
3 | F@P-5% | FairBatch | 80.704463 | 0.322649 | 81.699337 | 0.491699 | 79.837532 | 0.183345 | 79.843823 | 0.789960 |
4 | F@P-5% | Adv | 80.211842 | 0.859588 | 81.288524 | 0.701402 | 79.401241 | 0.731003 | 78.880363 | 0.748327 |
5 | F@P-5% | SemiAdv | 80.013644 | 1.658296 | 81.582223 | 0.947350 | 79.479737 | 1.340727 | 78.736862 | 0.568701 |
6 | F@P-5% | ARL | 80.235539 | 2.296667 | 78.415333 | 0.736374 | 79.437751 | 1.971854 | 77.676129 | 0.384372 |
7 | F@P-5% | ULPL+GD$_{CLA}$ | 78.734695 | 0.275357 | 81.391566 | 0.258883 | 77.982840 | 0.403490 | 81.155965 | 0.433602 |
8 | F@P-5% | ULPL+GD$_{GLB}$ | 78.334710 | 0.949729 | 79.559656 | 1.334804 | 77.561154 | 0.857482 | 79.684563 | 1.434975 |
9 | F@P-5% | ULPL+Adv | 79.671107 | 1.898005 | 78.352466 | 0.977720 | 78.997809 | 1.607411 | 77.430733 | 1.198612 |
0 | F@P-10% | Vanilla | 81.208574 | 0.278291 | 77.742934 | 1.393501 | 80.518437 | 0.347777 | 77.171141 | 0.554821 |
1 | F@P-10% | GD$_{CLA}$ | 73.812789 | 1.287958 | 89.007447 | 2.321264 | 73.594378 | 1.215164 | 87.061471 | 0.770755 |
2 | F@P-10% | GD$_{GLB}$ | 76.709633 | 2.709371 | 81.011786 | 0.746352 | 76.144578 | 2.292033 | 79.955897 | 1.120463 |
3 | F@P-10% | FairBatch | 80.704463 | 0.322649 | 81.699337 | 0.491699 | 79.837532 | 0.183345 | 79.843823 | 0.789960 |
4 | F@P-10% | Adv | 72.039783 | 7.935551 | 83.730010 | 5.405406 | 71.635633 | 8.051407 | 83.121696 | 4.283392 |
5 | F@P-10% | SemiAdv | 72.039783 | 5.193244 | 82.836798 | 2.572327 | 71.723257 | 5.095916 | 82.108681 | 1.581228 |
6 | F@P-10% | ARL | 73.541345 | 5.262867 | 78.564943 | 2.293222 | 73.194597 | 5.178430 | 78.770497 | 1.789653 |
7 | F@P-10% | ULPL+GD$_{CLA}$ | 74.543822 | 0.643455 | 86.038809 | 1.766589 | 73.994158 | 0.586553 | 85.083362 | 0.943596 |
8 | F@P-10% | ULPL+GD$_{GLB}$ | 72.322717 | 11.992396 | 83.355767 | 6.399627 | 71.663016 | 11.941428 | 81.833388 | 5.225053 |
9 | F@P-10% | ULPL+Adv | 72.654483 | 3.634131 | 79.989853 | 1.909363 | 72.155896 | 3.343992 | 80.880114 | 1.498887 |
# Trustpilot (gender): save the trade-off figure, then display the F@P-5% / F@P-10% table.
_dataset = "Trustpilot_gender"
make_plot(
    plot_df=get_renamed_main_results(_dataset),
    figure_name=_dataset,
    figsize=(7.5, 4),
)
# NOTE(review): in the recorded output of this cell, 78.116426 is the test
# performance of the GD$_{CLA}$ row, whereas the Vanilla row shows 81.297738 --
# confirm that 0.78116426 is the intended vanilla baseline.
# Kept as the last expression so the notebook displays the returned table.
dataset_specific_selected_results(_dataset, vanilla_performance=0.78116426)
Selection | Models | test_performance mean | test_performance std | test_fairness mean | test_fairness std | dev_performance mean | dev_performance std | dev_fairness mean | dev_fairness std | |
---|---|---|---|---|---|---|---|---|---|---|
0 | F@P-5% | Vanilla | 81.297738 | 0.276972 | 96.325770 | 0.975436 | 80.786517 | 0.103421 | 97.129318 | 0.884796 |
1 | F@P-5% | GD$_{CLA}$ | 78.116426 | 0.312592 | 98.389091 | 0.494388 | 78.265918 | 0.142986 | 99.226261 | 0.420856 |
2 | F@P-5% | GD$_{GLB}$ | 81.145717 | 0.190872 | 96.963263 | 0.488182 | 80.681648 | 0.062950 | 97.608850 | 0.797853 |
3 | F@P-5% | FairBatch | 80.893585 | 0.131484 | 96.848545 | 0.748260 | 80.779026 | 0.178444 | 97.476622 | 0.612677 |
4 | F@P-5% | Adv | 81.205043 | 0.335092 | 96.811112 | 0.551941 | 80.756554 | 0.212115 | 97.527805 | 0.597171 |
5 | F@P-5% | SemiAdv | 81.338524 | 0.162997 | 96.464784 | 0.663348 | 80.704120 | 0.201082 | 97.185197 | 0.537677 |
6 | F@P-5% | ARL | 81.294030 | 0.288401 | 95.925337 | 0.100262 | 80.674157 | 0.211867 | 97.124206 | 0.261612 |
7 | F@P-5% | ULPL+GD$_{CLA}$ | 73.025584 | 3.952008 | 99.510579 | 0.699124 | 73.153558 | 4.028168 | 99.457324 | 0.528811 |
8 | F@P-5% | ULPL+GD$_{GLB}$ | 80.563589 | 0.466729 | 97.491797 | 0.634631 | 80.494382 | 0.209621 | 97.671374 | 1.041698 |
9 | F@P-5% | ULPL+Adv | 73.618836 | 0.000000 | 99.722672 | 0.000000 | 73.558052 | 0.000000 | 99.007970 | 0.000000 |
0 | F@P-10% | Vanilla | 81.297738 | 0.276972 | 96.325770 | 0.975436 | 80.786517 | 0.103421 | 97.129318 | 0.884796 |
1 | F@P-10% | GD$_{CLA}$ | 78.116426 | 0.312592 | 98.389091 | 0.494388 | 78.265918 | 0.142986 | 99.226261 | 0.420856 |
2 | F@P-10% | GD$_{GLB}$ | 81.145717 | 0.190872 | 96.963263 | 0.488182 | 80.681648 | 0.062950 | 97.608850 | 0.797853 |
3 | F@P-10% | FairBatch | 80.893585 | 0.131484 | 96.848545 | 0.748260 | 80.779026 | 0.178444 | 97.476622 | 0.612677 |
4 | F@P-10% | Adv | 81.205043 | 0.335092 | 96.811112 | 0.551941 | 80.756554 | 0.212115 | 97.527805 | 0.597171 |
5 | F@P-10% | SemiAdv | 81.338524 | 0.162997 | 96.464784 | 0.663348 | 80.704120 | 0.201082 | 97.185197 | 0.537677 |
6 | F@P-10% | ARL | 81.294030 | 0.288401 | 95.925337 | 0.100262 | 80.674157 | 0.211867 | 97.124206 | 0.261612 |
7 | F@P-10% | ULPL+GD$_{CLA}$ | 68.946978 | 0.000000 | 99.999994 | 0.000000 | 68.970037 | 0.000000 | 99.999994 | 0.000000 |
8 | F@P-10% | ULPL+GD$_{GLB}$ | 80.563589 | 0.466729 | 97.491797 | 0.634631 | 80.494382 | 0.209621 | 97.671374 | 1.041698 |
9 | F@P-10% | ULPL+Adv | 68.946978 | 0.000000 | 99.999994 | 0.000000 | 68.970037 | 0.000000 | 99.999994 | 0.000000 |
# Trustpilot (age): save the trade-off figure, then display the F@P-5% / F@P-10% table.
_dataset = "Trustpilot_age"
make_plot(
    plot_df=get_renamed_main_results(_dataset),
    figure_name=_dataset,
    figsize=(7.5, 4),
)
# NOTE(review): the recorded output of this cell lists Vanilla test
# performance as 81.045606, which differs from the 0.78116426 passed here --
# confirm the intended vanilla baseline.
# Kept as the last expression so the notebook displays the returned table.
dataset_specific_selected_results(_dataset, vanilla_performance=0.78116426)
Selection | Models | test_performance mean | test_performance std | test_fairness mean | test_fairness std | dev_performance mean | dev_performance std | dev_fairness mean | dev_fairness std | |
---|---|---|---|---|---|---|---|---|---|---|
0 | F@P-5% | Vanilla | 81.045606 | 0.258087 | 97.097566 | 0.305995 | 80.782772 | 0.083117 | 96.578885 | 0.541678 |
1 | F@P-5% | GD$_{CLA}$ | 77.871709 | 0.218809 | 99.359334 | 0.541717 | 78.104869 | 0.078785 | 99.412237 | 0.444083 |
2 | F@P-5% | GD$_{GLB}$ | 81.056730 | 0.462624 | 97.701366 | 0.658103 | 80.625468 | 0.217825 | 97.351105 | 0.465471 |
3 | F@P-5% | FairBatch | 81.190211 | 0.313963 | 97.343838 | 0.364547 | 80.719101 | 0.099621 | 97.683762 | 1.281547 |
4 | F@P-5% | Adv | 80.752688 | 0.461136 | 98.244563 | 0.712342 | 80.629213 | 0.256288 | 97.315322 | 0.728022 |
5 | F@P-5% | SemiAdv | 80.908417 | 0.377989 | 98.205712 | 0.786217 | 80.677903 | 0.125202 | 97.486047 | 1.312259 |
6 | F@P-5% | ARL | 81.159622 | 0.382156 | 98.170763 | 0.154956 | 80.702247 | 0.186327 | 97.282461 | 0.596821 |
7 | F@P-5% | ULPL+GD$_{CLA}$ | 77.385984 | 0.247134 | 99.702920 | 0.236841 | 77.430712 | 0.330088 | 99.171007 | 0.659483 |
8 | F@P-5% | ULPL+GD$_{GLB}$ | 80.782351 | 0.468016 | 98.296809 | 0.404774 | 80.636704 | 0.290413 | 97.580444 | 0.394189 |
9 | F@P-5% | ULPL+Adv | 78.394512 | 0.794331 | 99.368824 | 0.407081 | 78.393258 | 0.729544 | 98.867835 | 0.717831 |
0 | F@P-10% | Vanilla | 81.045606 | 0.258087 | 97.097566 | 0.305995 | 80.782772 | 0.083117 | 96.578885 | 0.541678 |
1 | F@P-10% | GD$_{CLA}$ | 77.871709 | 0.218809 | 99.359334 | 0.541717 | 78.104869 | 0.078785 | 99.412237 | 0.444083 |
2 | F@P-10% | GD$_{GLB}$ | 81.056730 | 0.462624 | 97.701366 | 0.658103 | 80.625468 | 0.217825 | 97.351105 | 0.465471 |
3 | F@P-10% | FairBatch | 81.190211 | 0.313963 | 97.343838 | 0.364547 | 80.719101 | 0.099621 | 97.683762 | 1.281547 |
4 | F@P-10% | Adv | 80.752688 | 0.461136 | 98.244563 | 0.712342 | 80.629213 | 0.256288 | 97.315322 | 0.728022 |
5 | F@P-10% | SemiAdv | 80.908417 | 0.377989 | 98.205712 | 0.786217 | 80.677903 | 0.125202 | 97.486047 | 1.312259 |
6 | F@P-10% | ARL | 81.159622 | 0.382156 | 98.170763 | 0.154956 | 80.702247 | 0.186327 | 97.282461 | 0.596821 |
7 | F@P-10% | ULPL+GD$_{CLA}$ | 68.946978 | 0.000000 | 99.999994 | 0.000000 | 68.970037 | 0.000000 | 99.999994 | 0.000000 |
8 | F@P-10% | ULPL+GD$_{GLB}$ | 80.782351 | 0.468016 | 98.296809 | 0.404774 | 80.636704 | 0.290413 | 97.580444 | 0.394189 |
9 | F@P-10% | ULPL+Adv | 68.946978 | 0.000000 | 99.999994 | 0.000000 | 68.970037 | 0.000000 | 99.999994 | 0.000000 |
# Trustpilot (country): save the trade-off figure, then display the F@P-5% / F@P-10% table.
_dataset = "Trustpilot_country"
make_plot(
    plot_df=get_renamed_main_results(_dataset),
    figure_name=_dataset,
    figsize=(7.5, 4),
)
# NOTE(review): the recorded output of this cell lists Vanilla test
# performance as 80.745273, which differs from the 0.78116426 passed here --
# confirm the intended vanilla baseline.
# Kept as the last expression so the notebook displays the returned table.
dataset_specific_selected_results(_dataset, vanilla_performance=0.78116426)
Selection | Models | test_performance mean | test_performance std | test_fairness mean | test_fairness std | dev_performance mean | dev_performance std | dev_fairness mean | dev_fairness std | |
---|---|---|---|---|---|---|---|---|---|---|
0 | F@P-5% | Vanilla | 80.745273 | 0.225004 | 96.005711 | 0.529619 | 80.561798 | 0.212281 | 96.253874 | 1.450056 |
1 | F@P-5% | GD$_{CLA}$ | 77.916203 | 0.093984 | 99.158562 | 0.312946 | 77.988764 | 0.102227 | 99.156381 | 0.535753 |
2 | F@P-5% | GD$_{GLB}$ | 81.112347 | 0.236474 | 96.220421 | 1.052576 | 80.696629 | 0.130684 | 96.775908 | 0.845073 |
3 | F@P-5% | FairBatch | 80.904709 | 0.244548 | 95.775919 | 0.969324 | 80.707865 | 0.195691 | 97.149739 | 1.129746 |
4 | F@P-5% | Adv | 80.626622 | 0.684819 | 97.255470 | 1.227669 | 80.385768 | 0.316030 | 96.934594 | 0.781834 |
5 | F@P-5% | SemiAdv | 81.075269 | 0.497698 | 95.996763 | 0.635262 | 80.520599 | 0.319287 | 97.486839 | 1.042398 |
6 | F@P-5% | ARL | 81.168891 | 0.226741 | 96.009249 | 1.013241 | 80.856742 | 0.156957 | 97.258116 | 0.536135 |
7 | F@P-5% | ULPL+GD$_{CLA}$ | 73.685577 | 4.331704 | 99.392089 | 1.002122 | 73.677903 | 4.311685 | 99.284077 | 0.819403 |
8 | F@P-5% | ULPL+GD$_{GLB}$ | 81.319985 | 0.224239 | 95.619971 | 1.110626 | 80.726592 | 0.211619 | 98.031845 | 0.531160 |
9 | F@P-5% | ULPL+Adv | 78.038561 | 0.074848 | 99.315864 | 0.252156 | 78.149813 | 0.038378 | 99.622373 | 0.315328 |
0 | F@P-10% | Vanilla | 80.745273 | 0.225004 | 96.005711 | 0.529619 | 80.561798 | 0.212281 | 96.253874 | 1.450056 |
1 | F@P-10% | GD$_{CLA}$ | 77.916203 | 0.093984 | 99.158562 | 0.312946 | 77.988764 | 0.102227 | 99.156381 | 0.535753 |
2 | F@P-10% | GD$_{GLB}$ | 81.112347 | 0.236474 | 96.220421 | 1.052576 | 80.696629 | 0.130684 | 96.775908 | 0.845073 |
3 | F@P-10% | FairBatch | 80.904709 | 0.244548 | 95.775919 | 0.969324 | 80.707865 | 0.195691 | 97.149739 | 1.129746 |
4 | F@P-10% | Adv | 80.626622 | 0.684819 | 97.255470 | 1.227669 | 80.385768 | 0.316030 | 96.934594 | 0.781834 |
5 | F@P-10% | SemiAdv | 81.075269 | 0.497698 | 95.996763 | 0.635262 | 80.520599 | 0.319287 | 97.486839 | 1.042398 |
6 | F@P-10% | ARL | 81.168891 | 0.226741 | 96.009249 | 1.013241 | 80.856742 | 0.156957 | 97.258116 | 0.536135 |
7 | F@P-10% | ULPL+GD$_{CLA}$ | 68.946978 | 0.000000 | 99.999986 | 0.000000 | 68.970037 | 0.000000 | 99.999984 | 0.000000 |
8 | F@P-10% | ULPL+GD$_{GLB}$ | 81.319985 | 0.224239 | 95.619971 | 1.110626 | 80.726592 | 0.211619 | 98.031845 | 0.531160 |
9 | F@P-10% | ULPL+Adv | 68.946978 | 0.000000 | 99.999986 | 0.000000 | 68.970037 | 0.000000 | 99.999984 | 0.000000 |
# Trustpilot (intersection): save the trade-off figure, then display the F@P-5% / F@P-10% table.
_dataset = "Trustpilot_intersection"
make_plot(
    plot_df=get_renamed_main_results(_dataset),
    figure_name=_dataset,
    figsize=(7.5, 4),
)
# NOTE(review): the recorded output of this cell lists Vanilla test
# performance as 80.029663, which differs from the 0.78116426 passed here --
# confirm the intended vanilla baseline.
# Kept as the last expression so the notebook displays the returned table.
dataset_specific_selected_results(_dataset, vanilla_performance=0.78116426)
Selection | Models | test_performance mean | test_performance std | test_fairness mean | test_fairness std | dev_performance mean | dev_performance std | dev_fairness mean | dev_fairness std | |
---|---|---|---|---|---|---|---|---|---|---|
0 | F@P-5% | Vanilla | 80.029663 | 0.947800 | 91.049808 | 1.825704 | 79.670412 | 0.579708 | 92.798630 | 0.357842 |
1 | F@P-5% | GD$_{CLA}$ | 75.491287 | 3.754640 | 97.383503 | 1.618115 | 75.363296 | 3.729569 | 98.174345 | 1.113623 |
2 | F@P-5% | GD$_{GLB}$ | 80.278087 | 0.688123 | 90.910057 | 1.569654 | 79.898876 | 0.447209 | 92.985895 | 0.596136 |
3 | F@P-5% | FairBatch | 79.907304 | 0.847584 | 90.506805 | 2.281302 | 79.640449 | 0.520384 | 93.323170 | 1.266322 |
4 | F@P-5% | Adv | 79.143493 | 0.846225 | 94.218964 | 0.420659 | 79.250936 | 0.910024 | 94.179588 | 0.997557 |
5 | F@P-5% | SemiAdv | 77.960697 | 1.305159 | 94.742426 | 1.339536 | 78.022472 | 0.917087 | 95.771247 | 1.074950 |
6 | F@P-5% | ARL | 77.376097 | 0.993242 | 94.874764 | 0.727538 | 77.322097 | 1.023816 | 94.755960 | 1.027630 |
7 | F@P-5% | ULPL+GD$_{CLA}$ | 73.659622 | 4.324553 | 98.313028 | 1.655804 | 73.573034 | 4.233233 | 98.897875 | 1.056645 |
8 | F@P-5% | ULPL+GD$_{GLB}$ | 74.123100 | 4.716400 | 97.528983 | 2.044146 | 74.022472 | 4.588328 | 98.246164 | 1.359297 |
9 | F@P-5% | ULPL+Adv | 74.134223 | 4.737124 | 97.380474 | 2.408097 | 74.258427 | 4.829239 | 97.772977 | 2.188878 |
0 | F@P-10% | Vanilla | 80.029663 | 0.947800 | 91.049808 | 1.825704 | 79.670412 | 0.579708 | 92.798630 | 0.357842 |
1 | F@P-10% | GD$_{CLA}$ | 68.946978 | 0.000000 | 99.999912 | 0.000000 | 68.970037 | 0.000000 | 99.999910 | 0.000000 |
2 | F@P-10% | GD$_{GLB}$ | 80.278087 | 0.688123 | 90.910057 | 1.569654 | 79.898876 | 0.447209 | 92.985895 | 0.596136 |
3 | F@P-10% | FairBatch | 79.907304 | 0.847584 | 90.506805 | 2.281302 | 79.640449 | 0.520384 | 93.323170 | 1.266322 |
4 | F@P-10% | Adv | 79.143493 | 0.846225 | 94.218964 | 0.420659 | 79.250936 | 0.910024 | 94.179588 | 0.997557 |
5 | F@P-10% | SemiAdv | 77.960697 | 1.305159 | 94.742426 | 1.339536 | 78.022472 | 0.917087 | 95.771247 | 1.074950 |
6 | F@P-10% | ARL | 77.376097 | 0.993242 | 94.874764 | 0.727538 | 77.322097 | 1.023816 | 94.755960 | 1.027630 |
7 | F@P-10% | ULPL+GD$_{CLA}$ | 68.950686 | 0.008291 | 99.965485 | 0.076981 | 68.970037 | 0.000000 | 99.999910 | 0.000000 |
8 | F@P-10% | ULPL+GD$_{GLB}$ | 68.946978 | 0.000000 | 99.999912 | 0.000000 | 68.970037 | 0.000000 | 99.999910 | 0.000000 |
9 | F@P-10% | ULPL+Adv | 68.946978 | 0.000000 | 99.999912 | 0.000000 | 68.970037 | 0.000000 | 99.999910 | 0.000000 |
# Adult (gender): save the trade-off figure, then display the F@P-5% / F@P-10% table.
_dataset = "Adult_gender"
make_plot(
    plot_df=get_renamed_main_results(_dataset),
    figure_name=_dataset,
    figsize=(7.5, 4),
)
# Kept as the last expression so the notebook displays the returned table.
dataset_specific_selected_results(_dataset, vanilla_performance=0.85412444)
Selection | Models | test_performance mean | test_performance std | test_fairness mean | test_fairness std | dev_performance mean | dev_performance std | dev_fairness mean | dev_fairness std | |
---|---|---|---|---|---|---|---|---|---|---|
0 | F@P-5% | Vanilla | 85.412444 | 0.520416 | 95.088382 | 0.594947 | 86.423089 | 0.486622 | 93.088167 | 1.964352 |
1 | F@P-5% | GD$_{CLA}$ | 83.965358 | 0.357686 | 96.355519 | 1.447544 | 84.832668 | 0.222465 | 96.978229 | 1.793294 |
2 | F@P-5% | GD$_{GLB}$ | 85.295744 | 0.722621 | 96.053106 | 0.461623 | 86.238870 | 0.734119 | 93.717367 | 2.128647 |
3 | F@P-5% | FairBatch | 85.385419 | 0.231151 | 95.196650 | 0.992318 | 86.453792 | 0.280896 | 98.046045 | 0.428465 |
4 | F@P-5% | Adv | 85.588109 | 0.250536 | 95.998231 | 0.585777 | 86.588885 | 0.143025 | 95.778162 | 1.680654 |
5 | F@P-5% | SemiAdv | 85.521774 | 0.116085 | 96.144235 | 0.527803 | 86.509057 | 0.232413 | 95.924229 | 1.173921 |
6 | F@P-5% | ARL | 85.946809 | 0.000000 | 96.017944 | 0.000000 | 86.920479 | 0.000000 | 94.921595 | 0.000000 |
7 | F@P-5% | ULPL+GD$_{CLA}$ | 85.321540 | 0.445006 | 95.791947 | 0.782858 | 86.073073 | 0.714004 | 94.029608 | 1.632289 |
8 | F@P-5% | ULPL+GD$_{GLB}$ | 85.381733 | 0.140667 | 96.164554 | 0.659786 | 85.790605 | 0.157157 | 95.300776 | 0.833846 |
9 | F@P-5% | ULPL+Adv | 85.457896 | 0.045052 | 96.393440 | 0.606922 | 86.245011 | 0.268542 | 95.020946 | 1.541770 |
0 | F@P-10% | Vanilla | 85.412444 | 0.520416 | 95.088382 | 0.594947 | 86.423089 | 0.486622 | 93.088167 | 1.964352 |
1 | F@P-10% | GD$_{CLA}$ | 83.965358 | 0.357686 | 96.355519 | 1.447544 | 84.832668 | 0.222465 | 96.978229 | 1.793294 |
2 | F@P-10% | GD$_{GLB}$ | 85.295744 | 0.722621 | 96.053106 | 0.461623 | 86.238870 | 0.734119 | 93.717367 | 2.128647 |
3 | F@P-10% | FairBatch | 85.385419 | 0.231151 | 95.196650 | 0.992318 | 86.453792 | 0.280896 | 98.046045 | 0.428465 |
4 | F@P-10% | Adv | 85.588109 | 0.250536 | 95.998231 | 0.585777 | 86.588885 | 0.143025 | 95.778162 | 1.680654 |
5 | F@P-10% | SemiAdv | 85.521774 | 0.116085 | 96.144235 | 0.527803 | 86.509057 | 0.232413 | 95.924229 | 1.173921 |
6 | F@P-10% | ARL | 85.946809 | 0.000000 | 96.017944 | 0.000000 | 86.920479 | 0.000000 | 94.921595 | 0.000000 |
7 | F@P-10% | ULPL+GD$_{CLA}$ | 78.887046 | 1.420921 | 98.806325 | 0.953976 | 78.434142 | 1.539845 | 96.434955 | 1.687651 |
8 | F@P-10% | ULPL+GD$_{GLB}$ | 85.381733 | 0.140667 | 96.164554 | 0.659786 | 85.790605 | 0.157157 | 95.300776 | 0.833846 |
9 | F@P-10% | ULPL+Adv | 80.073706 | 2.287308 | 98.692674 | 1.172015 | 79.871047 | 2.548598 | 95.087000 | 2.180285 |
# Adult (race): save the trade-off figure, then display the F@P-5% / F@P-10% table.
_dataset = "Adult_race"
make_plot(
    plot_df=get_renamed_main_results(_dataset),
    figure_name=_dataset,
    figsize=(7.5, 4),
)
# NOTE(review): 0.85412444 is the value also used for Adult_gender. In the
# recorded output of this cell the Vanilla and Adv rows show 76 / 100 with
# zero std, i.e. the fallback values used when no row is selected for a
# model -- confirm that results for these models exist for this dataset.
# Kept as the last expression so the notebook displays the returned table.
dataset_specific_selected_results(_dataset, vanilla_performance=0.85412444)
Selection | Models | test_performance mean | test_performance std | test_fairness mean | test_fairness std | dev_performance mean | dev_performance std | dev_fairness mean | dev_fairness std | |
---|---|---|---|---|---|---|---|---|---|---|
0 | F@P-5% | Vanilla | 76.000000 | 0.000000 | 100.000000 | 0.000000 | 76.000000 | 0.000000 | 100.000000 | 0.000000 |
1 | F@P-5% | GD$_{CLA}$ | 84.300719 | 0.407585 | 81.975519 | 4.573350 | 85.121277 | 0.621918 | 78.206210 | 4.789396 |
2 | F@P-5% | GD$_{GLB}$ | 83.105460 | 0.490344 | 81.312475 | 4.377063 | 83.831747 | 0.652973 | 81.305546 | 1.631060 |
3 | F@P-5% | FairBatch | 85.407530 | 0.144985 | 75.052412 | 5.095453 | 86.263433 | 0.138334 | 74.741923 | 1.261654 |
4 | F@P-5% | Adv | 76.000000 | 0.000000 | 100.000000 | 0.000000 | 76.000000 | 0.000000 | 100.000000 | 0.000000 |
5 | F@P-5% | SemiAdv | 85.387875 | 0.317347 | 75.171285 | 5.024378 | 86.073073 | 0.230376 | 68.501851 | 5.278717 |
6 | F@P-5% | ARL | 80.738284 | 2.035854 | 86.975101 | 7.348920 | 80.589500 | 2.602294 | 77.652552 | 6.307436 |
7 | F@P-5% | ULPL+GD$_{CLA}$ | 80.674406 | 3.974085 | 83.619529 | 13.149494 | 80.472828 | 4.783184 | 82.398974 | 13.649132 |
8 | F@P-5% | ULPL+GD$_{GLB}$ | 85.277317 | 0.622495 | 77.500018 | 3.733137 | 86.140620 | 0.495358 | 72.560321 | 5.395517 |
9 | F@P-5% | ULPL+Adv | 81.069959 | 1.013107 | 87.282710 | 9.928893 | 80.798281 | 1.265102 | 77.354991 | 2.834802 |
0 | F@P-10% | Vanilla | 76.000000 | 0.000000 | 100.000000 | 0.000000 | 76.000000 | 0.000000 | 100.000000 | 0.000000 |
1 | F@P-10% | GD$_{CLA}$ | 76.382286 | 0.010987 | 99.839588 | 0.358525 | 75.412957 | 0.013731 | 99.927361 | 0.161612 |
2 | F@P-10% | GD$_{GLB}$ | 83.105460 | 0.490344 | 81.312475 | 4.377063 | 83.831747 | 0.652973 | 81.305546 | 1.631060 |
3 | F@P-10% | FairBatch | 85.407530 | 0.144985 | 75.052412 | 5.095453 | 86.263433 | 0.138334 | 74.741923 | 1.261654 |
4 | F@P-10% | Adv | 76.000000 | 0.000000 | 100.000000 | 0.000000 | 76.000000 | 0.000000 | 100.000000 | 0.000000 |
5 | F@P-10% | SemiAdv | 85.387875 | 0.317347 | 75.171285 | 5.024378 | 86.073073 | 0.230376 | 68.501851 | 5.278717 |
6 | F@P-10% | ARL | 77.622689 | 5.305404 | 91.777642 | 10.849550 | 77.295057 | 6.140902 | 85.167660 | 20.796760 |
7 | F@P-10% | ULPL+GD$_{CLA}$ | 76.449850 | 0.085152 | 99.277283 | 0.652178 | 75.474363 | 0.070014 | 99.203599 | 0.825535 |
8 | F@P-10% | ULPL+GD$_{GLB}$ | 85.277317 | 0.622495 | 77.500018 | 3.733137 | 86.140620 | 0.495358 | 72.560321 | 5.395517 |
9 | F@P-10% | ULPL+Adv | 77.288864 | 1.579146 | 95.101350 | 7.154649 | 76.370893 | 1.760094 | 93.922012 | 8.108020 |
# Adult / intersectional groups: draw the Performance-vs-Fairness line plot
# (grouped by model), then display the model-selection table whose output
# follows.
_dataset = "Adult_intersection"
make_plot(get_renamed_main_results(_dataset), figure_name=_dataset, figsize=(7.5, 4))
# NOTE(review): vanilla_performance is presumably the Vanilla model's accuracy
# expressed as a fraction; the Vanilla row printed below reports a test
# performance of 85.306799 — confirm 0.85412444 is the intended reference.
dataset_specific_selected_results(_dataset, vanilla_performance=0.85412444)
Selection | Models | test_performance mean | test_performance std | test_fairness mean | test_fairness std | dev_performance mean | dev_performance std | dev_fairness mean | dev_fairness std | |
---|---|---|---|---|---|---|---|---|---|---|
0 | F@P-5% | Vanilla | 85.306799 | 0.360962 | 55.744026 | 7.214055 | 86.140620 | 0.262866 | 52.923663 | 11.771571 |
1 | F@P-5% | GD$_{CLA}$ | 85.666728 | 0.318729 | 53.212019 | 2.935999 | 86.441511 | 0.381386 | 61.346009 | 5.951506 |
2 | F@P-5% | GD$_{GLB}$ | 85.667957 | 0.103199 | 52.436669 | 5.034591 | 86.533620 | 0.237428 | 64.124192 | 2.563232 |
3 | F@P-5% | FairBatch | 83.889196 | 0.464330 | 65.183947 | 5.851908 | 84.482653 | 1.099668 | 48.747531 | 0.827715 |
4 | F@P-5% | Adv | 76.000000 | 0.000000 | 100.000000 | 0.000000 | 76.000000 | 0.000000 | 100.000000 | 0.000000 |
5 | F@P-5% | SemiAdv | 76.000000 | 0.000000 | 100.000000 | 0.000000 | 76.000000 | 0.000000 | 100.000000 | 0.000000 |
6 | F@P-5% | ARL | 85.668366 | 0.194361 | 53.621980 | 2.670074 | 86.378058 | 0.077268 | 61.915972 | 9.160700 |
7 | F@P-5% | ULPL+GD$_{CLA}$ | 81.859837 | 3.080762 | 68.880482 | 13.943105 | 81.553577 | 3.511944 | 62.509076 | 1.642198 |
8 | F@P-5% | ULPL+GD$_{GLB}$ | 85.263804 | 0.360962 | 65.546022 | 9.802740 | 85.821308 | 0.647464 | 56.652497 | 3.566101 |
9 | F@P-5% | ULPL+Adv | 85.115165 | 0.516297 | 59.823806 | 8.357308 | 85.606386 | 0.597489 | 60.939789 | 7.139558 |
0 | F@P-10% | Vanilla | 85.306799 | 0.360962 | 55.744026 | 7.214055 | 86.140620 | 0.262866 | 52.923663 | 11.771571 |
1 | F@P-10% | GD$_{CLA}$ | 77.295006 | 2.051892 | 88.968696 | 19.889505 | 76.438440 | 2.306782 | 64.568157 | 0.170260 |
2 | F@P-10% | GD$_{GLB}$ | 85.667957 | 0.103199 | 52.436669 | 5.034591 | 86.533620 | 0.237428 | 64.124192 | 2.563232 |
3 | F@P-10% | FairBatch | 83.889196 | 0.464330 | 65.183947 | 5.851908 | 84.482653 | 1.099668 | 48.747531 | 0.827715 |
4 | F@P-10% | Adv | 76.000000 | 0.000000 | 100.000000 | 0.000000 | 76.000000 | 0.000000 | 100.000000 | 0.000000 |
5 | F@P-10% | SemiAdv | 76.000000 | 0.000000 | 100.000000 | 0.000000 | 76.000000 | 0.000000 | 100.000000 | 0.000000 |
6 | F@P-10% | ARL | 85.668366 | 0.194361 | 53.621980 | 2.670074 | 86.378058 | 0.077268 | 61.915972 | 9.160700 |
7 | F@P-10% | ULPL+GD$_{CLA}$ | 77.844113 | 3.136077 | 92.245777 | 14.906069 | 76.984955 | 3.494534 | 65.361602 | 1.867078 |
8 | F@P-10% | ULPL+GD$_{GLB}$ | 85.263804 | 0.360962 | 65.546022 | 9.802740 | 85.821308 | 0.647464 | 56.652497 | 3.566101 |
9 | F@P-10% | ULPL+Adv | 76.389657 | 0.126772 | 95.866048 | 5.308586 | 75.468222 | 0.137308 | 63.416848 | 1.391134 |
# COMPAS / gender attribute: draw the Performance-vs-Fairness line plot
# (grouped by model), then display the model-selection table whose output
# follows.
_dataset = "COMPAS_gender"
make_plot(get_renamed_main_results(_dataset), figure_name=_dataset, figsize=(7.5, 4))
# The value passed here equals the Vanilla test_performance mean printed below
# (67.778291) when read as a fraction; presumably that is its meaning —
# TODO confirm against dataset_specific_selected_results.
dataset_specific_selected_results(_dataset, vanilla_performance=0.67778291)
Selection | Models | test_performance mean | test_performance std | test_fairness mean | test_fairness std | dev_performance mean | dev_performance std | dev_fairness mean | dev_fairness std | |
---|---|---|---|---|---|---|---|---|---|---|
0 | F@P-5% | Vanilla | 67.778291 | 1.223190 | 89.448417 | 3.194071 | 65.108911 | 0.878905 | 85.627670 | 2.342891 |
1 | F@P-5% | GD$_{CLA}$ | 66.937644 | 1.142849 | 94.809658 | 1.593586 | 63.960396 | 0.313097 | 91.727893 | 2.647230 |
2 | F@P-5% | GD$_{GLB}$ | 68.609700 | 0.569086 | 90.258730 | 1.883966 | 64.237624 | 1.636511 | 88.673020 | 2.978578 |
3 | F@P-5% | FairBatch | 66.909931 | 0.990975 | 92.039291 | 3.215274 | 64.396040 | 1.396004 | 88.231295 | 3.627183 |
4 | F@P-5% | Adv | 67.842956 | 0.466947 | 91.583757 | 4.181533 | 64.316832 | 1.323922 | 87.093292 | 3.450081 |
5 | F@P-5% | SemiAdv | 68.434180 | 0.718983 | 90.852623 | 2.234303 | 64.752475 | 0.959937 | 86.611808 | 1.089036 |
6 | F@P-5% | ARL | 67.944573 | 0.910607 | 91.721463 | 1.057673 | 64.603960 | 0.729813 | 88.201015 | 2.516849 |
7 | F@P-5% | ULPL+GD$_{CLA}$ | 68.452656 | 0.563813 | 92.313216 | 3.388146 | 64.831683 | 1.125408 | 88.299074 | 3.615830 |
8 | F@P-5% | ULPL+GD$_{GLB}$ | 68.600462 | 0.268137 | 91.553947 | 1.576586 | 64.554455 | 0.671518 | 87.591832 | 1.870129 |
9 | F@P-5% | ULPL+Adv | 68.230947 | 0.371530 | 91.618599 | 2.133755 | 64.910891 | 0.567042 | 87.625904 | 2.519088 |
0 | F@P-10% | Vanilla | 67.778291 | 1.223190 | 89.448417 | 3.194071 | 65.108911 | 0.878905 | 85.627670 | 2.342891 |
1 | F@P-10% | GD$_{CLA}$ | 66.937644 | 1.142849 | 94.809658 | 1.593586 | 63.960396 | 0.313097 | 91.727893 | 2.647230 |
2 | F@P-10% | GD$_{GLB}$ | 68.609700 | 0.569086 | 90.258730 | 1.883966 | 64.237624 | 1.636511 | 88.673020 | 2.978578 |
3 | F@P-10% | FairBatch | 66.909931 | 0.990975 | 92.039291 | 3.215274 | 64.396040 | 1.396004 | 88.231295 | 3.627183 |
4 | F@P-10% | Adv | 67.842956 | 0.466947 | 91.583757 | 4.181533 | 64.316832 | 1.323922 | 87.093292 | 3.450081 |
5 | F@P-10% | SemiAdv | 68.434180 | 0.718983 | 90.852623 | 2.234303 | 64.752475 | 0.959937 | 86.611808 | 1.089036 |
6 | F@P-10% | ARL | 67.944573 | 0.910607 | 91.721463 | 1.057673 | 64.603960 | 0.729813 | 88.201015 | 2.516849 |
7 | F@P-10% | ULPL+GD$_{CLA}$ | 62.096998 | 3.067093 | 96.054051 | 2.535753 | 59.960396 | 2.045459 | 90.207334 | 4.344703 |
8 | F@P-10% | ULPL+GD$_{GLB}$ | 68.600462 | 0.268137 | 91.553947 | 1.576586 | 64.554455 | 0.671518 | 87.591832 | 1.870129 |
9 | F@P-10% | ULPL+Adv | 68.230947 | 0.371530 | 91.618599 | 2.133755 | 64.910891 | 0.567042 | 87.625904 | 2.519088 |
# COMPAS / race attribute: draw the Performance-vs-Fairness line plot
# (grouped by model), then display the model-selection table whose output
# follows.
_dataset = "COMPAS_race"
make_plot(get_renamed_main_results(_dataset), figure_name=_dataset, figsize=(7.5, 4))
# NOTE(review): vanilla_performance is presumably the Vanilla model's accuracy
# expressed as a fraction; the Vanilla row printed below reports a test
# performance of 68.092379 — confirm 0.67778291 is the intended reference.
dataset_specific_selected_results(_dataset, vanilla_performance=0.67778291)
Selection | Models | test_performance mean | test_performance std | test_fairness mean | test_fairness std | dev_performance mean | dev_performance std | dev_fairness mean | dev_fairness std | |
---|---|---|---|---|---|---|---|---|---|---|
0 | F@P-5% | Vanilla | 68.092379 | 0.944570 | 67.198905 | 0.828365 | 64.792079 | 1.580441 | 70.920619 | 2.498510 |
1 | F@P-5% | GD$_{CLA}$ | 66.909931 | 1.406920 | 71.984431 | 3.346656 | 63.089109 | 1.757809 | 75.484419 | 2.294409 |
2 | F@P-5% | GD$_{GLB}$ | 66.771363 | 1.188692 | 72.117383 | 3.681672 | 63.841584 | 2.358016 | 74.670323 | 1.868329 |
3 | F@P-5% | FairBatch | 65.893764 | 2.093760 | 76.689815 | 8.858575 | 64.000000 | 2.111489 | 75.497509 | 3.350448 |
4 | F@P-5% | Adv | 56.000000 | 0.000000 | 100.000000 | 0.000000 | 56.000000 | 0.000000 | 100.000000 | 0.000000 |
5 | F@P-5% | SemiAdv | 66.327945 | 1.165767 | 73.505128 | 2.820835 | 64.237624 | 1.683751 | 78.367950 | 3.000537 |
6 | F@P-5% | ARL | 67.325635 | 0.733670 | 71.258652 | 0.854484 | 63.603960 | 0.797014 | 72.936190 | 0.943461 |
7 | F@P-5% | ULPL+GD$_{CLA}$ | 67.538106 | 0.796951 | 71.444062 | 2.065372 | 64.198020 | 1.423816 | 73.902555 | 3.597940 |
8 | F@P-5% | ULPL+GD$_{GLB}$ | 67.297921 | 0.510178 | 71.652411 | 1.041175 | 63.643564 | 1.201248 | 74.516125 | 1.933373 |
9 | F@P-5% | ULPL+Adv | 66.558891 | 1.643784 | 72.990955 | 3.705776 | 63.603960 | 2.129979 | 76.049793 | 3.151541 |
0 | F@P-10% | Vanilla | 68.092379 | 0.944570 | 67.198905 | 0.828365 | 64.792079 | 1.580441 | 70.920619 | 2.498510 |
1 | F@P-10% | GD$_{CLA}$ | 57.810624 | 5.575978 | 86.012469 | 5.118951 | 57.782178 | 3.251989 | 84.420219 | 6.275847 |
2 | F@P-10% | GD$_{GLB}$ | 66.152425 | 2.945528 | 73.861383 | 3.814308 | 62.534653 | 1.416914 | 77.020795 | 3.775631 |
3 | F@P-10% | FairBatch | 65.090069 | 2.168787 | 77.007973 | 5.084144 | 61.623762 | 2.175516 | 76.763936 | 4.031853 |
4 | F@P-10% | Adv | 56.000000 | 0.000000 | 100.000000 | 0.000000 | 56.000000 | 0.000000 | 100.000000 | 0.000000 |
5 | F@P-10% | SemiAdv | 60.424942 | 6.154672 | 78.460387 | 6.516605 | 60.356436 | 4.157001 | 83.248302 | 9.289408 |
6 | F@P-10% | ARL | 59.722864 | 5.519099 | 85.485285 | 12.044706 | 57.940594 | 5.159166 | 85.754825 | 11.343030 |
7 | F@P-10% | ULPL+GD$_{CLA}$ | 59.806005 | 1.200657 | 86.512624 | 3.557908 | 57.940594 | 1.752224 | 86.242875 | 4.594808 |
8 | F@P-10% | ULPL+GD$_{GLB}$ | 66.161663 | 0.550991 | 73.281673 | 3.386462 | 61.980198 | 1.873355 | 77.000498 | 3.577178 |
9 | F@P-10% | ULPL+Adv | 63.334873 | 2.030074 | 75.016798 | 4.062735 | 61.821782 | 1.849128 | 79.810122 | 2.546277 |
# COMPAS / intersectional groups: draw the Performance-vs-Fairness line plot
# (grouped by model), then display the model-selection table whose output
# follows.
_dataset = "COMPAS_intersection"
make_plot(get_renamed_main_results(_dataset), figure_name=_dataset, figsize=(7.5, 4))
# NOTE(review): vanilla_performance is presumably the Vanilla model's accuracy
# expressed as a fraction; the Vanilla row printed below reports a test
# performance of 67.630485 — confirm 0.67778291 is the intended reference.
dataset_specific_selected_results(_dataset, vanilla_performance=0.67778291)
Selection | Models | test_performance mean | test_performance std | test_fairness mean | test_fairness std | dev_performance mean | dev_performance std | dev_fairness mean | dev_fairness std | |
---|---|---|---|---|---|---|---|---|---|---|
0 | F@P-5% | Vanilla | 67.630485 | 1.137704 | 68.354063 | 3.665127 | 63.920792 | 1.223884 | 66.467300 | 2.303214 |
1 | F@P-5% | GD$_{CLA}$ | 67.639723 | 0.786305 | 66.890612 | 2.223865 | 64.000000 | 1.149539 | 69.340189 | 1.035730 |
2 | F@P-5% | GD$_{GLB}$ | 67.593533 | 0.799090 | 69.410431 | 3.813653 | 63.485149 | 1.796423 | 68.164726 | 2.671022 |
3 | F@P-5% | FairBatch | 67.889145 | 0.488167 | 71.199716 | 2.633420 | 64.277228 | 1.025123 | 68.531732 | 1.028311 |
4 | F@P-5% | Adv | 56.000000 | 0.000000 | 100.000000 | 0.000000 | 56.000000 | 0.000000 | 100.000000 | 0.000000 |
5 | F@P-5% | SemiAdv | 56.000000 | 0.000000 | 100.000000 | 0.000000 | 56.000000 | 0.000000 | 100.000000 | 0.000000 |
6 | F@P-5% | ARL | 68.706697 | 0.424591 | 67.747048 | 0.934790 | 63.366337 | 0.840127 | 70.523890 | 1.242931 |
7 | F@P-5% | ULPL+GD$_{CLA}$ | 68.064665 | 1.298078 | 69.025258 | 2.197286 | 64.158416 | 1.320957 | 69.012945 | 0.758773 |
8 | F@P-5% | ULPL+GD$_{GLB}$ | 67.935335 | 0.397873 | 69.390191 | 2.317468 | 63.762376 | 0.779605 | 68.987717 | 2.991714 |
9 | F@P-5% | ULPL+Adv | 67.473441 | 0.460042 | 70.387976 | 4.657340 | 63.009901 | 1.263298 | 70.460319 | 1.711306 |
0 | F@P-10% | Vanilla | 67.630485 | 1.137704 | 68.354063 | 3.665127 | 63.920792 | 1.223884 | 66.467300 | 2.303214 |
1 | F@P-10% | GD$_{CLA}$ | 59.842956 | 4.357326 | 81.445579 | 4.378875 | 58.574257 | 2.474851 | 78.818469 | 0.843710 |
2 | F@P-10% | GD$_{GLB}$ | 64.295612 | 3.899516 | 75.871936 | 4.371375 | 60.871287 | 2.459753 | 75.447562 | 1.519480 |
3 | F@P-10% | FairBatch | 61.598152 | 2.025656 | 79.112535 | 5.262433 | 58.336634 | 3.076007 | 74.716738 | 5.601259 |
4 | F@P-10% | Adv | 56.000000 | 0.000000 | 100.000000 | 0.000000 | 56.000000 | 0.000000 | 100.000000 | 0.000000 |
5 | F@P-10% | SemiAdv | 56.000000 | 0.000000 | 100.000000 | 0.000000 | 56.000000 | 0.000000 | 100.000000 | 0.000000 |
6 | F@P-10% | ARL | 61.685912 | 9.885929 | 77.412720 | 14.080002 | 60.000000 | 6.863417 | 73.903220 | 15.185400 |
7 | F@P-10% | ULPL+GD$_{CLA}$ | 61.163972 | 2.888480 | 80.149577 | 4.406002 | 58.851485 | 3.154052 | 74.556133 | 7.805536 |
8 | F@P-10% | ULPL+GD$_{GLB}$ | 67.935335 | 0.397873 | 69.390191 | 2.317468 | 63.762376 | 0.779605 | 68.987717 | 2.991714 |
9 | F@P-10% | ULPL+Adv | 65.043880 | 2.786793 | 75.179807 | 5.006842 | 60.910891 | 1.332778 | 71.374800 | 2.266838 |