import pandas as pd
import itertools
import random
from scipy.stats import bernoulli
import numpy as np
import copy
import traceback


import os
import sys
sys.path.append("..")
sys.path.append("GerryFair")
import gerryfair


from util_functions_cbs_benchmarking import *
from multiaccuracy_funcs import * 

from conditional_bias_scan import ConditionalBiasScan
from cbs_preprocessor import CBSPreProcessor
from cbs_logger import CBSLogger
from yaml_funcs import YamlFunctions
from dataset_specific_funcs import DatasetSpecificFuncs
from sklearn import linear_model


import matplotlib.pyplot as plt
import time

from  scipy.stats import pearsonr

import multiprocessing as mp

# ---------------------------------------------------------------------------
# Run-wide setup: timestamp, source data and default simulation parameters
# ---------------------------------------------------------------------------

# Timestamp suffix embedded in the names of this run's output folders/files.
timestr = time.strftime("_%Y%m%d_%H%M%S")

# Names of the feature columns kept from the raw data set.
cols_copy = ["Under 25", "Prior Offenses", "Race", "ChargeDegree", "Sex"]

# Read in the data set and limit it to the feature columns only.
# Alternative input: "../../toy_datasets/noncorrelation_synthetic_compas.csv".
# A larger sample can be produced by stacking the frame on itself, e.g.
# pd.concat([df] * 8, axis=0, ignore_index=True).
df = pd.read_csv("../../toy_datasets/COMPAS_v2.csv")[list(cols_copy)]

# Deep copies of the original data set; `deep_df_copy` is the snapshot the
# simulation loop copies from when it (re)starts a run.
df_copy = df.copy(deep=True)
deep_df_copy = df_copy.copy(deep=True)


######################## MODERATE VALUES ######################################
# Fixed default values.
sigma = 0.2          # passed to add_bias_shifted_by_mu by the simulations
sigma_true = 0.6     # noise for producing true log-odds
pr_subset = 0.5      # probability of selecting a feature value
num_affected = 2     # number of features to select
###############################################################################

# _Delta: shift of true log-odds for protected class (+Delta) and
#         non-protected class (-Delta).
# _delta: shift of true log-odds for bias subset in protected class (+delta)
#         and bias subset in non-protected class (-delta).
_Delta = _delta = 0

# Configs file for CBS.
# Alternative config:
# "../fsscan_yamls/fsscan_configs-CBS_benchmark-binary_sufficiency_scan_CV.yaml"
yaml_configs_path = "../fsscan_yamls/fsscan_configs-CBS_benchmark.yaml"

# Needed for the benchmark tests: forwarded as the first three arguments of
# gerryfair.clean.clean_dataset inside run_wrapper.
dataset = attributes = None
centered = True

# Needed for parallel processing.
active_workers = completed_workers = 0
active_processes_list = []
active_worker_constant = 10
#


def _record_gerryfair_audit(run_info, df_p, recommendations, s_bias, group_ind,
                            scope, error_type):
    """Run one GerryFair audit and store its metrics in ``run_info``.

    Parameters
    ----------
    run_info : dict
        Mutated in place; receives the four
        ``gerryfair_<scope>_features_*_<error_type>`` entries.
    df_p : pandas.DataFrame
        Audit data handed to the attribute builders, ``clean_dataset`` and
        the ``compute_*_gerryfair`` helpers.
    recommendations
        Predictions passed to ``Auditor.audit``.
    s_bias, group_ind
        Injected bias subset and group-indicator column name, forwarded to
        the ``compute_*_gerryfair`` helpers.
    scope : str
        ``"all"`` builds the attributes with
        ``create_attributes_data_all_protected``; any other value (the caller
        uses ``"one"``) uses ``create_attributes_data_one_protected`` with
        ``group_ind``.
    error_type : str
        Error type given to ``gerryfair.model.Auditor`` (``'FP'`` or ``'FN'``).
    """
    outcome_col = 'ReoffendedWithinTwoYears'
    if scope == "all":
        attributes_df = create_attributes_data_all_protected(df_p, outcome_col)
    else:
        attributes_df = create_attributes_data_one_protected(df_p, outcome_col, group_ind)

    # dataset / attributes / centered are module-level settings.
    _X, X_prime, y = gerryfair.clean.clean_dataset(
        dataset, attributes, centered, data=df_p, attributes_df=attributes_df)
    auditor = gerryfair.model.Auditor(X_prime, y, error_type)
    violated_group, fairness_violation, _group = auditor.audit(
        recommendations, under_estimation=True)

    prefix = "gerryfair_" + scope + "_features_"
    run_info[prefix + "accuracy_" + error_type] = compute_accuracy_gerryfair(
        df_p, violated_group, s_bias, group_ind)
    run_info[prefix + "precision_" + error_type] = compute_precision_gerryfair(
        df_p, violated_group, s_bias, group_ind)
    run_info[prefix + "recall_" + error_type] = compute_recall_gerryfair(
        df_p, violated_group, s_bias, group_ind)
    run_info[prefix + "fairness_violation_score_" + error_type] = fairness_violation


def _record_multiaccuracy_audit(run_info, features, log_odds, outcomes, s_bias,
                                group_ind, audit_func, suffix):
    """Run one multiaccuracy variant and store its metrics in ``run_info``.

    ``audit_func`` is called as ``audit_func(features, log_odds, outcomes,
    "FP")``; its result goes through ``find_highest_corr`` and the returned
    ids are scored against the injected subset.  Keys written are
    ``multiaccuracy_{accuracy,precision,recall,score}`` + ``suffix``.
    """
    res_data = audit_func(features, log_odds, outcomes, "FP")
    corr, ma_id = find_highest_corr(res_data)
    flagged = list(ma_id)

    run_info["multiaccuracy_accuracy" + suffix] = compute_accuracy_gerryfair(
        features, flagged, s_bias, group_ind)
    run_info["multiaccuracy_precision" + suffix] = compute_precision_gerryfair(
        features, flagged, s_bias, group_ind)
    run_info["multiaccuracy_recall" + suffix] = compute_recall_gerryfair(
        features, flagged, s_bias, group_ind)
    run_info["multiaccuracy_score" + suffix] = corr


def run_wrapper(run_info):
    """Execute one conditional bias scan and write its results to a CSV file.

    The scan described by ``run_info["scan_params"]`` is run with
    ``ConditionalBiasScan``; the detected subset is compared with the injected
    subset (accuracy / precision / recall).  When the scan type contains
    ``"prediction_separation"`` the GerryFair (FP and FN, all-features and
    one-feature) and multiaccuracy benchmarks are run as well.  Everything is
    written as a single-row CSV under
    ``Benchmark_results/_<timestr>/bias_results``.

    Parameters
    ----------
    run_info : dict
        Must contain ``"scan_params"`` (a dict with ``scan_info``,
        ``dataset_yaml``, ``data``, ``p_bin_var``, ``tilde_probability_var``,
        ``df_t`` and ``experiment_name``), ``"selected_bias_subset"``,
        ``"group_ind"``, ``"protected_class"`` (a ``(key, value)`` pair),
        ``"run_number"``, ``"mu"``, ``"sigma"`` and ``"varying_parameter"``.
        The dict is mutated in place: ``"scan_params"`` is removed and the
        scan settings and benchmark metrics are added.

    Returns
    -------
    None

    Notes
    -----
    Reads the module-level names ``fsscan_configs``, ``cbs_logger``,
    ``timestr``, ``dataset``, ``attributes`` and ``centered``; they have to be
    defined before this function is called.
    NOTE(review): if this is dispatched to worker processes, confirm those
    globals hold the intended values inside the workers.
    """
    scan_params = run_info["scan_params"]
    scan = scan_params["scan_info"]
    dataset_yaml = scan_params["dataset_yaml"]
    data = scan_params["data"]
    p_bin_var = scan_params["p_bin_var"]
    tilde_probability_var = scan_params["tilde_probability_var"]
    df_t = scan_params["df_t"]
    experiment_name = scan_params["experiment_name"]

    s_bias = run_info["selected_bias_subset"]
    group_ind = run_info["group_ind"]
    key, _key_value = run_info["protected_class"]

    print("printing scan type:::")
    print(scan["scan_type"])
    print(str(run_info["run_number"]))
    print(str(run_info["mu"]))

    # Scan description shared, in this order, by the ConditionalBiasScan
    # constructor and by both cbs_logger.write_results calls.
    scan_spec = (
        scan["protected_class"],
        scan["protected_value"],
        scan["combo"],
        scan["event"],
        scan["conditional_variable"],
        fsscan_configs["fsscan_params"],
        scan["direction"],
        scan["feature_list"],
        scan["scan_type"],
        scan["scan_feature_list"],
    )

    cbs = ConditionalBiasScan(*scan_spec, scan["threshold_probability"], scan["threshold_cutoff"])
    results = cbs.run(dataset_yaml, data, p_bin_var, tilde_probability_var)

    # Result fields that are identical for both write_results calls.
    shared_results = (
        results["treatment"],
        results["treatment_events"],
        results["treatment_p_hat"],
        results["controls"],
        results["control_events"],
        results["control_conditional_var"],
        results["treatment_conditional_var"],
        results["dataset_yaml"],
    )

    # Statistics for the subset found by the scan.
    stats_dict = cbs_logger.write_results(
        results["best_subset"], results["best_score"], results["best_param"],
        *shared_results, *scan_spec,
        "", add_scores=True, include_conditional_var_base_rates=True)

    # Same statistics for the injected subset; the score and parameter are
    # placeholder values since this subset was not produced by the scan.
    other_dict = cbs_logger.write_results(
        s_bias, -1000000000, np.inf,
        *shared_results, *scan_spec,
        "_for_s_bias", add_scores=True, include_conditional_var_base_rates=True)

    # The bulky scan inputs must not end up in the result row.
    del run_info["scan_params"]

    # "\\h" keeps the printed text identical to the former "\h" while avoiding
    # the invalid-escape-sequence warning.
    print("coefficients used for variable of logistic regression used to produce \\hat p: ")
    print(results["p_hat_coefficient_mapping"])

    print("best subset found : " + str(results["best_subset"]))
    print("best score : " + str(results["best_score"]))
    print("param for best scoring subset : "+ str(results["best_param"]))

    s_found_subset = results["best_subset"]
    cbs_accuracy = compute_accuracy(df_t, s_bias, s_found_subset, group_ind)
    print(s_found_subset)
    print("accuracy:")
    print(cbs_accuracy)

    # Insertion order below defines the column order of the output CSV.
    run_info["combo"] = scan["combo"]
    run_info["event"] = scan["event"]
    run_info["experiment_name"] = experiment_name
    run_info["conditional_variable"] = scan["conditional_variable"]
    run_info["fsscan_params"] = fsscan_configs["fsscan_params"]
    for scan_key in ("direction", "feature_list", "scan_type", "scan_feature_list",
                     "threshold_probability", "threshold_cutoff"):
        run_info[scan_key] = scan[scan_key]

    run_info["best_subset"] = s_found_subset
    run_info["best_score"] = results["best_score"]
    run_info["best_param"] = results["best_param"]

    run_info["cbs_accuracy"] = cbs_accuracy
    run_info["cbs_precision"] = compute_precision(df_t, s_bias, s_found_subset, group_ind)
    run_info["cbs_recall"] = compute_recall(df_t, s_bias, s_found_subset, group_ind)
    run_info["cbs_param"] = results["best_param"]
    run_info["cbs_score"] = results["best_score"]
    run_info["p_hat_coefficient_mapping"] = results["p_hat_coefficient_mapping"]

    # Benchmark methods are only run for prediction-separation scans.
    if "prediction_separation" in scan["scan_type"]:

        recommendations = df_t["predicted_probs"]
        # Audit frame: features, outcome and group indicator, without the
        # protected-class column itself.
        df_p = df_t[['Under 25', 'Prior Offenses', 'Race', 'ChargeDegree', 'Sex',
                     'ReoffendedWithinTwoYears', group_ind]].drop(columns=[key])

        # GerryFair: all features FP, one feature FP, all features FN,
        # one feature FN.
        for error_type in ("FP", "FN"):
            for scope in ("all", "one"):
                _record_gerryfair_audit(run_info, df_p, recommendations, s_bias,
                                        group_ind, scope, error_type)

        # run multiaccuracy
        print("Running multi-accuracy")

        outcomes = df_p["ReoffendedWithinTwoYears"]
        ma_features = df_p.drop(columns=["ReoffendedWithinTwoYears"])
        log_odds = df_t["predicted_log_odds"]

        for suffix, audit_func in (("", run_multiaccuracy),
                                   ("_org", run_multiaccuracy_org),
                                   ("_exp", run_multiaccuracy_exp)):
            _record_multiaccuracy_audit(run_info, ma_features, log_odds, outcomes,
                                        s_bias, group_ind, audit_func, suffix)

    # Later dicts win on key clashes: run_info < stats_dict < other_dict.
    merged_info = {**run_info, **stats_dict, **other_dict}

    file_name = (
        "Benchmark_results/_" + timestr + "/bias_results"
        + "/_run_num_" + str(merged_info["run_number"])
        + "_" + str(experiment_name)
        + "_" + str(merged_info["varying_parameter"])
        + "_" + scan["scan_type"]
        + "_sigmapred_" + str(merged_info["sigma"])
        + "_mu_" + str(merged_info["mu"])
        + ".csv"
    )
    pd.DataFrame([merged_info]).to_csv(file_name)

    
# Create the output folders for this run:
#   Benchmark_results/_<timestamp>/bias_results        per-scan result CSVs
#   Benchmark_results/_<timestamp>/original_data_sets  snapshots of the generated data
folder_path = "Benchmark_results/"+"_"+ timestr 
folder_path_bias = "Benchmark_results/"+"_"+ timestr +"/bias_results"
folder_path_org = "Benchmark_results/_" +timestr +"/original_data_sets"
# makedirs also creates the parent "Benchmark_results" folder when it is
# missing (os.mkdir would raise FileNotFoundError); like os.mkdir it still
# raises FileExistsError if the timestamped folder already exists.
os.makedirs(folder_path)
os.mkdir(folder_path_bias)
os.mkdir(folder_path_org)

# Names of the simulations to set up; each section of the main loop checks
# for its own name in this list. Alternative selections kept for convenience:
simulations_to_run = ["1A", "1B", "1CD", "1E", "2A", "2B", "2CD", "2E", "3", "1A_suff", "1B_suff", "1CD_suff", "1E_suff","3_suff"]
#simulations_to_run = ["1A"]
#simulations_to_run = ["1A", "1B",  "1E","3", "4", "2A", "5A", "5B"]
#simulations_to_run = ["1A", "2A"]
#simulations_to_run = ["1A", "1A_suff"]
#simulations_to_run = ["2A"]

# Passed as the last argument to
# pick_protected_class_bias_subset_no_filter_gaussian in the main loop.
sigma_coef = .2

for run_number in range(0,100):
    unsuccessful = True
    run_infos = [] 
    print("running run number "+str(run_number))
    while (unsuccessful == True):
        print("trying again for "+str(run_number))
        df_copy = deep_df_copy.copy(deep = True)
        run_infos = [] 
        ######################## MODERATE VALUES ######################################
        ### setting fixed values
        sigma = .2

        # noise for producing true log-odds
        sigma_true = .6

        # probability of selecting a feature value
        pr_subset = .5

        # number of features to select
        num_affected =2

        ###############################################################################

        # shift of true log-odds for protected class (+Delta) and non-protected class (-Delta)
        _Delta = 0

        # shift of true log-odds for bias subset in protected class (+delta) and bias subset in non-protected class (-delta)
        _delta = 0
        
        sigma_coef =.2

        try:
            run_infos = [] 
            run_info = {}
            run_info["run_number"] = run_number

            df_t = df_copy.copy(deep = True)
            cols_t = copy.deepcopy(cols_copy)

            df_t, cols_t, key, key_value, group_ind, s_bias, len_p, len_c, true_log_odds, coefficient_map = pick_protected_class_bias_subset_no_filter_gaussian(df_t, cols_t, 150, num_affected, pr_subset, _Delta,sigma_true, sigma_coef)
            # copying outcomes over to COMPAS dataset
            df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]
            df_copy_within_run = df_copy.copy(deep = True)

            print("Protected class:"+ key+ " : " + str(key_value))
            print("Biased subset: "+ str(s_bias))
            print("Number of individuals in bias subset for protected class: " + str(len_p))
            print("Number of individuals in bias subset for non-protected class: " + str(len_c))
            print("Coefficients used to protected true log-odds: "+ str(coefficient_map))

            to_choose =  df_t[s_bias.keys()].isin(s_bias).all(axis=1).astype(int)
            df_t["in_bias_subset"] = to_choose

            run_info["protected_class"] = (key, key_value)
            #run_info["_Delta"] = _Delta
            #run_info["sigma_true"] = sigma_true

            run_info["coefficient_values"] = coefficient_map


            run_info["selected_bias_subset"] = s_bias
            run_info["group_ind"] = group_ind
            run_info["number_of_rows_protected_class_bias_subset"] = len_p
            run_info["number_of_rows_control_bias_subset"] = len_c

            mu_values = [i/10 for i in list(range(0,41))]
            #mu_values = [0,1,4]
            sigma_predicts = [.20]
            mu_suff = 0

            df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+".csv")

            # DIFFERENCES IN INJECTED BIAS
            
            if "1A" in simulations_to_run:

                # 1A
                for sigma in sigma_predicts:
                    for mu in mu_values: 
                        print("running for mu: " + str(mu))

                        # add bias
                        add_bias_shifted_by_mu(df_t, s_bias, mu, sigma, group_ind)

                        run_info["_delta"] = _delta
                        run_info["mu"] = mu
                        run_info["mu_suff"] = mu_suff
                        run_info["sigma"] = sigma
                        run_info["sigma_true"] = sigma_true
                        run_info["_Delta"] = _Delta
                        run_info["mu_suff"] = 0
                        run_info["num_affected"] = num_affected
                        run_info["pr_subset"] = pr_subset

                        run_info["varying_parameter"] = mu

                        log_odds_to_prob(df_t,"predicted_log_odds", "predicted_probs")

                        df_copy["tilde_p"] = df_t["predicted_probs"]
                        #df_t["predicted_probs_"+str(mu)] = df_t["predicted_probs"]

                        if (mu == 0):
                            df_copy.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1A_"+ str(mu)+"_sigma_predict_"+str(sigma)+"_.csv")
                            df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1A_"+ str(mu)+"_sigma_predict_"+str(sigma)+"_full_.csv")



                        yaml_funcs = YamlFunctions(yaml_configs_path)
                        fsscan_configs = yaml_funcs.run()

                        #setting up logger
                        cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                        # performing initial data preprocessing, in this case there is not any
                        data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                        data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                        # producing all scans in config file
                        scans = yaml_funcs.produce_scans(data, key, key_value)
                        for scan in scans:
                            print(scan)

                            scan_params = {}
                            scan_params["scan_info"] = scan
                            scan_params["dataset_yaml"] = dataset_yaml
                            scan_params["data"] =  data.copy(deep =True)
                            scan_params["p_bin_var"] = p_bin_var
                            scan_params["tilde_probability_var"] = tilde_probability_var
                            scan_params["df_t"] = df_t.copy(deep = True)
                            scan_params["df_copy"] = df_copy.copy(deep = True)
                            scan_params["experiment_name"] = "1A"
                            run_info["scan_params"] = scan_params
                            run_info_deep_copy = copy.deepcopy(run_info)
                            run_infos.append(run_info_deep_copy)
            
            df_copy = deep_df_copy.copy(deep =True)
            mu_suff = 0
            
            mu = 0
            mu_values_suff = [i/10 for i in list(range(0,41))]
            sigma_predicts = [.20]

            df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+".csv")
            
            copy_log_odds_org = copy.deepcopy(list(df_t["true_log_odds"]))
            copy_reffonding_outcomes = copy.deepcopy(list(df_t["ReoffendedWithinTwoYears"]))

            # DIFFERENCES IN INJECTED BIAS
            
            if "1A_suff" in simulations_to_run:

                # 1A_suff
                for sigma in sigma_predicts:
                    for mu_suff in mu_values_suff: 
                        print("running for mu: " + str(mu))
                        
                        df_t["true_log_odds"] = copy_log_odds_org

                        # add bias
                        add_bias_shifted_by_mu(df_t, s_bias, mu, sigma, group_ind)

                        run_info["_delta"] = _delta
                        run_info["mu"] = mu
                        run_info["mu_suff"] = mu_suff
                        run_info["sigma"] = sigma
                        run_info["sigma_true"] = sigma_true
                        run_info["_Delta"] = _Delta
                        run_info["num_affected"] = num_affected
                        run_info["pr_subset"] = pr_subset

                        run_info["varying_parameter"] = mu_suff

                        log_odds_to_prob(df_t,"predicted_log_odds", "predicted_probs")

                        df_copy["tilde_p"] = df_t["predicted_probs"]
                        #df_t["predicted_probs_"+str(mu)] = df_t["predicted_probs"]
                        
                        add_bias_shifted_log_odds_choose_new_outcomes(df_t, s_bias, (-1*mu_suff), group_ind)
                        df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                        if (mu_suff == 0):
                            df_copy.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1A_suff_"+ str(mu_suff)+"_sigma_predict_"+str(sigma)+"_.csv")
                            df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1A_suff_"+ str(mu_suff)+"_sigma_predict_"+str(sigma)+"_full_.csv")



                        yaml_funcs = YamlFunctions(yaml_configs_path)
                        fsscan_configs = yaml_funcs.run()

                        #setting up logger
                        cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                        # performing initial data preprocessing, in this case there is not any
                        data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                        data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                        # producing all scans in config file
                        scans = yaml_funcs.produce_scans(data, key, key_value)
                        for scan in scans:
                            print(scan)

                            scan_params = {}
                            scan_params["scan_info"] = scan
                            scan_params["dataset_yaml"] = dataset_yaml
                            scan_params["data"] =  data.copy(deep =True)
                            scan_params["p_bin_var"] = p_bin_var
                            scan_params["tilde_probability_var"] = tilde_probability_var
                            scan_params["df_t"] = df_t.copy(deep = True)
                            scan_params["df_copy"] = df_copy.copy(deep = True)
                            scan_params["experiment_name"] = "1A_suff"
                            run_info["scan_params"] = scan_params
                            run_info_deep_copy = copy.deepcopy(run_info)
                            run_infos.append(run_info_deep_copy)

            # 1B

            df_copy = df_copy_within_run.copy(deep =True)
            
            
            sigma_true = .6
            mu = 1
            mu_suff = 0
            sigmas  = [i/10 for i in list(range(0,21))]
            if "1B" in simulations_to_run:
                for sigma in sigmas:
                    print("running for sigma: " + str(sigma))
                    
                    df_t["true_log_odds"] = copy_log_odds_org

                    # add bias
                    add_bias_shifted_by_mu(df_t, s_bias, mu, sigma, group_ind)

                    run_info["_delta"] = _delta
                    run_info["mu"] = mu
                    run_info["mu_suff"] = mu_suff
                    run_info["sigma"] = sigma
                    run_info["sigma_true"] = sigma_true
                    run_info["_Delta"] = _Delta
                    run_info["num_affected"] = num_affected
                    run_info["pr_subset"] = pr_subset

                    run_info["varying_parameter"] = sigma

                    log_odds_to_prob(df_t,"predicted_log_odds", "predicted_probs")

                    df_copy["tilde_p"] = df_t["predicted_probs"]
                    
                    

                    if ((sigma == 0) or (sigma == .25)):
                        df_copy.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1B_"+ str(sigma)+"_.csv")
                        df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1B_"+ str(sigma)+"_full_.csv")
                    #df_t["predicted_probs_"+str(mu)] = df_t["predicted_probs"]

                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    #setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # performing initial data preprocessing, in this case there is not any
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # producing all scans in config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        print(scan)

                        scan_params = {}
                        scan_params["scan_info"] = scan
                        scan_params["dataset_yaml"] = dataset_yaml
                        scan_params["data"] =  data.copy(deep =True)
                        scan_params["p_bin_var"] = p_bin_var
                        scan_params["tilde_probability_var"] = tilde_probability_var
                        scan_params["df_t"] = df_t.copy(deep = True)
                        scan_params["df_copy"] = df_copy.copy(deep = True)
                        scan_params["experiment_name"] = "1B"
                        run_info["scan_params"] = scan_params
                        run_info_deep_copy = copy.deepcopy(run_info)
                        run_infos.append(run_info_deep_copy)
            
            df_copy = df_copy_within_run.copy(deep =True)
            
            
            #1B_suff
            
            sigma_true = .6
            mu = 0
            mu_suff = 1
            sigmas  = [i/10 for i in list(range(0,21))]
            if "1B_suff" in simulations_to_run:
                for sigma in sigmas:
                    print("running for sigma: " + str(sigma))
                    
                    df_t["true_log_odds"] = copy_log_odds_org

                    # add bias
                    add_bias_shifted_by_mu(df_t, s_bias, mu, sigma, group_ind)

                    run_info["_delta"] = _delta
                    run_info["mu"] = mu
                    run_info["mu_suff"] = mu_suff
                    run_info["sigma"] = sigma
                    run_info["sigma_true"] = sigma_true
                    run_info["_Delta"] = _Delta
                    run_info["num_affected"] = num_affected
                    run_info["pr_subset"] = pr_subset

                    run_info["varying_parameter"] = sigma

                    log_odds_to_prob(df_t,"predicted_log_odds", "predicted_probs")

                    df_copy["tilde_p"] = df_t["predicted_probs"]
                    
                    add_bias_shifted_log_odds_choose_new_outcomes(df_t, s_bias, (-1*mu_suff), group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    if ((sigma == 0) or (sigma == .25)):
                        df_copy.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1B_suff_"+ str(sigma)+"_.csv")
                        df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_1B_suff_"+ str(sigma)+"_full_.csv")
                    #df_t["predicted_probs_"+str(mu)] = df_t["predicted_probs"]

                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    #setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # performing initial data preprocessing, in this case there is not any
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # producing all scans in config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        print(scan)

                        scan_params = {}
                        scan_params["scan_info"] = scan
                        scan_params["dataset_yaml"] = dataset_yaml
                        scan_params["data"] =  data.copy(deep =True)
                        scan_params["p_bin_var"] = p_bin_var
                        scan_params["tilde_probability_var"] = tilde_probability_var
                        scan_params["df_t"] = df_t.copy(deep = True)
                        scan_params["df_copy"] = df_copy.copy(deep = True)
                        scan_params["experiment_name"] = "1B_suff"
                        run_info["scan_params"] = scan_params
                        run_info_deep_copy = copy.deepcopy(run_info)
                        run_infos.append(run_info_deep_copy)

            # ---------------------------------------------------------------
            # Experiment 2A: sweep _delta (shift of the true log-odds for the
            # bias subset) while the shift mu given to add_bias_shifted_by_mu
            # stays at 0.
            # ---------------------------------------------------------------

            # work on a fresh deep copy of the per-run frame
            df_copy = df_copy_within_run.copy(deep=True)

            mu_values = [0]
            mu_suff = 0
            sigma_predicts = [.20]
            sigma_true = .6
            # _delta grid: -4.0 through 4.0 in steps of 0.1 (81 values)
            _delta_values = [(i / 10) - 4 for i in range(81)]

            if "2A" in simulations_to_run:

                # product() visits the combinations in the same order as nested
                # sigma -> mu -> _delta loops (the right-most iterable varies fastest)
                for sigma, mu, _delta_value in itertools.product(sigma_predicts, mu_values, _delta_values):
                    print(f"running for _delta: {_delta_value}")

                    # restore the saved true log-odds, then apply the _delta shift
                    # (the helper presumably edits df_t in place -- its return value is ignored)
                    df_t["true_log_odds"] = copy_log_odds_org
                    add_bias_shifted_log_odds_choose_new_outcomes(df_t, s_bias, _delta_value, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # add bias
                    add_bias_shifted_by_mu(df_t, s_bias, mu, sigma, group_ind)

                    # record the settings of this configuration on the shared run_info
                    for info_field, info_value in (
                        ("_delta", _delta_value),
                        ("mu", mu),
                        ("mu_suff", mu_suff),
                        ("sigma", sigma),
                        ("sigma_true", sigma_true),
                        ("_Delta", _Delta),
                        ("num_affected", num_affected),
                        ("pr_subset", pr_subset),
                        ("varying_parameter", _delta_value),
                    ):
                        run_info[info_field] = info_value

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # NOTE (kept from the original): CHANGE BACK POST EXPERIMENT.
                    # Only the _delta == 0 configuration is snapshotted to disk.
                    if _delta_value == 0:
                        csv_stem = f"{folder_path_org}/_run_num_{run_number}_2A_{_delta_value}_sigmapred_{sigma}_mu_{mu}_"
                        df_copy.to_csv(csv_stem + ".csv")
                        df_t.to_csv(csv_stem + "full_.csv")

                    # re-read the scan configuration for this parameter setting
                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # initial data preprocessing (per the original note, there is none in this case)
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # queue one run_info snapshot per scan listed in the config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        print(scan)

                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep=True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep=True),
                            "df_copy": df_copy.copy(deep=True),
                            "experiment_name": "2A",
                        }
                        run_info["scan_params"] = scan_params
                        # deep copy so later mutations of run_info do not leak into queued runs
                        run_info_deep_copy = copy.deepcopy(run_info)
                        run_infos.append(run_info_deep_copy)
                                
             #2B
    
            # ---------------------------------------------------------------
            # Experiment 2B: fixed _delta, sweep of the sigma value passed to
            # add_bias_shifted_by_mu.
            # ---------------------------------------------------------------

            # work on a fresh deep copy of the per-run frame
            df_copy = df_copy_within_run.copy(deep=True)

            mu = 0
            mu_suff = 0
            _Delta = 0
            _delta = 0.5
            sigma_true = .6
            sigma = .2
            # sigma grid: 0.0 through 2.0 in steps of 0.1
            sigmas = [i / 10 for i in range(21)]
            num_affected = 2
            pr_subset = .5

            if "2B" in simulations_to_run:
                for sigma_value in sigmas:
                    print(f"running for sigma: {sigma_value}")

                    # restore the saved true log-odds, then apply the fixed _delta shift
                    df_t["true_log_odds"] = copy_log_odds_org
                    add_bias_shifted_log_odds_choose_new_outcomes(df_t, s_bias, _delta, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # add bias
                    add_bias_shifted_by_mu(df_t, s_bias, mu, sigma_value, group_ind)

                    # record the settings of this configuration on the shared run_info
                    for info_field, info_value in (
                        ("_delta", _delta),
                        ("mu", mu),
                        ("mu_suff", mu_suff),
                        ("sigma", sigma_value),
                        ("sigma_true", sigma_true),
                        ("_Delta", _Delta),
                        ("num_affected", num_affected),
                        ("pr_subset", pr_subset),
                        ("varying_parameter", sigma_value),
                    ):
                        run_info[info_field] = info_value

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # snapshot selected configurations to disk
                    # (.25 is not on the 0.1-step grid above, so only 0 can match)
                    if sigma_value in (0, .25):
                        csv_stem = f"{folder_path_org}/_run_num_{run_number}_2B_{sigma_value}_"
                        df_copy.to_csv(csv_stem + ".csv")
                        df_t.to_csv(csv_stem + "full_.csv")

                    # re-read the scan configuration for this parameter setting
                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # initial data preprocessing (per the original note, there is none in this case)
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # queue one run_info snapshot per scan listed in the config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        print(scan)

                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep=True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep=True),
                            "df_copy": df_copy.copy(deep=True),
                            "experiment_name": "2B",
                        }
                        run_info["scan_params"] = scan_params
                        # deep copy so later mutations of run_info do not leak into queued runs
                        run_info_deep_copy = copy.deepcopy(run_info)
                        run_infos.append(run_info_deep_copy)

            # ---------------------------------------------------------------
            # Experiment 3: mu fixed at 1, sweep of _delta (shift of the true
            # log-odds for the bias subset) from -1.0 to 1.0 in steps of 0.1.
            # ---------------------------------------------------------------

            # work on a fresh deep copy of the per-run frame
            df_copy = df_copy_within_run.copy(deep=True)
            mu = 1
            mu_suff = 0
            sigma = .2
            sigma_true = .6
            _delta_values = [((i / 10) - 1.0) for i in range(21)]

            if "3" in simulations_to_run:
                for _delta_value in _delta_values:
                    print("running for _delta: " + str(_delta_value))

                    # restore the saved true log-odds, then apply the _delta shift
                    df_t["true_log_odds"] = copy_log_odds_org
                    add_bias_shifted_log_odds_choose_new_outcomes(df_t, s_bias, _delta_value, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # add bias
                    add_bias_shifted_by_mu(df_t, s_bias, mu, sigma, group_ind)

                    # record the settings of this configuration on the shared run_info
                    run_info["_delta"] = _delta_value
                    run_info["mu"] = mu
                    run_info["mu_suff"] = mu_suff
                    run_info["sigma"] = sigma
                    run_info["sigma_true"] = sigma_true
                    run_info["_Delta"] = _Delta
                    run_info["num_affected"] = num_affected
                    run_info["pr_subset"] = pr_subset

                    run_info["varying_parameter"] = _delta_value

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # Snapshot the _delta == 0 configuration.
                    # FIX: the guard and file names previously used the stale
                    # module-level `_delta` (left at 0.5 by the 2B section) instead
                    # of the loop variable, so the snapshot was never written.
                    if _delta_value == 0:
                        df_copy.to_csv(folder_path_org + "/_run_num_" + str(run_number) + "_3_" + str(_delta_value) + "_.csv")
                        df_t.to_csv(folder_path_org + "/_run_num_" + str(run_number) + "_3_" + str(_delta_value) + "_full_.csv")

                    # re-read the scan configuration for this parameter setting
                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # initial data preprocessing (per the original note, there is none in this case)
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # queue one run_info snapshot per scan listed in the config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        print(scan)

                        scan_params = {}
                        scan_params["scan_info"] = scan
                        scan_params["dataset_yaml"] = dataset_yaml
                        scan_params["data"] = data.copy(deep=True)
                        scan_params["p_bin_var"] = p_bin_var
                        scan_params["tilde_probability_var"] = tilde_probability_var
                        scan_params["df_t"] = df_t.copy(deep=True)
                        scan_params["df_copy"] = df_copy.copy(deep=True)
                        scan_params["experiment_name"] = "3"
                        run_info["scan_params"] = scan_params
                        # deep copy so later mutations of run_info do not leak into queued runs
                        run_info_deep_copy = copy.deepcopy(run_info)
                        run_infos.append(run_info_deep_copy)

                    # FIX: reset df_copy once per _delta value, after every scan has
                    # been queued. The reset used to sit inside the scan loop, so every
                    # scan after the first received the reset frame instead of the
                    # frame prepared for this _delta value.
                    df_copy = df_copy_within_run.copy(deep=True)
            
            
            
            # ---------------------------------------------------------------
            # Experiment 3_suff: mu = 0 and mu_suff = 1, sweep of _delta from
            # -1.0 to 1.0 in steps of 0.1. After the predicted probabilities are
            # stored, the outcome helper is called a second time with a shift of
            # -mu_suff.
            # ---------------------------------------------------------------

            # work on a fresh deep copy of the per-run frame
            df_copy = df_copy_within_run.copy(deep=True)
            mu = 0
            mu_suff = 1
            sigma = .2
            sigma_true = .6
            _delta_values = [((i / 10) - 1.0) for i in range(21)]

            if "3_suff" in simulations_to_run:
                for _delta_value in _delta_values:
                    print("running for _delta: " + str(_delta_value))

                    # restore the saved true log-odds, then apply the _delta shift
                    df_t["true_log_odds"] = copy_log_odds_org
                    add_bias_shifted_log_odds_choose_new_outcomes(df_t, s_bias, _delta_value, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # add bias
                    add_bias_shifted_by_mu(df_t, s_bias, mu, sigma, group_ind)

                    # record the settings of this configuration on the shared run_info
                    run_info["_delta"] = _delta_value
                    run_info["mu"] = mu
                    run_info["mu_suff"] = mu_suff
                    run_info["sigma"] = sigma
                    run_info["sigma_true"] = sigma_true
                    run_info["_Delta"] = _Delta
                    run_info["num_affected"] = num_affected
                    run_info["pr_subset"] = pr_subset

                    run_info["varying_parameter"] = _delta_value

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # second call of the outcome helper, shifted by -mu_suff; the
                    # outcome column copied into df_copy comes from this call
                    add_bias_shifted_log_odds_choose_new_outcomes(df_t, s_bias, (-1 * mu_suff), group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # Snapshot the _delta == 0 configuration.
                    # FIX: the guard and file names previously used the stale
                    # module-level `_delta` (left at 0.5 by the 2B section) instead
                    # of the loop variable, so the snapshot was never written.
                    if _delta_value == 0:
                        df_copy.to_csv(folder_path_org + "/_run_num_" + str(run_number) + "_3_suff_" + str(_delta_value) + "_.csv")
                        df_t.to_csv(folder_path_org + "/_run_num_" + str(run_number) + "_3_suff_" + str(_delta_value) + "_full_.csv")

                    # re-read the scan configuration for this parameter setting
                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # initial data preprocessing (per the original note, there is none in this case)
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # queue one run_info snapshot per scan listed in the config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        print(scan)

                        scan_params = {}
                        scan_params["scan_info"] = scan
                        scan_params["dataset_yaml"] = dataset_yaml
                        scan_params["data"] = data.copy(deep=True)
                        scan_params["p_bin_var"] = p_bin_var
                        scan_params["tilde_probability_var"] = tilde_probability_var
                        scan_params["df_t"] = df_t.copy(deep=True)
                        scan_params["df_copy"] = df_copy.copy(deep=True)
                        scan_params["experiment_name"] = "3_suff"
                        run_info["scan_params"] = scan_params
                        # deep copy so later mutations of run_info do not leak into queued runs
                        run_info_deep_copy = copy.deepcopy(run_info)
                        run_infos.append(run_info_deep_copy)

            # ---------------------------------------------------------------
            # Experiment 4: mu fixed at 1, sweep of _Delta from -1.0 to 1.0 in
            # steps of 0.1. The shift is applied through the group-indicator
            # variant of the outcome helper.
            # ---------------------------------------------------------------
            mu = 1
            # FIX: this section applies no -mu_suff outcome shift, so mu_suff is
            # set (and recorded below) explicitly. Previously run_info kept whatever
            # value the last executed section stored, e.g. 1 from 3_suff.
            mu_suff = 0
            sigma = .2
            sigma_true = .6
            _delta = 0

            _Delta_values = [((i / 10.0) - 1.0) for i in range(21)]

            if "4" in simulations_to_run:

                for _Delta_value in _Delta_values:
                    # FIX: report the value being run, not the constant `_Delta`
                    print("running for _Delta: " + str(_Delta_value))

                    # restore the saved true log-odds, then apply the _Delta shift
                    df_t["true_log_odds"] = copy_log_odds_org
                    add_bias_shifted_log_odds_by_group_ind_choose_new_outcomes(df_t, _Delta_value, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # add bias
                    add_bias_shifted_by_mu(df_t, s_bias, mu, sigma, group_ind)

                    # record the settings of this configuration on the shared run_info
                    run_info["_delta"] = _delta
                    run_info["mu"] = mu
                    run_info["mu_suff"] = mu_suff
                    run_info["sigma"] = sigma
                    run_info["_Delta"] = _Delta_value
                    run_info["sigma_true"] = sigma_true
                    run_info["num_affected"] = num_affected
                    run_info["pr_subset"] = pr_subset

                    run_info["varying_parameter"] = _Delta_value

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # Snapshot the _Delta == 0 configuration.
                    # FIX: the guard and file names previously used `_Delta`, which is
                    # 0 throughout this section, so the same two files were rewritten
                    # on every iteration regardless of the value being run.
                    if _Delta_value == 0:
                        df_copy.to_csv(folder_path_org + "/_run_num_" + str(run_number) + "_4_" + str(_Delta_value) + "_.csv")
                        df_t.to_csv(folder_path_org + "/_run_num_" + str(run_number) + "_4_" + str(_Delta_value) + "_full_.csv")

                    # re-read the scan configuration for this parameter setting
                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # initial data preprocessing (per the original note, there is none in this case)
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # queue one run_info snapshot per scan listed in the config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        print(scan)

                        scan_params = {}
                        scan_params["scan_info"] = scan
                        scan_params["dataset_yaml"] = dataset_yaml
                        scan_params["data"] = data.copy(deep=True)
                        scan_params["p_bin_var"] = p_bin_var
                        scan_params["tilde_probability_var"] = tilde_probability_var
                        scan_params["df_t"] = df_t.copy(deep=True)
                        scan_params["df_copy"] = df_copy.copy(deep=True)
                        scan_params["experiment_name"] = "4"
                        run_info["scan_params"] = scan_params
                        # deep copy so later mutations of run_info do not leak into queued runs
                        run_info_deep_copy = copy.deepcopy(run_info)
                        run_infos.append(run_info_deep_copy)
            # ---------------------------------------------------------------
            # Experiment 1E: mu fixed at 1, sweep of the sigma value handed to
            # produce_true_log_odds (recorded as sigma_true).
            # ---------------------------------------------------------------
            mu = 1
            sigma = .2
            sigma_true = .6
            _delta = 0
            _Delta = 0
            mu_suff = 0

            # sigma_true grid: 0.0 through 2.0 in steps of 0.1
            sigma_true_values = [i / 10.0 for i in range(21)]

            if "1E" in simulations_to_run:
                for sigma_true_value in sigma_true_values:
                    print(f"running for sigma_true_value: {sigma_true_value}")

                    # regenerate the true log-odds with the current sigma_true value
                    log_odds, _, _ = produce_true_log_odds(df_t, cols_t, group_ind, _Delta, sigma_true_value, coefficient_map)
                    df_t["true_log_odds"] = log_odds

                    log_odds_to_prob(df_t, "true_log_odds", "true_probs")

                    # one outcome per row, derived from that row's true probability
                    df_t["ReoffendedWithinTwoYears"] = df_t["true_probs"].apply(singular_row_bernoulli_draw)
                    add_bias_shifted_by_mu(df_t, s_bias, mu, sigma, group_ind)

                    # record the settings of this configuration on the shared run_info
                    for info_field, info_value in (
                        ("_delta", _delta),
                        ("mu", mu),
                        ("mu_suff", mu_suff),
                        ("sigma", sigma),
                        ("_Delta", _Delta),
                        ("sigma_true", sigma_true_value),
                        ("num_affected", num_affected),
                        ("pr_subset", pr_subset),
                        ("varying_parameter", sigma_true_value),
                    ):
                        run_info[info_field] = info_value

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]
                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # snapshot selected configurations to disk
                    # (.25 is not on the 0.1-step grid above, so only 0 can match)
                    if sigma_true_value in (0, .25):
                        csv_stem = f"{folder_path_org}/_run_num_{run_number}_1E_{sigma_true_value}_"
                        df_copy.to_csv(csv_stem + ".csv")
                        df_t.to_csv(csv_stem + "full_.csv")

                    # re-read the scan configuration for this parameter setting
                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # initial data preprocessing (per the original note, there is none in this case)
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # queue one run_info snapshot per scan listed in the config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        print(scan)

                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep=True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep=True),
                            "df_copy": df_copy.copy(deep=True),
                            "experiment_name": "1E",
                        }
                        run_info["scan_params"] = scan_params
                        # deep copy so later mutations of run_info do not leak into queued runs
                        run_info_deep_copy = copy.deepcopy(run_info)
                        run_infos.append(run_info_deep_copy)
            
            # ---------------------------------------------------------------
            # Experiment 1E_suff: mu = 0 and mu_suff = 1, sweep of the sigma
            # value handed to produce_true_log_odds. After the predicted
            # probabilities are stored, the outcome helper is called with a
            # shift of -mu_suff.
            # ---------------------------------------------------------------

            mu = 0
            mu_suff = 1
            sigma = .2
            sigma_true = .6
            _delta = 0
            _Delta = 0

            # sigma_true grid: 0.0 through 2.0 in steps of 0.1
            sigma_true_values = [i / 10.0 for i in range(21)]

            if "1E_suff" in simulations_to_run:
                for sigma_true_value in sigma_true_values:
                    print(f"running for sigma_true_value: {sigma_true_value}")

                    # restore the saved true log-odds before regenerating them
                    df_t["true_log_odds"] = copy_log_odds_org

                    log_odds, _, _ = produce_true_log_odds(df_t, cols_t, group_ind, _Delta, sigma_true_value, coefficient_map)
                    df_t["true_log_odds"] = log_odds

                    log_odds_to_prob(df_t, "true_log_odds", "true_probs")

                    # one outcome per row, derived from that row's true probability
                    df_t["ReoffendedWithinTwoYears"] = df_t["true_probs"].apply(singular_row_bernoulli_draw)
                    add_bias_shifted_by_mu(df_t, s_bias, mu, sigma, group_ind)

                    # record the settings of this configuration on the shared run_info
                    for info_field, info_value in (
                        ("_delta", _delta),
                        ("mu", mu),
                        ("mu_suff", mu_suff),
                        ("sigma", sigma),
                        ("_Delta", _Delta),
                        ("sigma_true", sigma_true_value),
                        ("num_affected", num_affected),
                        ("pr_subset", pr_subset),
                        ("varying_parameter", sigma_true_value),
                    ):
                        run_info[info_field] = info_value

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]
                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # second call of the outcome helper, shifted by -mu_suff; the
                    # outcome column copied into df_copy is overwritten from this call
                    add_bias_shifted_log_odds_choose_new_outcomes(df_t, s_bias, (-1 * mu_suff), group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # snapshot selected configurations to disk
                    # (.25 is not on the 0.1-step grid above, so only 0 can match)
                    if sigma_true_value in (0, .25):
                        csv_stem = f"{folder_path_org}/_run_num_{run_number}_1E_suff_{sigma_true_value}_"
                        df_copy.to_csv(csv_stem + ".csv")
                        df_t.to_csv(csv_stem + "full_.csv")

                    # re-read the scan configuration for this parameter setting
                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # initial data preprocessing (per the original note, there is none in this case)
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # queue one run_info snapshot per scan listed in the config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        print(scan)

                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep=True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep=True),
                            "df_copy": df_copy.copy(deep=True),
                            "experiment_name": "1E_suff",
                        }
                        run_info["scan_params"] = scan_params
                        # deep copy so later mutations of run_info do not leak into queued runs
                        run_info_deep_copy = copy.deepcopy(run_info)
                        run_infos.append(run_info_deep_copy)

   

           

            

            # ---------------------------------------------------------------
            # Experiment 2E: mu = 0 and _delta = 0.5, sweep of the sigma value
            # handed to produce_true_log_odds (recorded as sigma_true).
            # ---------------------------------------------------------------

            # work on a fresh deep copy of the per-run frame
            df_copy = df_copy_within_run.copy(deep=True)

            mu = 0
            _Delta = 0
            _delta = 0.5
            sigma_true = .6
            sigma = .2
            mu_suff = 0

            # sigma_true grid: 0.0 through 2.0 in steps of 0.1
            sigma_true_values = [i / 10 for i in range(21)]

            if "2E" in simulations_to_run:
                for sigma_true_value in sigma_true_values:
                    print(f"running for sigma_true_value: {sigma_true_value}")

                    # restore the saved true log-odds before regenerating them
                    df_t["true_log_odds"] = copy_log_odds_org

                    log_odds, _, _ = produce_true_log_odds(df_t, cols_t, group_ind, _Delta, sigma_true_value, coefficient_map)
                    df_t["true_log_odds"] = log_odds

                    # apply the fixed _delta shift on top of the regenerated log-odds
                    add_bias_shifted_log_odds_choose_new_outcomes(df_t, s_bias, _delta, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    add_bias_shifted_by_mu(df_t, s_bias, mu, sigma, group_ind)

                    # record the settings of this configuration on the shared run_info
                    for info_field, info_value in (
                        ("_delta", _delta),
                        ("mu", mu),
                        ("mu_suff", mu_suff),
                        ("sigma", sigma),
                        ("_Delta", _Delta),
                        ("sigma_true", sigma_true_value),
                        ("num_affected", num_affected),
                        ("pr_subset", pr_subset),
                        ("varying_parameter", sigma_true_value),
                    ):
                        run_info[info_field] = info_value

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]
                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # snapshot selected configurations to disk
                    # (.25 is not on the 0.1-step grid above, so only 0 can match)
                    if sigma_true_value in (0, .25):
                        csv_stem = f"{folder_path_org}/_run_num_{run_number}_2E_{sigma_true_value}_"
                        df_copy.to_csv(csv_stem + ".csv")
                        df_t.to_csv(csv_stem + "full_.csv")

                    # re-read the scan configuration for this parameter setting
                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # initial data preprocessing (per the original note, there is none in this case)
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # queue one run_info snapshot per scan listed in the config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        print(scan)

                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep=True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep=True),
                            "df_copy": df_copy.copy(deep=True),
                            "experiment_name": "2E",
                        }
                        run_info["scan_params"] = scan_params
                        # deep copy so later mutations of run_info do not leak into queued runs
                        run_info_deep_copy = copy.deepcopy(run_info)
                        run_infos.append(run_info_deep_copy)
            
            # ---------------------------------------------------------------
            # Experiment 5A: _delta fixed at -0.5, sweep of mu from 0.0 to 2.0
            # in steps of 0.1.
            # ---------------------------------------------------------------

            mu = 0
            _Delta = 0
            _delta = -0.5
            sigma_true = .6
            sigma = .2
            # FIX: this section applies no -mu_suff outcome shift, so mu_suff is
            # set (and recorded below) explicitly rather than inheriting whatever
            # value the last executed section left in run_info.
            mu_suff = 0

            mu_values = [i / 10 for i in range(21)]

            if "5A" in simulations_to_run:
                for mu in mu_values:
                    print("running for mu: " + str(mu))

                    # restore the saved true log-odds before regenerating them
                    df_t["true_log_odds"] = copy_log_odds_org

                    log_odds, _, _ = produce_true_log_odds(df_t, cols_t, group_ind, _Delta, sigma_true, coefficient_map)
                    df_t["true_log_odds"] = log_odds

                    # apply the fixed _delta shift on top of the regenerated log-odds
                    add_bias_shifted_log_odds_choose_new_outcomes(df_t, s_bias, _delta, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    add_bias_shifted_by_mu(df_t, s_bias, mu, sigma, group_ind)

                    # record the settings of this configuration on the shared run_info
                    run_info["_delta"] = _delta
                    run_info["mu"] = mu
                    run_info["mu_suff"] = mu_suff
                    run_info["sigma"] = sigma
                    run_info["_Delta"] = _Delta
                    # FIX: record the sigma_true actually passed to produce_true_log_odds.
                    # The original stored `sigma_true_value`, a loop variable left over
                    # from an earlier section (a NameError if none of those sections ran).
                    run_info["sigma_true"] = sigma_true
                    run_info["num_affected"] = num_affected
                    run_info["pr_subset"] = pr_subset

                    run_info["varying_parameter"] = mu

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]
                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # snapshot the mu == 0 configuration to disk
                    if mu == 0:
                        df_copy.to_csv(folder_path_org + "/_run_num_" + str(run_number) + "_5A_" + str(mu) + "_.csv")
                        df_t.to_csv(folder_path_org + "/_run_num_" + str(run_number) + "_5A_" + str(mu) + "_full_.csv")

                    # re-read the scan configuration for this parameter setting
                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # initial data preprocessing (per the original note, there is none in this case)
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # queue one run_info snapshot per scan listed in the config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        print(scan)

                        scan_params = {}
                        scan_params["scan_info"] = scan
                        scan_params["dataset_yaml"] = dataset_yaml
                        scan_params["data"] = data.copy(deep=True)
                        scan_params["p_bin_var"] = p_bin_var
                        scan_params["tilde_probability_var"] = tilde_probability_var
                        scan_params["df_t"] = df_t.copy(deep=True)
                        scan_params["df_copy"] = df_copy.copy(deep=True)
                        scan_params["experiment_name"] = "5A"
                        run_info["scan_params"] = scan_params
                        # deep copy so later mutations of run_info do not leak into queued runs
                        run_info_deep_copy = copy.deepcopy(run_info)
                        run_infos.append(run_info_deep_copy)
            
                        #5A

            # ------------------------------------------------------------------
            # Simulation 5B: sweep mu from 0.0 to 2.0 with _delta fixed at 0.5.
            # ------------------------------------------------------------------
            # Fixed settings for this section. `mu` is only a placeholder here;
            # the sweep below rebinds it on every iteration.
            mu = 0
            _Delta = 0
            _delta = 0.5
            sigma_true = .6
            sigma = .2

            #df_t = df_copy.copy(deep = True)

            # 0.0, 0.1, ..., 2.0
            mu_values = [i / 10 for i in range(21)]

            if "5B" in simulations_to_run:
                for mu in mu_values:
                    print("running for mu: " + str(mu))

                    # reset the column to the stored original values, then
                    # regenerate the true log-odds with this section's settings
                    df_t["true_log_odds"] = copy_log_odds_org

                    log_odds, _, _ = produce_true_log_odds(df_t, cols_t, group_ind, _Delta, sigma_true, coefficient_map)
                    df_t["true_log_odds"] = log_odds

                    # helper works on df_t in place; the outcome column is copied
                    # to df_copy right after (presumably outcomes are re-drawn
                    # with the _delta shift - confirm against the helper)
                    add_bias_shifted_log_odds_choose_new_outcomes(df_t, s_bias, _delta, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    add_bias_shifted_by_mu(df_t, s_bias, mu, sigma, group_ind)

                    # parameters recorded with every job of this iteration
                    run_info["_delta"] = _delta
                    run_info["mu"] = mu
                    run_info["sigma"] = sigma
                    run_info["_Delta"] = _Delta
                    # Record the sigma_true that was actually passed to
                    # produce_true_log_odds above. This previously read
                    # `sigma_true_value`, a name this section never assigns.
                    run_info["sigma_true"] = sigma_true
                    run_info["num_affected"] = num_affected
                    run_info["pr_subset"] = pr_subset

                    run_info["varying_parameter"] = mu

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")

                    # df_copy is the frame handed to the scan set-up below
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]
                    df_copy["tilde_p"] = df_t["predicted_probs"]
                    #df_t["predicted_probs_"+str(mu)] = df_t["predicted_probs"]

                    # dump both frames to CSV for the mu == 0 iteration only
                    if (mu == 0):
                        df_copy.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_5B_"+ str(mu)+"_.csv")
                        df_t.to_csv(folder_path_org+"/_run_num_"+str(run_number)+"_5B_"+ str(mu)+"_full_.csv")

                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    #setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # performing initial data preprocessing, in this case there is not any
                    data_specs_func = DatasetSpecificFuncs(fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # producing all scans in config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        print(scan)

                        # one job per scan, carrying deep copies of the frames
                        scan_params = {}
                        scan_params["scan_info"] = scan
                        scan_params["dataset_yaml"] = dataset_yaml
                        scan_params["data"] = data.copy(deep=True)
                        scan_params["p_bin_var"] = p_bin_var
                        scan_params["tilde_probability_var"] = tilde_probability_var
                        scan_params["df_t"] = df_t.copy(deep=True)
                        scan_params["df_copy"] = df_copy.copy(deep=True)
                        scan_params["experiment_name"] = "5B"
                        run_info["scan_params"] = scan_params
                        # run_info is mutated on every iteration, so queue a
                        # deep copy
                        run_info_deep_copy = copy.deepcopy(run_info)
                        run_infos.append(run_info_deep_copy)
            

            #1CD
            # Simulation 1CD: mu = 1 is passed to add_bias_shifted_by_mu while the
            # number of affected features and the per-value selection probability
            # are varied.
            df_copy = df_copy_within_run.copy(deep=True)
            sigma_true = .6
            mu = 1
            mu_suff = 0
            sigma = .2
            _delta = 0
            _Delta = 0

            # grid axes: 0..len(cols_t) affected features, probabilities 0.0..1.0
            num_affected_values = list(range(len(cols_t) + 1))
            pr_subset_values = [tenth / 10.0 for tenth in range(11)]

            if "1CD" in simulations_to_run:
                for num_affected_value, pr_subset_value in itertools.product(num_affected_values, pr_subset_values):
                    # only grid points with num_affected == 2 or pr_subset == .5
                    # are simulated
                    if num_affected_value != 2 and pr_subset_value != .5:
                        continue

                    df_t = df_copy.copy(deep=True)
                    cols_t = copy.deepcopy(cols_copy)

                    # 150 is forwarded unchanged to the helper (meaning not visible
                    # here - TODO confirm)
                    (df_t, cols_t, key, key_value, group_ind, s_bias,
                     len_p, len_c, true_log_odds, coefficient_map) = pick_protected_class_bias_subset_no_filter_protected_class_given(
                        df_t, key, key_value, cols_t, 150,
                        num_affected_value, pr_subset_value,
                        _Delta, sigma_true, coefficient_map)

                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    print("Protected class:{!s} : {!s}".format(key, key_value))
                    print("Biased subset: {!s}".format(s_bias))
                    print("Number of individuals in bias subset for protected class: {!s}".format(len_p))
                    print("Number of individuals in bias subset for non-protected class: {!s}".format(len_c))
                    print("Coefficients used to protected true log-odds: {!s}".format(coefficient_map))

                    # flag rows matching every selected feature value; with no
                    # affected features or zero probability, nothing is flagged
                    if num_affected_value == 0 or pr_subset_value == 0:
                        df_t["in_bias_subset"] = 0
                    else:
                        in_selected_values = df_t[s_bias.keys()].isin(s_bias)
                        to_choose = in_selected_values.all(axis=1).astype(int)
                        df_t["in_bias_subset"] = to_choose

                    # metadata describing this grid point
                    run_info_t = {
                        "run_number": run_number,
                        "protected_class": (key, key_value),
                        "_Delta": _Delta,
                        "sigma_true": sigma_true,
                        "coefficient_values": coefficient_map,
                        "selected_bias_subset": s_bias,
                        "group_ind": group_ind,
                        "number_of_rows_protected_class_bias_subset": len_p,
                        "number_of_rows_control_bias_subset": len_c,
                    }

                    add_bias_shifted_by_mu(df_t, s_bias, mu, sigma, group_ind)

                    run_info_t.update({
                        "_delta": _delta,
                        "mu": mu,
                        "mu_suff": mu_suff,
                        "sigma": sigma,
                        "num_affected": num_affected_value,
                        "pr_subset": pr_subset_value,
                        "varying_parameter": "{!s}_{!s}".format(num_affected_value, pr_subset_value),
                    })

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")
                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # keep a CSV snapshot of the (2, .5) grid point only
                    if num_affected_value == 2 and pr_subset_value == .5:
                        csv_stem = folder_path_org + "/_run_num_" + str(run_number) + "_1CD"
                        df_copy.to_csv(csv_stem + "_.csv")
                        df_t.to_csv(csv_stem + "_full_.csv")

                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # dataset-specific preprocessing of the frame that will be scanned
                    data_specs_func = DatasetSpecificFuncs(
                        fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # queue one job per scan in the config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        print(scan)

                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep=True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep=True),
                            "df_copy": df_copy.copy(deep=True),
                            "experiment_name": "1CD",
                        }
                        run_info_t["scan_params"] = scan_params
                        # queue an independent snapshot; run_info_t is reused for
                        # the next scan
                        run_info_deep_copy = copy.deepcopy(run_info_t)
                        run_infos.append(run_info_deep_copy)
                                
              #1CD_suff

            # Simulation 1CD_suff: same grid as 1CD, but with mu = 0 and
            # mu_suff = 1; -mu_suff is passed to the outcome helper after the
            # predicted probabilities have been stored.
            df_copy = df_copy_within_run.copy(deep=True)
            sigma_true = .6
            mu = 0
            mu_suff = 1
            sigma = .2
            _delta = 0
            _Delta = 0

            # grid axes: 0..len(cols_t) affected features, probabilities 0.0..1.0
            num_affected_values = list(range(len(cols_t) + 1))
            pr_subset_values = [tenth / 10.0 for tenth in range(11)]

            if "1CD_suff" in simulations_to_run:
                for num_affected_value, pr_subset_value in itertools.product(num_affected_values, pr_subset_values):
                    # only grid points with num_affected == 2 or pr_subset == .5
                    # are simulated
                    if num_affected_value != 2 and pr_subset_value != .5:
                        continue

                    df_t = df_copy.copy(deep=True)
                    cols_t = copy.deepcopy(cols_copy)

                    # 150 is forwarded unchanged to the helper (meaning not visible
                    # here - TODO confirm)
                    (df_t, cols_t, key, key_value, group_ind, s_bias,
                     len_p, len_c, true_log_odds, coefficient_map) = pick_protected_class_bias_subset_no_filter_protected_class_given(
                        df_t, key, key_value, cols_t, 150,
                        num_affected_value, pr_subset_value,
                        _Delta, sigma_true, coefficient_map)

                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    print("Protected class:{!s} : {!s}".format(key, key_value))
                    print("Biased subset: {!s}".format(s_bias))
                    print("Number of individuals in bias subset for protected class: {!s}".format(len_p))
                    print("Number of individuals in bias subset for non-protected class: {!s}".format(len_c))
                    print("Coefficients used to protected true log-odds: {!s}".format(coefficient_map))

                    # flag rows matching every selected feature value; with no
                    # affected features or zero probability, nothing is flagged
                    if num_affected_value == 0 or pr_subset_value == 0:
                        df_t["in_bias_subset"] = 0
                    else:
                        in_selected_values = df_t[s_bias.keys()].isin(s_bias)
                        to_choose = in_selected_values.all(axis=1).astype(int)
                        df_t["in_bias_subset"] = to_choose

                    # metadata describing this grid point
                    run_info_t = {
                        "run_number": run_number,
                        "protected_class": (key, key_value),
                        "_Delta": _Delta,
                        "sigma_true": sigma_true,
                        "coefficient_values": coefficient_map,
                        "selected_bias_subset": s_bias,
                        "group_ind": group_ind,
                        "number_of_rows_protected_class_bias_subset": len_p,
                        "number_of_rows_control_bias_subset": len_c,
                    }

                    add_bias_shifted_by_mu(df_t, s_bias, mu, sigma, group_ind)

                    run_info_t.update({
                        "_delta": _delta,
                        "mu": mu,
                        "mu_suff": mu_suff,
                        "sigma": sigma,
                        "num_affected": num_affected_value,
                        "pr_subset": pr_subset_value,
                        "varying_parameter": "{!s}_{!s}".format(num_affected_value, pr_subset_value),
                    })

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")
                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # The outcome helper runs only after tilde_p was stored, so the
                    # stored predictions are not affected by this call.
                    # NOTE(review): unlike the 2CD section, this call is not guarded
                    # against an empty s_bias - confirm the helper copes with that.
                    add_bias_shifted_log_odds_choose_new_outcomes(df_t, s_bias, (-1 * mu_suff), group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    # keep a CSV snapshot of the (2, .5) grid point only
                    if num_affected_value == 2 and pr_subset_value == .5:
                        csv_stem = folder_path_org + "/_run_num_" + str(run_number) + "_1CD_suff"
                        df_copy.to_csv(csv_stem + "_.csv")
                        df_t.to_csv(csv_stem + "_full_.csv")

                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # dataset-specific preprocessing of the frame that will be scanned
                    data_specs_func = DatasetSpecificFuncs(
                        fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # queue one job per scan in the config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        print(scan)

                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep=True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep=True),
                            "df_copy": df_copy.copy(deep=True),
                            "experiment_name": "1CD_suff",
                        }
                        run_info_t["scan_params"] = scan_params
                        # queue an independent snapshot; run_info_t is reused for
                        # the next scan
                        run_info_deep_copy = copy.deepcopy(run_info_t)
                        run_infos.append(run_info_deep_copy)

            # start this section from a fresh copy of the per-run frame
            df_copy = df_copy_within_run.copy(deep=True)

            #2CD
            # Simulation 2CD: same grid as 1CD, but mu = 0 and the outcome helper
            # is called with _delta = 0.5 whenever a bias subset was selected.
            mu = 0
            mu_suff = 0
            _Delta = 0
            _delta = 0.5
            sigma_true = .6
            sigma = .2

            # grid axes: 0..len(cols_t) affected features, probabilities 0.0..1.0
            num_affected_values = list(range(len(cols_t) + 1))
            pr_subset_values = [tenth / 10.0 for tenth in range(11)]

            if "2CD" in simulations_to_run:
                for num_affected_value, pr_subset_value in itertools.product(num_affected_values, pr_subset_values):
                    # only grid points with num_affected == 2 or pr_subset == .5
                    # are simulated
                    if num_affected_value != 2 and pr_subset_value != .5:
                        continue

                    df_t = df_copy.copy(deep=True)
                    cols_t = copy.deepcopy(cols_copy)

                    # 150 is forwarded unchanged to the helper (meaning not visible
                    # here - TODO confirm)
                    (df_t, cols_t, key, key_value, group_ind, s_bias,
                     len_p, len_c, true_log_odds, coefficient_map) = pick_protected_class_bias_subset_no_filter_protected_class_given(
                        df_t, key, key_value, cols_t, 150,
                        num_affected_value, pr_subset_value,
                        _Delta, sigma_true, coefficient_map)

                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]
                    # the outcome helper is skipped when no bias subset was selected
                    if len(s_bias.keys()) > 0:
                        add_bias_shifted_log_odds_choose_new_outcomes(df_t, s_bias, _delta, group_ind)
                    df_copy["ReoffendedWithinTwoYears"] = df_t["ReoffendedWithinTwoYears"]

                    print("Protected class:{!s} : {!s}".format(key, key_value))
                    print("Biased subset: {!s}".format(s_bias))
                    print("Number of individuals in bias subset for protected class: {!s}".format(len_p))
                    print("Number of individuals in bias subset for non-protected class: {!s}".format(len_c))
                    print("Coefficients used to protected true log-odds: {!s}".format(coefficient_map))

                    # flag rows matching every selected feature value; with no
                    # affected features or zero probability, nothing is flagged
                    if num_affected_value == 0 or pr_subset_value == 0:
                        df_t["in_bias_subset"] = 0
                    else:
                        in_selected_values = df_t[s_bias.keys()].isin(s_bias)
                        to_choose = in_selected_values.all(axis=1).astype(int)
                        df_t["in_bias_subset"] = to_choose

                    # metadata describing this grid point
                    run_info_t = {
                        "run_number": run_number,
                        "protected_class": (key, key_value),
                        "_Delta": _Delta,
                        "sigma_true": sigma_true,
                        "coefficient_values": coefficient_map,
                        "selected_bias_subset": s_bias,
                        "group_ind": group_ind,
                        "number_of_rows_protected_class_bias_subset": len_p,
                        "number_of_rows_control_bias_subset": len_c,
                    }

                    add_bias_shifted_by_mu(df_t, s_bias, mu, sigma, group_ind)

                    run_info_t.update({
                        "_delta": _delta,
                        "mu": mu,
                        "sigma": sigma,
                        "mu_suff": mu_suff,
                        "num_affected": num_affected_value,
                        "pr_subset": pr_subset_value,
                        "varying_parameter": "{!s}_{!s}".format(num_affected_value, pr_subset_value),
                    })

                    log_odds_to_prob(df_t, "predicted_log_odds", "predicted_probs")
                    df_copy["tilde_p"] = df_t["predicted_probs"]

                    # keep a CSV snapshot of the (2, .5) grid point only
                    if num_affected_value == 2 and pr_subset_value == .5:
                        csv_stem = folder_path_org + "/_run_num_" + str(run_number) + "_2CD"
                        df_copy.to_csv(csv_stem + "_.csv")
                        df_t.to_csv(csv_stem + "_full_.csv")

                    yaml_funcs = YamlFunctions(yaml_configs_path)
                    fsscan_configs = yaml_funcs.run()

                    # setting up logger
                    cbs_logger = CBSLogger(fsscan_configs["results_folder"])

                    # dataset-specific preprocessing of the frame that will be scanned
                    data_specs_func = DatasetSpecificFuncs(
                        fsscan_configs["data_set_specific_yaml"], "tilde_p", "p_bin_var")
                    data, dataset_yaml, tilde_probability_var, p_bin_var = data_specs_func.run(df_copy)

                    # queue one job per scan in the config file
                    scans = yaml_funcs.produce_scans(data, key, key_value)
                    for scan in scans:
                        print(scan)

                        scan_params = {
                            "scan_info": scan,
                            "dataset_yaml": dataset_yaml,
                            "data": data.copy(deep=True),
                            "p_bin_var": p_bin_var,
                            "tilde_probability_var": tilde_probability_var,
                            "df_t": df_t.copy(deep=True),
                            "df_copy": df_copy.copy(deep=True),
                            "experiment_name": "2CD",
                        }
                        run_info_t["scan_params"] = scan_params
                        # queue an independent snapshot; run_info_t is reused for
                        # the next scan
                        run_info_deep_copy = copy.deepcopy(run_info_t)
                        run_infos.append(run_info_deep_copy)

            # Put the backup frame back in place as df_copy and take a new
            # backup of it.
            df_copy = deep_df_copy
            deep_df_copy = df_copy.copy(deep=True)

            print("Number of runs: {!s}".format(len(run_infos)))

            # randomise the order in which the queued jobs will be handed out
            random.shuffle(run_infos)

            # Everything was generated without an exception; clear the flag
            # (presumably the condition of the enclosing retry loop - confirm).
            unsuccessful = False
        except Exception as e:
            print("running again : regenerating : error below")
            print(traceback.format_exc())


    # ----------------------------------------------------------------------
    # Run the queued jobs in separate processes, at most
    # `active_worker_constant` at a time, polling every 30 seconds.
    # ----------------------------------------------------------------------
    active_workers = 0            # processes started so far
    completed_workers = 0         # processes observed to have exited
    active_processes_list = []    # processes started and not yet seen as exited
    active_worker_constant = 15   # cap on concurrently running processes

    #run_infos = run_infos[:69]
    while run_infos or active_workers > completed_workers:
        print(len(run_infos))

        # top the pool up whenever a slot is free and jobs remain
        free_slots = active_worker_constant - len(active_processes_list)
        if free_slots > 0 and run_infos:
            print(len(run_infos))

            needed_workers = min(free_slots, len(run_infos))

            print("Will create " + str(needed_workers) + " processes")

            # each process takes one job from the end of the queue
            new_workers = []
            for _ in range(needed_workers):
                job = run_infos.pop()
                new_workers.append(mp.Process(target=run_wrapper, args=(job,)))

            print(len(run_infos))

            for worker in new_workers:
                active_workers += 1
                worker.start()

                print("starting worker " + str(active_workers))

            active_processes_list.extend(new_workers)

        # wait before polling the workers

        print("sleeping for 30 seconds")

        time.sleep(30)

        if not run_infos:
            print("all jobs are assigned.. waiting for all workers to complete")

        # keep the processes that are still alive; account for the rest
        still_running = []

        for process in active_processes_list:
            if process.is_alive():
                still_running.append(process)
                continue

            print("there is a complete worker")
            completed_workers += 1
            print("total complete workers : " + str(completed_workers))

            # a non-zero exit code is only reported; the loop carries on
            if process.exitcode != 0:
                print("there was an unsuccesfful run!")
                print(process.exitcode)
                #sys.exit("issue with run")

            process.terminate()

            print('ended completed process')

        active_processes_list = still_running

          
            
        


