# Dataset settings
seed = 0
dataset_type = "PKU_Align"
modality = "text"  # image, text, tabular
num_classes = 2
# Placeholder root directory; it must end with "/" because the derived
# paths below are built by plain string concatenation.
data_root = "/YOUR_PATH/benchmark/pku_align/"
# data_root = './data/pku_align/'
key = "PKU-SafeRLHF"  # harmless, red_team
file_name = "PKU-SafeRLHF"
sel_label = 0
details = True
duplicate = False

# Locations derived from data_root and key.
dataset_path = f"{data_root}pku_align_{key}_processed.pt"
save_path = f"{data_root}embedding_{key}/"

# Embedding settings: the model identifier and the options passed along
# with it (presumably data-loader style options -- confirm against the
# consumer of this config).
embedding_model = "sentence-transformers/all-mpnet-base-v2"
embedding_cfg = {
    "shuffle": False,
    "batch_size": 256,
    "save_num": 800,
    "num_workers": 2,
}


# HOC settings. T0 and p0 are left as None here, i.e. no initial values
# are supplied by this config; how None is interpreted is decided by the
# code that consumes hoc_cfg.
hoc_cfg = {
    "max_step": 1501,
    "T0": None,
    "p0": None,
    "lr": 0.1,
    "num_rounds": 10,
    "sample_size": 50000,
    "already_2nn": False,
    "device": "cpu",
    "balance": True,
    "ind_sample": True,
    "independent_ratio": 1.0,
}


# Detection settings: selects the detector by name ('simifeat') and its
# method ('rank'), together with its numeric options.
detect_cfg = {
    "num_epoch": 1001,
    "sample_size": 50000,
    "k": 10,
    "name": "simifeat",
    "method": "rank",
}