# dataset settings
seed = 0
dataset_type = "Anthropics"
modality = "text"
num_classes = 2

# '/YOUR_PATH' is a placeholder prefix; every path below is built on top of it.
data_root = "/YOUR_PATH/benchmark/anthropics/"
# Subset name interpolated into the file names below.
# Options noted by the original author: harmless, red_team
key = "red_team"
file_name = f"anthropic_{key}_raw"
sel_label = 0
details = False
duplicate = False

# Derived locations: both sit directly under data_root and embed `key`.
# data_root already ends with '/', so no separator is inserted here.
dataset_path = f"{data_root}anthropic_{key}_processed_s1_whole.pt"
save_path = f"{data_root}embedding_{key}_s1_whole/"

# Identifier of the embedding model (looks like a Hugging Face hub id — confirm
# against the code that loads it).
embedding_model = "sentence-transformers/all-mpnet-base-v2"
# Options for the embedding stage. The keys resemble data-loader settings
# (shuffle / batch_size / num_workers); the meaning of `save_num` is not visible
# from this file — TODO confirm with the consumer.
embedding_cfg = {
    "shuffle": False,
    "batch_size": 256,
    "save_num": 800,
    "num_workers": 2,
}


# Settings for the "hoc" stage. How each field is interpreted is decided by the
# consumer of this config, which is not visible here; T0 and p0 are left unset
# (None).
hoc_cfg = {
    "max_step": 1501,
    "T0": None,
    "p0": None,
    "lr": 0.1,
    "num_rounds": 150,  # 150
    "sample_size": 50000,
    "already_2nn": False,
    "device": "cpu",
    "balance": True,
    "ind_sample": True,
    "independent_ratio": 0.5,
    "only_last": False,
}


# Settings for the detection stage; `name` and `method` are string selectors
# whose accepted values are defined by the consuming code (not shown here).
detect_cfg = {
    "num_epoch": 1001,
    "sample_size": 50000,
    "k": 10,
    "name": "simifeat",
    "method": "rank",
}