# Dataset settings.
seed = 0
dataset_type = "Jigsaw"
modality = "text"
num_classes = 2
# Placeholder root directory; replace /YOUR_PATH with the local location.
data_root = "/YOUR_PATH/benchmark/jigsaw/"

file_name = "all_data"
sel_label = 0  # NOTE(review): presumably picks which label to use -- confirm with the consumer
details = False


# Paths derived from data_root (which already ends with a slash).
dataset_path = f"{data_root}{file_name}.pt"
save_path = f"{data_root}embedding/"

# Embedding settings.
embedding_model = "sentence-transformers/all-mpnet-base-v2"
embedding_cfg = {
    "force_get": False,
    "shuffle": False,
    "batch_size": 256,
    "save_num": 800,
    "num_workers": 2,
    "use_pca": False,
    "use_mi": False,
}



# Settings grouped under the name `hoc_cfg`.
# NOTE(review): the meaning of each key is defined by the code that reads this
# config, which is not visible here -- confirm before changing values.
hoc_cfg = {
    "max_step": 1501,
    "T0": None,
    "p0": None,
    "lr": 0.1,
    "num_rounds": 150,  # original author's note: 150
    "sample_size": 50000,
    "already_2nn": False,
    "device": "cpu",
    "balance": True,
}

# Settings grouped under the name `diagnose_cfg`; a single boolean flag.
diagnose_cfg = {"need_preprocess": True}


# Settings grouped under the name `detect_cfg`.
# NOTE(review): `name` and `method` are string selectors interpreted by the
# consuming code (not visible here) -- confirm the accepted values there.
detect_cfg = {
    "num_epoch": 1001,  # original author's note: 1001
    "sample_size": 50000,
    "k": 10,
    "name": "simifeat",
    "method": "rank",
}

