'''
- datagen_conf.py
- This file handles the global configuration for the data generation functions used in VIDS
'''


# Data locations: inputs read by the data generation functions.
# Directory path (ends with a trailing slash).
QUERY_STEMS_DIR = "src/data/fine_tuning/query_design/"
# JSON file of query stems.
STEMS_LOC = "src/data/fine_tuning/query_stems/query_stems.json"
# GMB corpus in CSV form.
GMB_LOC = "src/data/fine_tuning/gmb_corpus.csv"
# JSON file of static attributes.
STATIC_ATTR_LOC = "src/data/fine_tuning/static_attributes.json"

# Generation locations: where generated data is placed.
# NOTE: no trailing slash here, unlike the other directory constants.
ARTIFICIAL_DATA_LOC = "src/data/fine_tuning"
GMB_DATASET_LOC = "src/data/fine_tuning/ner/gmb/"
# NOTE(review): meaning is not evident from this file - confirm with the
# consumers of this constant before changing it.
QUESTION_NUMBER = 26

# Artificial query parameters
#
# Scaling factors: for each of the three *_SCALE values below, 0 removes
# scaling entirely; any other value reduces the amount of generated data.

# Reduces the number of frames generated.
FRAME_SCALE = 8
# Reduces the number of filled values per frame.
FILL_SCALE = 3
# Reduces the number of frame permutations per schema feature.
PERM_SCALE = 2

# "Small" counterparts of the scaling factors above.
SMALL_FRAME_SCALE = 4
SMALL_FILL_SCALE = 3
SMALL_PERM_SCALE = 2

NUM_SYNONYMS = 2
NUM_PERM = 2

# Format used for CoNLL data generation; this setting is what switches the
# output between the iob and iob2 formats.
CONLL_FORMAT = "iob"

MAX_REPEATING_BLOC_COUNT = 3

# Split weights: relative weighting of the training and validation splits.
TRAIN_WEIGHT = 90
VALID_WEIGHT = 10

# Validation parameters
# TODO: these might be unnecessary; address in the fine-tuning refactor.
FOLD_COUNT = 5
BATCH_SIZE = 64