# GPU selection for the run.
# The index is kept as a quoted string so it is not parsed as an integer;
# presumably exported as CUDA_VISIBLE_DEVICES by the loader -- confirm.
cuda_visible_devices: '6'
device: gpu

# Run-level settings. The path, file and logger values below are placeholders
# to be replaced with real ones.
dataset_name: 'HND'
path_models: '/path/to/models/folder/'
# NOTE(review): the placeholder suggests this is the W&B project name -- confirm.
file_to_save: project_name_for_wandblogger
logger_name: model_name_to_be_logged
logger_file: logger_filename.csv
num_executions: 5
# true for multi-layer attention, false for single-layer attention
multi_layer: false

# Input data locations. The four XML files listed below were downloaded from
# https://zenodo.org/records/5776081
load_data_paths:
  in_path: '/path/to/data/folder/'
  data_train: 'articles-training-byarticle-20181122.xml'
  labels_train: 'ground-truth-training-byarticle-20181122.xml'
  data_test: 'articles-test-byarticle-20181207.xml'
  labels_test: 'ground-truth-test-byarticle-20181207.xml'
  # true if a validation set is available, false otherwise (as in this case)
  with_val: false

# Model architecture and optimisation hyper-parameters.
model_arch_args:
  num_classes: 2
  lr: 0.001
  embed_dim: 384
  num_heads: 4
  dropout: 0.2
  hidden_dim: 128
  intermediate: true
  # Attention activation:
  #   "relu"    - ReLU activation for attention
  #   "sigmoid" - Sigmoid-based attention
  #   "softmax" - traditional Softmax attention with the temperature annealing
  #               strategy (see the commented-out example at the end of this
  #               mapping)
  # NOTE(review): it is unclear from this file whether "anneal_decrease" is
  # also accepted as an activation_attention value; the example below assigns
  # it to temperature_scheduler instead -- confirm against the model code.
  activation_attention: 'relu'
  # only used when activation_attention is "relu" or "sigmoid"
  attn_dropout: 0.1
  # Softmax attention with temperature annealing: comment out the
  # activation_attention line above (a repeated key is invalid YAML) and
  # uncomment the three lines below.
  # temperature_scheduler: "anneal_decrease"
  # temperature_step: 0.0001
  # activation_attention: "softmax"

  
batch_size: 32
# NOTE(review): meaning is not documented in this file (presumably toggles
# class weights) -- confirm against the training script.
with_cw: true

# NOTE(review): the key names match PyTorch Lightning Trainer arguments;
# presumably forwarded to the Trainer -- confirm against the training script.
trainer_args:
  max_epochs: 20
  enable_progress_bar: false

# NOTE(review): presumably early-stopping settings (how many epochs to wait and
# the minimum improvement that counts) -- confirm against the training script.
early_args:
  patience: 5
  min_delta: 0.001