# Execution settings: nested data-reader arguments plus the trainer,
# optimizer-builder, epoch count, and evaluation-frequency values.
EXECUTION:
  DATAREADER:
    CRAWLER_ARGS:
      data_folder: Data
      # Same name as SAVE.MODEL_QUALIFIER.
      include: ['kagglefn_short']
    DATAREADER: DataReader
    DATASET_ARGS:
      # 'fnews' is also the LABEL referenced by the LOSS entry.
      classificationclass: ['fnews', 'dataset']
      # Same value as MODEL.MODEL_KWARGS.max_position_embeddings.
      maxlen: 512
      memcache: true
      mlm_probability: 0.15
      masking: true
    GENERATOR_ARGS:
      do_lower_case: true
      spm_model_file: 30k-clean.model
      tokenizer: AlbertFullTokenizer
      vocab_file: 30k-clean.vocab
  EPOCHS: 16
  FP16: false
  MODEL_SERVING: false
  OPTIMIZER_BUILDER: ClassificationOptimizer
  SKIPEVAL: false
  # NOTE(review): presumably counted in epochs, like SAVE.SAVE_FREQUENCY; confirm.
  TEST_FREQUENCY: 3
  TRAINER: BaseTrainer
  TRAINER_ARGS:
    accumulation_steps: 1

# Model naming and checkpoint settings.
SAVE:
  MODEL_VERSION: 1
  MODEL_CORE_NAME: 'midas_expert'
  MODEL_BACKBONE: 'albert'
  # Same name as the dataset listed in EXECUTION.DATAREADER.CRAWLER_ARGS.include.
  MODEL_QUALIFIER: 'kagglefn_short'
  DRIVE_BACKUP: true
  SAVE_FREQUENCY: 5  # Epoch
  # NOTE(review): relative path under ./drive/MyDrive; presumably a mounted
  # Google Drive folder (see DRIVE_BACKUP); confirm it exists at run time.
  CHECKPOINT_DIRECTORY: './drive/MyDrive/Projects/MiDAS/Models/'

# Transformation settings; only the batch size is set in this file.
TRANSFORMATION:
  BATCH_SIZE: 16

# Model definition: builder, architecture/base names, and the keyword
# arguments listed under MODEL_KWARGS.
MODEL:
  BUILDER: ednaml_model_builder
  MODEL_ARCH: MiDAS
  MODEL_BASE: Albert
  MODEL_KWARGS:
    # NOTE(review): presumably matches the 30k-clean vocab/model files named
    # in EXECUTION.DATAREADER.GENERATOR_ARGS; confirm.
    vocab_size_or_config_json_file: 30000
    embedding_size: 128
    hidden_size: 768
    num_hidden_layers: 12
    num_hidden_groups: 1
    num_attention_heads: 12
    # 4 x hidden_size.
    intermediate_size: 3072
    inner_group_num: 1
    hidden_act: gelu_new
    # Both dropout probabilities are set to 0.
    hidden_dropout_prob: 0
    attention_probs_dropout_prob: 0
    # Same value as EXECUTION.DATAREADER.DATASET_ARGS.maxlen.
    max_position_embeddings: 512
    type_vocab_size: 2
    initializer_range: 0.02
    layer_norm_eps: 1.0e-12
    pooling: average
  MODEL_NORMALIZATION: bn
  # Same name as OPTIMIZER_NAME and SCHEDULER_NAME below.
  PARAMETER_GROUPS: [opt-1]

# Loss definitions: a list with a single entry.
LOSS:
  # LOSSES, KWARGS and LAMBDAS each hold one element here.
  # NOTE(review): presumably index-aligned per loss; confirm before adding more.
  - LOSSES: ['SoftmaxLogitsLoss']
    KWARGS: [{}]
    LAMBDAS: [1.0]
    # Same name as the first entry of DATASET_ARGS.classificationclass.
    LABEL: fnews
    NAME: classification

# Optimizer definitions: a list with a single entry.
OPTIMIZER:
  - OPTIMIZER: AdamW
    # Keep the decimal point in exponent-form floats: some YAML 1.1 loaders
    # (e.g. PyYAML) read a value such as 1e-5 as a string, not a float.
    BASE_LR: 1.0e-5
    LR_BIAS_FACTOR: 1.0
    OPTIMIZER_KWARGS:
      eps: 1.0e-6
    # Same name as MODEL.PARAMETER_GROUPS and SCHEDULER_NAME.
    OPTIMIZER_NAME: opt-1
    WEIGHT_BIAS_FACTOR: 0.0005
    WEIGHT_DECAY: 0.0005

# Learning-rate scheduler definitions: a list with a single entry.
SCHEDULER:
  # NOTE(review): step_size (20) is larger than EXECUTION.EPOCHS (16). If the
  # scheduler is stepped once per epoch, the learning rate never decays in
  # this run; confirm that is intended.
  - LR_KWARGS:
      step_size: 20
    LR_SCHEDULER: StepLR
    # Same name as OPTIMIZER_NAME and MODEL.PARAMETER_GROUPS.
    SCHEDULER_NAME: opt-1

# Logging settings.
LOGGING:
  # NOTE(review): presumably the number of steps between log messages; confirm.
  STEP_VERBOSE: 100