# Model section: the class to build, its constructor arguments, the LoRA
# adapter settings, and the patterns selecting trainable parameters.
# NOTE(review): the code that consumes this file is not visible here; every
# remark marked "presumably" below should be confirmed against that loader.
model:
  # Class name to instantiate; presumably looked up in Hugging Face
  # `transformers`.
  name: "AutoModelForSequenceClassification"
  # Positional arguments; presumably the pretrained checkpoint identifier.
  args:
    - "roberta-base"
  # Keyword arguments passed alongside `args`.
  kwargs:
    num_labels: 2
    cache_dir: "./datas/models/roberta-base"
  # LoRA adapter hyperparameters; key names presumably mirror PEFT's
  # `LoraConfig` fields.
  lora:
    task_type: "SEQ_CLS"
    r: 8
    lora_alpha: 16
    lora_dropout: 0.1
  # Patterns written as regular expressions. Inside double quotes YAML reads
  # `\\.` as `\.`, i.e. a literal dot once compiled as a regex. Presumably
  # matched against parameter names to decide which stay trainable.
  trainables:
    - "^base_model\\..*\\.encoder\\..*"
    - "^base_model\\..*\\.classifier\\..*"

# Dataset section: tokenizer settings and the columns kept as inputs.
# NOTE(review): the consuming code is not visible here; remarks marked
# "presumably" should be confirmed against it.
dataset:
  # Tokenizer settings; `name` presumably identifies the pretrained tokenizer.
  tokenizer_args:
    name: "roberta-base"
    cache_dir: "./datas/tokenizers/roberta-base"
    max_length: 128
    # Disabled alternative value, kept for reference:
    # max_length: 512
  # Column names; presumably the fields retained and fed to the model.
  input_columns:
    - "input_ids"
    - "attention_mask"
    - "labels"