# GLUE CoLA Configuration

# Model parameters
# NOTE(review): the code that reads these keys is not visible from this file;
# remarks below are inferred from key names and values — confirm against it.
# Same value as max_length under "Data parameters"; presumably the two are
# expected to agree — confirm.
sequence_length: 128
hidden_dim: 768
# 768 / 12 = 64; presumably hidden_dim must be divisible by num_heads — confirm.
num_heads: 12
num_layers: 12
# alpha / beta / gamma: what each coefficient controls is not evident from
# this file — TODO document. Written as floats (1.0, not 1).
alpha: 1.0
beta: 2.0
gamma: 1.5
dropout: 0.1
# What "enhanced" switches on is not defined in this file — TODO document.
use_enhanced: true

# Training parameters
# NOTE(review): the training script that reads these keys is not visible from
# this file; unit remarks below are inferred from key names — confirm.
batch_size: 16
# Keep the explicit decimal point: YAML 1.1 loaders (e.g. PyYAML) only resolve
# exponent notation to a float when the mantissa contains a ".", so a bare
# "2e-5" would be loaded as the string "2e-5" rather than the number 0.00002.
learning_rate: 2.0e-5
weight_decay: 0.01
num_epochs: 3
# Presumably counted in optimizer steps, not epochs — confirm.
warmup_steps: 500
# Presumably the gradient-clipping threshold — confirm.
max_grad_norm: 1.0

# Data parameters
# Relative path; the directory it is resolved against is not shown in this
# file — confirm against the data-loading code.
dataset_path: data/glue_cola
# Presumably a pretrained tokenizer identifier — confirm how it is resolved.
tokenizer_name: bert-base-uncased
# Same value as sequence_length under "Model parameters"; presumably the two
# are expected to agree — confirm, and change them together if so.
max_length: 128

# Reproducibility
# Integer seed; which random number generators it is applied to depends on
# the consuming code, which is not visible here — confirm.
seed: 42
# YAML boolean. What it toggles in the consuming code is not visible here —
# confirm.
deterministic: true

