# Batch and block-size settings.
# NOTE(review): the code that reads this file is not visible here; the
# descriptions below are inferred from the key names only -- confirm against
# the consumer before relying on them.
# Presumably the number of samples per training batch.
BATCH_SIZE: 65536
# Presumably per-pass block sizes (backward / forward). Both are currently set
# to the same value.
BLOCK_SIZE_BACKWARD: 1024
BLOCK_SIZE_FORWARD: 1024
# DKAN schedule settings.
# NOTE(review): the code that reads this file is not visible here; the
# descriptions below are inferred from the key names only -- confirm against
# the consumer before relying on them.
# Presumably the base learning rate. Written with an explicit decimal point on
# purpose: YAML 1.1 loaders (e.g. PyYAML) only resolve exponent notation as a
# float when the mantissa contains a "."; a bare "1e-3" is loaded as a string.
DKAN_BASE_LR: 1.0e-3
# Presumably the scale and step count of a Frobenius-term decay schedule.
DKAN_FROBENIUS_DECAY_SCALE: 30000
DKAN_FROBENIUS_DECAY_STEPS: 2500000
# Presumably the scale and step count of the learning-rate decay schedule.
# Note the step count here (2200000) is lower than the Frobenius one (2500000).
DKAN_LEARNING_RATE_DECAY_SCALE: 300000
DKAN_LEARNING_RATE_DECAY_STEPS: 2200000
# Presumably the cap, scale and step count of a "turn-on" (ramp-in) schedule.
DKAN_TURN_ON_CAP: 0.3
DKAN_TURN_ON_SCALE: 60000
DKAN_TURN_ON_STEPS: 120000
# Frobenius-weight, model-width and initialisation settings.
# NOTE(review): the code that reads this file is not visible here; the
# descriptions below are inferred from the key names only -- confirm against
# the consumer before relying on them.
# Exponent-notation values are written with an explicit decimal point on
# purpose: YAML 1.1 loaders (e.g. PyYAML) only resolve exponent notation as a
# float when the mantissa contains a "."; a bare "1e-20" is loaded as a string.
# NOTE(review): this "cap" (1.0e-20) is far below INITIAL_FROBENIUS_WEIGHT
# (1.0e-3), so it may actually act as a floor -- confirm the intended meaning.
FROBENIUS_WEIGHT_CAP: 1.0e-20
# Presumably the width of the hidden layer(s).
HIDDEN_DIM: 32
# Presumably the starting value of the Frobenius weight.
INITIAL_FROBENIUS_WEIGHT: 1.0e-3
# Presumably a scale factor applied at parameter initialisation.
INIT_SCALE: 0.1
# Model-structure, logging, teacher and tile-size settings.
# NOTE(review): the code that reads this file is not visible here; the
# descriptions below are inferred from the key names only -- confirm against
# the consumer before relying on them.
# Presumably the number of chunks some workload is split into.
N_CHUNKS: 12
# Presumably the number of inner layers in the model.
N_INNER_LAYERS: 1
# Presumably the number of initial steps run in a pure-MLP mode.
PURE_MLP_STEPS: 15000
# Presumably how often (in steps) metrics are recorded.
RECORD_INTERVAL: 100
# Relative path; presumably resolved against the working directory of the
# process that loads this file -- confirm.
TEACHER_PATH: teachers/teacher_32_32
# Presumably per-pass tile sizes. Unlike the block sizes, the backward and
# forward values differ (4 vs 8).
TILE_SIZE_BACKWARD: 4
TILE_SIZE_FORWARD: 8
