# Configuration for local CPU testing of tabular regression
# Based on train_tabular.yaml but optimized for CPU and quick iteration

# Hydra defaults list: compose `train_tabular` first, then this file.
# `_self_` is listed last so the values in this file explicitly override the
# base config, and Hydra (>= 1.1) does not warn about a missing `_self_`.
defaults:
  - train_tabular
  - _self_

# Force CPU for local testing.
device: cpu

# Data configuration - overrides for local testing.
data:
  # Train and validation deliberately point at the same dataset directory;
  # the split between them comes from chunk splitting (see use_chunk_split).
  train_path: data/tabular/5gb
  val_path: data/tabular/5gb
  # Use the first chunk for validation and the remaining chunks for training.
  use_chunk_split: true
  # Total number of chunks in the dataset.
  num_chunks: 128
  # Reduced worker count for a local CPU machine.
  num_workers: 2
  # null because each file is already a complete batch.
  batch_size: null
  # Use only 50 batches so validation runs faster.
  # NOTE(review): with chunk splitting only the first chunk is used for
  # validation - confirm it actually provides at least 50 batches.
  val_subset_size: 50

# Training configuration - tuned for short CPU runs.
training:
  # Very short run for quick testing.
  num_epochs: 3
  grad_clip: 0.5
  # Compilation is disabled on CPU, for both the model and the mask.
  compile_model: false
  compile_mask: false
  # NOTE(review): the three compile options below presumably have no effect
  # while compilation is disabled - confirm against the trainer.
  compile_mode: default
  fullgraph: false
  dynamic: false
  # No compilation prewarming on CPU.
  prewarm_compilation: false
  # No AMP on CPU.
  # NOTE(review): amp_dtype is presumably ignored while use_amp is false.
  use_amp: false
  amp_dtype: bfloat16
  val_interval: 1
# Optimizer - can keep the same settings.
optimizer:
  # Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
  # resolve a bare `1e-4` as the string "1e-4" instead of a float.
  lr: 1.0e-4
  weight_decay: 0.0

# Scheduler - kept simple for testing.
scheduler:
  use_scheduler: true
  name: cosine_with_warmup
  # NOTE(review): presumably the fraction of total training spent in warmup -
  # confirm against the scheduler implementation.
  warmup_ratio: 0.02

# Checkpointing - written to a local, timestamped directory.
checkpoint:
  # One directory per date, with one sub-directory per launch time.
  save_dir: 'checkpoints/tabular_cpu_test_${now:%Y-%m-%d}/${now:%H-%M-%S}'
  # Save every epoch while testing.
  save_interval: 1

# Logging - wandb tracking enabled.
logging:
  # Enable wandb for tracking.
  use_wandb: true
  project: ace-tabular
  run_name: 'tabular-cpu-test-${now:%Y%m%d-%H%M%S}'
  # More frequent logging for debugging.
  log_interval: 10
  tags:
    - ace
    - tabular
    - cpu
    - test
    - '5gb'

# Ensure the diagonal-less mask is also used for CPU testing.
model:
  include_diagonal_mask: false
