# Base configuration for high-precision GP training.
# This file serves as a template; buffer-specific configs override values defined here.

# Compute device selector. Presumably passed to the framework as a device
# string by the training script — confirm against the consumer.
device: cuda

# Dataset locations and loading options
data:
  # Training set path; buffer-specific configs override this value
  train_path: 'data/gp_128batch_16buf_256tar'
  val_path: 'data/gp_128batch_test'
  num_workers: 4
  # Left null because each file is already a complete batch
  batch_size: null
  # Validation uses only the first 100 batches
  val_subset_size: 100

# Model configuration
model:
  # 1D input and 1D output
  dim_x: 1
  dim_y: 1
  dim_model: 128
  # Overridden in the buffer-specific configs
  max_buffer_size: 16
  num_target_points: 256
  targets_block_size_for_buffer_attend: 16
  q_block_size: 128
  kv_block_size: 128

  embedder:
    hidden_dim: 256
    depth: 3

  backbone:
    num_layers: 6
    num_heads: 4
    dim_feedforward: 256
    dropout: 0.0

  head:
    # Single channel for 1D GP
    type: MixtureGaussian
    dim_feedforward: 256
    num_components: 20
    # Minimum std for numerical stability. Written with an explicit decimal
    # point so YAML 1.1 loaders (e.g. PyYAML) resolve it as a float; a bare
    # `1e-3` is resolved as the string "1e-3" by those loaders.
    std_min: 1.0e-3

# Optimizer configuration
optimizer:
  name: adamw
  # Explicit decimal point: YAML 1.1 loaders such as PyYAML resolve a bare
  # `1e-4` as the string "1e-4" rather than a float, which would hand the
  # optimizer a non-numeric learning rate.
  lr: 1.0e-4
  betas: [0.9, 0.999]
  # Reduced from 0.1 for stability
  weight_decay: 0.01

# Scheduler settings
scheduler:
  use_scheduler: true
  name: 'cosine_with_warmup'
  # Reduced from 0.1
  warmup_ratio: 0.05

# Training-loop settings
training:
  num_epochs: 32
  # Reduced from 1.0 for stability
  grad_clip: 0.5

  # Compilation switches
  compile_model: true
  compile_mask: true
  compile_mode: 'default'
  fullgraph: false
  dynamic: false
  prewarm_compilation: true

  # AMP is disabled for numerical stability with float64 data
  use_amp: false
  amp_dtype: 'bfloat16'

  val_interval: 1

# Checkpoint output settings
checkpoint:
  # Quoted so the ${now:...} interpolations are passed through as one string
  save_dir: 'checkpoints/gp_highprecision_${now:%Y-%m-%d}/${now:%H-%M-%S}'
  save_interval: 10

# Experiment logging settings
logging:
  use_wandb: true
  project: 'fast-buffer-np'
  run_name: 'gp-${now:%Y%m%d-%H%M%S}'
  log_interval: 50
  tags:
    - 'ace'
    - 'gp'
    - 'highprecision'
    - '256target'