# SONATA - Unified Configuration

# ============= Main Parameters =============
# These are the primary settings to control which configuration to use
active_dataset: "traffic"  # Options: server, traffic, beijing, fitRecord (the stanza names under `datasets`)
active_method: "CP"        # presumably a stanza name under `methods` (only CP is visible there) - TODO confirm
device: "cpu"              # Where computation runs: "cpu" or "cuda"
seed: 300                  # Random seed, fixed for reproducibility

# ============= Global Parameters =============
# These apply to all datasets and methods unless overridden in specific sections
default:
  # --- Basic parameters ---
  epoch: 100                     # Upper bound on training epochs
  fold: 1                        # Fold count; allowed range is [1, 5]
  fix_int: true                  # Use fixed time intervals when true
  # NOTE(review): "continues" reads like a misspelling of "continuous". It is
  # left untouched because the value is presumably compared as a literal string
  # by the code that loads this file - confirm before renaming.
  time_type: "continues"         # Time step type: "continues" or "discrete"

  # --- Tensor factorization ---
  R_U: 5                         # Embedding dimension (rank)
  a0: 1                          # Shape parameter of the Gamma prior
  b0: 1                          # Rate parameter of the Gamma prior
  v: 1                           # Prior variance

  # --- Coreset ---
  coreset_max_size: 1000         # Largest allowed coreset size
  coreset_threshold: 0.6         # Starting importance threshold
  adaptive_threshold: true       # Enable the adaptive threshold
  # One importance weight per criterion, labelled entry by entry.
  importance_weights:
    - 0.3  # uncertainty
    - 0.2  # influence
    - 0.2  # novelty
    - 0.3  # martingale_increment

  # --- Martingale theory ---
  prediction_history_size: 50    # Size of the prediction history
  simulation_samples: 5          # Sample count for simulation evaluation
  discount_factor: 0.9           # Discount factor of the Bellman equation
  bellman_optimization: true     # Enable Bellman equation optimization

  # --- Multi-scale time weighting ---
  num_time_scales: 3             # How many time scales to use
  scale_hidden_dim: 32           # Hidden layer dimension
  attention_temperature: 1.0     # Temperature of the attention softmax
  time_scale_factor: 0.1         # Time scale transform factor (Ito formula)

  # --- Exploration-exploitation balance ---
  initial_exploration_rate: 0.9  # Exploration rate at the start
  exploration_decay_rate: 0.1    # Decay coefficient of the exploration rate

  # --- Gaussian Process ---
  kernel: "Matern_23"            # GP kernel type: "Matern_21" or "Matern_23"
  lengthscale: 0.3               # Kernel length scale
  variance: 1                    # Kernel variance
  noise: 1                       # KF observation noise

  # --- Optimization ---
  DAMPING: 0.5                   # Damping coefficient for messages
  DAMPING_tau: 0.6               # Damping coefficient for the tau parameter
  DAMPING_gamma: 0.5             # Damping coefficient for the gamma parameter (only for Tucker)
  EVALU_T: 60                    # Run evaluation every N time steps
  INNER_ITER: 50                 # Number of CEP inner iterations
  # NOTE: keep the decimal point and the signed exponent; YAML 1.1 loaders such
  # as PyYAML would read a bare `1e-4` as a string instead of a float.
  THRE: 1.0e-4                   # Convergence threshold for the CEP inner iterations
  # NOTE(review): the key is spelled with three N's ("INNNER"). It is kept
  # verbatim because the key name is presumably looked up by the consuming
  # code - confirm before renaming.
  CEP_UPDATE_INNNER_MODE: false  # CEP update mode (true: update by mode, false: separate update)

# ============= Dataset-Specific Parameters =============
datasets:
  # One stanza per dataset. Apart from `data_path`, every key below also exists
  # under `default`. Keys are listed in the same order in every stanza:
  # `data_path` first, then the remaining keys in the order their counterparts
  # appear under `default`.

  beijing:
    data_path: "data/beijing_20k.npy"
    R_U: 5
    coreset_max_size: 100
    discount_factor: 0.3
    kernel: "Matern_23"
    lengthscale: 0.3
    EVALU_T: 20

  server:
    data_path: "data/server.npy"
    R_U: 5
    coreset_max_size: 400
    discount_factor: 0.1
    kernel: "Matern_23"
    lengthscale: 0.3
    EVALU_T: 60

  fitRecord:
    data_path: "data/fitRecord_50k.npy"
    R_U: 5
    coreset_max_size: 2000
    discount_factor: 0.5
    kernel: "Matern_21"
    lengthscale: 0.1
    EVALU_T: 6

  traffic:
    data_path: "data/traffic_30k.npy"
    R_U: 5
    coreset_max_size: 3000
    discount_factor: 0.9
    kernel: "Matern_21"
    lengthscale: 0.9
    EVALU_T: 10

# ============= Method-Specific Parameters =============
methods:
  # Method-specific settings for CP. All three values below are identical to
  # the same-named keys under `default`.
  CP:
    DAMPING: 0.5        # Message damping coefficient
    DAMPING_tau: 0.6    # tau parameter damping coefficient
    # NOTE(review): `default` describes DAMPING_gamma as "only for Tucker";
    # confirm whether the CP method actually reads this key.
    DAMPING_gamma: 0.5  # gamma parameter damping coefficient