# Environment Settings
# These settings specify the hardware and distributed setup for the model training.
# Adjust `num_gpus` and `dist_config` according to your distributed training environment.
env_setting:
  num_workers: 4  # Number of worker threads used for data loading.
  num_gpus: 1  # Number of GPUs to use. CPU-only mode is not currently supported.
  seed: 1234  # Seed for the random number generators, to make runs reproducible.

# Data Path Configuration
data_cfg:
  # One JSON file per dataset split (train / valid / test), each in a clean
  # and a noisy variant. All paths are relative.
  # NOTE(review): the JSON schema is not visible from this file; presumably
  # each file lists the audio files of that split — confirm against the dataloader.
  train_clean_json: data/train_clean.json
  train_noisy_json: data/train_noisy.json
  valid_clean_json: data/valid_clean.json
  valid_noisy_json: data/valid_noisy.json
  test_clean_json: data/test_clean.json
  test_noisy_json: data/test_noisy.json

# Training Configuration
# This section details parameters that directly influence the training process,
# including batch sizes, learning rates, and optimizer specifics.
training_cfg:
  training_epochs: 300  # Number of training epochs.
  batch_size: 32  # Training batch size.
  learning_rate: 0.0005  # Initial learning rate.
  decay_period: 10  # Learning rate decay period (presumably in epochs — confirm).
  decay_factor: 0.7  # Learning rate decay factor.
  norm: false  # Whether to normalize the input features.
  clip: -1  # Gradient clipping threshold; -1 disables clipping.
  # Audio segment size used during training; depends on the sampling rate
  # (presumably in samples, i.e. 2 s at 16 kHz — confirm).
  segment_size: 32000
  use_PCS400: false  # Whether to use PCS.
  data_normalization: true  # Whether to normalize the input data.

# STFT Configuration
# Configuration for Short-Time Fourier Transform (STFT), crucial for audio processing models.
# This is not used in the training process, but it is required for model loading because the same dataloader is shared by all models.
stft_cfg:
  sampling_rate: 16000  # Audio sampling rate in Hz.
  n_fft: 400  # Number of FFT components used to transform the audio signal.
  hop_size: 100  # Number of samples between successive frames.
  win_size: 400  # Window size used for the FFT.

# Model Configuration
# Defines the architecture specifics of the model, including layer configurations and feature compression.
model_cfg:
  model_type: DeepANC  # Model type identifier.
  compress_factor: 1  # Compression factor applied to extracted features.
  # NOTE(review): win_size / hop_size are fractional here, unlike the integer
  # values in stft_cfg; presumably seconds (20 ms / 10 ms) — confirm.
  win_size: 0.020  # Window size for the model.
  hop_size: 0.010  # Hop size for the model.
  # NOTE(review): this key is `sample_rate`, while stft_cfg and rir_cfg use
  # `sampling_rate`; keep the spelling in sync with whatever the model code reads.
  sample_rate: 16000  # Audio sampling rate in Hz for the model

# Room Impulse Response Generator Configuration
# Settings for the room impulse responses (RIRs) used in simulation: file location, simulation type, and simulation parameters.
rir_cfg:
  rir_files_path: data/sim_rir_16k  # Path to the RIR files.
  type: RIR  # Simulation type: "RIR" for Room Impulse Response, "PyRoom" for Pyroomacoustics simulation.
  sampling_rate: 16000  # Audio sampling rate in Hz.
  reverberation_times: [0.15, 0.175, 0.2, 0.225, 0.25]  # Reverberation times to simulate (presumably in seconds — confirm).
  rir_samples: 512  # NOTE(review): presumably the RIR length in samples — confirm.
  hp_filter: true  # Whether to apply the high-pass filter.
  version: 5  # Simulation version.