title = "Configuration file used for scaling N-body mass-spring data"

# Root table: settings for the scaling experiment over the number of nodes (Nx) and global numeric/runtime options.
Nx_start                = 2                   # starting number of nodes for the scaling
Nx_gnn                  = 8                   # specifies when to stop training new GNNs and reuse the last one for inference
Nx_end_gnn              = 4096                # end of the scaling range for the GNN
Nx_end_fcnn             = 1024                # end of the scaling range for the FCNN, limited due to training memory overhead
dof                     = 2                   # NOTE(review): undocumented; presumably degrees of freedom per node - confirm
dtype                   = "float"             # choices: 'float', 'double'
auto_diff_mode          = "forward"           # choices: 'forward', 'reverse'; only affects the SWIM models when computing the gradient of the dense layer output w.r.t. inputs q and p to construct the linear system
device                  = "cpu"               # choices: 'cpu', 'cuda'

# Settings for generating the mass-spring dataset.
[data]
n_points                = 4_000               # number of data points
train_test_split        = 0.5                 # NOTE(review): undocumented; presumably the fraction of n_points used for training - confirm
mass                    = 1.0                 # should be positive
spring_constant         = 1.0                 # should be positive
# NOTE(review): x_min/x_max presumably bound the sampled domain - confirm which quantities they apply to.
x_min                   = -1.0                # should be less than x_max
x_max                   = +1.0                # should be greater than x_min
meshing                 = "rectangular"       # choices: 'rectangular', 'diagonal', 'cross'
data_seed               = 5619456             # random state for the data generation

# Network architecture and initialization settings.
[model]
width                   = 512                 # model width, i.e., size of the input to the last linear map
enc_width               = 64                  # GNN encoding width for the node, edge and message encoders
direct                  = false               # whether to assume a directed graph; only affects the GNN
# NOTE(review): the pooling keys are undocumented; presumably GNN aggregation operators - confirm semantics and the accepted choices.
local_pooling           = "sum"
global_pooling          = "sum"
activ_str               = "softplus"          # choices: 'tanh', 'relu', 'silu', 'gelu', 'elu', 'mish', 'softplus', 'softsign'
init_method             = "relu"              # bias init method, only affects gradient-descent based training; choices: 'relu' for relu-like and 'tanh' for tanh-like activation functions
# NOTE(review): init_method says "gradient-descent based training" while model_seed says "adam-training" - confirm both refer to the same training path.
model_seed              = 9165422             # random state for the model initialization, only affects adam-training

# Training settings: random-feature sampling and linear solve first, then optimizer settings.
[train]
elm_bias_start          = -1.00               # lower bound when uniformly sampling the bias for random-feature dense sampling
elm_bias_end            = +1.00               # upper bound when uniformly sampling the bias for random-feature dense sampling
resample_duplicates     = true                # whether to resample duplicate weights to avoid redundant parameters
driver                  = "gels"              # driver to use for the linear solver, choices: QR based 'gels' or 'gelsy', SVD based 'gelsd', 'gelss'
# NOTE(review): the driver names match torch.linalg.lstsq; if that is the consumer, rcond is an effective-rank cutoff
# that is only used by 'gelsy', 'gelsd' and 'gelss', so it may be ignored with driver = "gels" - confirm.
rcond                   = 1e-06               # regularization for the linear solver
sampling_seed           = 119425              # random state for the sampling of dense layer parameters
# NOTE(review): the keys below are undocumented in this file; presumably they configure the gradient-descent
# (optim_type = "adam") training path, with the learning rate scheduled between lr_start and lr_end - confirm against the consumer.
n_steps                 = 10_000
batch_size              = 256
weight_init             = "kaiming_normal"
lr_start                = 0.01
lr_end                  = 5e-5
weight_decay            = 1e-6
# NOTE(review): patience equals n_steps; if it is an early-stopping or plateau patience counted in steps, it may never trigger - confirm this is intended.
patience                = 10_000
optim_type              = "adam"
sched_type              = "exponential"
