###########
# General #
###########
project_name: 'initialization_experiments'  # baseline for synthetic experiments
# NOTE(review): the name ends in "num_init=2" while num_initialization below
# is 1 -- confirm which of the two is intended.
experiment_name: 'effect_of_w_BN_not_whitened_MNIST_linear_Net_num_init=2'
num_initialization: 1
verbose_level: 0
# Toggles for the Hessian-related computations.
# NOTE(review): presumably H is the Hessian and H_O the outer-product Hessian
# -- confirm against the code that reads these keys.
calc_hess_info: true
calc_H: false
calc_H_O_info: false
log_yaml_file: true
device: 'cpu'

#########
# Model #
#########
# possible models: sequential, lin_residual_network, sequential_w_fully_skip
model_name: 'sequential'
# NOTE(review): hidden_layers and width are given as single-element lists;
# confirm against the consumer whether multiple entries are supported.
hidden_layers: [1]
width: [10]
# possible initializations: kaiming_normal, xavier_normal, orthogonal
init_type: 'kaiming_normal'
# possible activation functions: linear, relu, gelu, leaky_relu
activation_func: 'linear'
batch_norm: true
# for leaky_relu:
neg_slope: 0.01
# for skip connections: a scalar or one of the strings '1/sqrt(L)', '1/L'
beta: 1
seed: 314159

############
# Analysis #
############
# Method used to compute the condition number of the Hessian and of the
# outer-product Hessian.
# NOTE(review): the other accepted values are not listed here -- confirm
# against the code that reads this key.
method_cond_num: 'naive'

########
# Data #
########

# possible choices: gaussian, mnist, fashion, cifar-10
dataset: 'mnist'
# input_dim must be adjusted to match downsample_factor when MNIST is used.
# NOTE(review): 49 is consistent with 28x28 images downsampled by 4 (7 * 7)
# -- confirm against the data loader.
input_dim: [49]
output_dim: [10]

# synthetic bimodal gaussian dataset
data_pts_per_class_training: 25000
data_pts_per_class_validation: 200
data_pts_per_class_testing: 200
whiten_gaussian: true

# MNIST dataset
downsample_factor: 4
datapoints: 1000
whiten_mnist: false

# CIFAR-10 dataset
grayscale: false
flatten: true
whiten_cifar10: false
datapoints_cifar10: 50000

#############
# Optimizer #
#############
# Loss function selector; 'mse' presumably stands for mean squared error.
# NOTE(review): other accepted values are not listed here -- confirm against
# the code that reads this key.
loss_func: 'mse'


