###########
# General #
###########
# Run identification. The "n=1000" and "downsample_fac=4" parts of
# experiment_name mirror `datapoints` and `downsample_factor` in the Data
# section; keep them in sync when changing either value.
project_name: 'experiments'  # baseline for synthetic experiments
experiment_name: 'training_n=1000_xavier_init_downsample_fac=4'
# presumably the number of independent initializations per setting - confirm
num_inits: 3
max_epochs: 5000
# presumably the interval at which metrics are computed - confirm
calc_every_iter: 100
verbose_level: 0
# toggles the condition-number computation (see `method_cond_num` under Analysis)
calc_cond_num: false
save_model: true
# relative directory (note the trailing slash) used when save_model is true
save_path: 'trained_models_linearNet/'
device: 'cuda:0'

#########
# Model #
#########
# possible models: sequential, sequential_w_fully_skip, lin_residual_network
model_name: 'sequential'
# hidden_layers and width are given as lists (one entry each here);
# presumably every entry is a separate configuration to run - confirm
hidden_layers: [2]
width: [500]
bias: false
# possible activation functions: linear, relu, gelu, leaky_relu
activation_func: 'linear'
batch_norm: false
# for leaky_relu:
neg_slope: 0.01
# for skip connections:
beta: 0.5

# listed separately so it is not read as a skip-connection setting;
# presumably the global random seed - confirm
seed: 314159

############
# Analysis #
############
# method to calculate the condition number of the Hessian and outer product Hessian
# (only 'naive' is shown here; other accepted values are not visible in this file).
# NOTE(review): calc_cond_num is disabled in the General section, so this
# setting is presumably unused for this run - confirm.
method_cond_num: 'naive'

########
# Data #
########

# possible choices: gaussian, mnist, fashion, cifar-10
dataset: 'cifar-10'
# input_dim needs to be adjusted accordingly (depending on downsample_factor)
# when using MNIST or CIFAR-10 as dataset.
# NOTE(review): 192 is presumably 8 * 8 * 3 (32x32 RGB images downsampled by 4,
# grayscale off, flattened) - confirm against the data loader.
input_dim: 192  # [100]
output_dim: 10  # [10]
datapoints: 1000
whiten: true

# synthetic bimodal gaussian dataset (d=50, k=1, n=2000)
# (no gaussian-specific keys are set in this file)

# MNIST & CIFAR-10 dataset
downsample_factor: 4

# CIFAR-10 dataset
grayscale: false
flatten: true

#############
# Optimizer #
#############
loss_func: 'mse'  # mse, crossentropy
# possible optims: SGD, Adam, Adagrad
optimizer: 'SGD'
batch_size: 256
# learning rates are given as a list; commented-out alternative: lr: [1, 5, 6]
lr: [0.2]

# 0.0 disables weight decay
weight_decay: 0.0

############
# Plotting #
############
# presumably the variables encoded as color (hue) and marker size in the
# result plots - confirm against the plotting code.
# NOTE(review): 'width' matches the `width` key under Model, but no key named
# 'depth' is visible in this config (possibly derived from `hidden_layers`) - verify.
hue_var: 'depth'
size_var: 'width'
