# Top-level run selection. The accepted values for `algo` and `problem` are
# defined by the code that loads this file, which is not visible here.
# NOTE(review): "bp" presumably stands for backpropagation — confirm.
algo: "bp"
problem: "vision"
# NOTE(review): presumably the random seed for the run — confirm.
seed: 0

# task: number of rounds, dataset, data distribution and client layout.
n_rounds: 1000
dataset: "mnist"
# Values listed by the original author: IID, NON-IID, ONE, TWO.
dist: "NON-IID"
# NOTE(review): looks like [batch, height, width, channels] for 28x28x1 MNIST
# images — confirm against the code that consumes `pholder`.
pholder: [1, 28, 28, 1]
num_classes: 10
n_clients: 1
n_shards_per_client: 1
# Booleans are written in canonical lowercase so that every YAML loader
# (1.1 and 1.2, including strict JSON-schema loaders) reads them as booleans
# rather than as the strings "True" / "False".
use_max_padding: false
batch_in_gpu: true
# train: batch size and optimizer settings.
batch_size: 512
opt_name: "sgd"
# NOTE(review): the precision of `lr` and `momentum` suggests they came from
# an automated hyperparameter search — confirm before hand-editing.
lr: 0.011149277202806797
momentum: 0.8099114269439881

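# Alternative architectures are kept in this file as commented-out blocks.
# Only one `network_name` / `network_config` pair may be active at a time
# (currently "ResNet"); uncommenting a second pair would create duplicate keys.
#network_name: "VGG"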
#network_config:
#  block_depths: [2, 2]  # Two blocks, each with 2 layers
#  features_per_block: [32, 64]  # 32 features for the first block, 64 for the second
#  kernel_size: 3  # 3x3 kernels
#  strides: 1
#  dense_layers: [128]  # One dense layer with 128 units before the output layer
#  num_output_units: 10  # 10 classes for MNIST digits

#network_name: "CNN"
#network_config:
#  depth_1: 1
#  depth_2: 1
#  features_1: 8
#  features_2: 16
#  kernel_1: 5
#  kernel_2: 5
#  strides_1: 1
#  strides_2: 1
#  num_linear_layers: 0
#  num_output_units: 10

# Active model: a ResNet configured for MNIST.
network_name: "ResNet"
network_config:
  # One output unit per MNIST digit class.
  num_output_units: 10
  # Three layers of two blocks each; kept shallow because MNIST is simple.
  num_blocks_per_layer: [2, 2, 2]
  # One feature count per layer (same length as num_blocks_per_layer);
  # reduced to limit model complexity.
  features_per_layer: [32, 64, 128]
  # NOTE(review): presumably a square 3x3 convolution kernel — confirm.
  kernel_size: 3
  strides: 1
  # NOTE(review): if the training loss expects raw logits, a softmax output
  # here would be applied twice — confirm against the loss implementation.
  output_activation: "softmax"
  # Name of the kernel initializer; the accepted names are defined by the
  # model code, which is not visible here.
  kernel_init_type: "lecun_normal"


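# NOTE(review): this VGG variant uses the key `block_depth`, while the other
# commented-out VGG example in this file uses `block_depths` — confirm which
# spelling the VGG builder expects before enabling either.
#network_name: "VGG"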
#network_config:
#  block_depth: [2, 2, 3, 3, 3]
#  features_per_block: [64, 128, 256, 512, 512]
#  kernel_size: 5
#  strides: 1
#  num_output_units: 10

# sparse: sparsification settings.
# Canonical lowercase boolean so every YAML loader parses it as a boolean.
sparsify: true
# NOTE(review): unclear from this file whether 0.5 is a fraction (50%) or a
# literal 0.5 percent, and whether it is the share kept or removed — confirm.
percentage: 0.5

# quantize: quantization settings.
# Canonical lowercase boolean so every YAML loader parses it as a boolean.
quantize: true
# Bit width used when quantization is enabled.
n_bits: 8

# eval: evaluation schedule.
# NOTE(review): presumably "evaluate every N rounds" (see n_rounds) — confirm.
eval_every: 1

# save: model checkpointing.
# Canonical lowercase boolean so every YAML loader parses it as a boolean.
save_model: true
# NOTE(review): relative path — presumably resolved against the process
# working directory; confirm.
save_dir: "saved_models"