; Run-level settings: node/GPU/CPU counts, data-loader prefetching, numeric
; precision, seeding, distributed strategy and profiling toggle.
; Booleans in this file are written as yes/no.
[INFO]
num_nodes = 1
gpus_per_node = 2
cpus_per_gpu = 8
prefetch_factor = 2
precision = 16-mixed
fix_random_seed = yes
; Set to ddp when running on more than one GPU (gpus_per_node is 2 here).
strategy = ddp
if_profile = no

; Dataset selection and data-augmentation settings.
[DATA]
dataset = CIFAR10
n_views = 16
n_trans = 1
; Comma-separated list of augmentation names (no spaces between entries).
augmentations = RandomResizedCrop,GaussianBlur,RandomGrayscale,ColorJitter,RandomHorizontalFlip
; Alternative augmentation backend, kept for reference; albumentations is active.
;augmentation_package = torchvision
augmentation_package = albumentations
; NOTE(review): the crop_*, hflip_*, blur_*, grayscale_* and jitter_* keys below
; presumably parameterize the augmentations listed above -- confirm against the
; code that builds the transform pipeline.
crop_size = 32
crop_min_scale = 0.08
crop_max_scale = 1.0
hflip_prob = 0.5
blur_kernel_size = 3
blur_prob = 0.5
grayscale_prob = 0.2
jitter_brightness = 0.8
jitter_contrast = 0.8
jitter_saturation = 0.8
jitter_hue = 0.2
jitter_prob = 0.8


; Settings for the SSL stage: backbone, projection head, optimizer, loss and
; training schedule.
; NOTE(review): "SSL" presumably stands for self-supervised learning -- confirm.
[SSL]
backbone = resnet18
; Disabled override, kept for reference.
;backbone_out_dim = 2048
use_projection_head = yes
proj_dim = 2048
proj_out_dim = 256
optimizer = LARS
lr = 7.5
lr_scale = linear
lr_scheduler = cosine-warmup
momentum = 0.0
weight_decay = 1e-4
exclude_bn_bias_from_weight_decay = yes

; lars_eta applies only to the LARS optimizer; with any other optimizer it is
; redundant.
lars_eta = 0.001 
loss_function = LogRepulsiveEllipsoidPackingLossUnitNorm
; Alternative loss function, kept for reference.
;loss_function = EllipsoidPackingLoss
; NOTE(review): lw0/lw1/lw2, pot_pow and rs look like parameters of the selected
; loss function -- confirm their meaning against the loss implementation.
lw0 = 0.0
lw1 = 1.0
lw2 = 0.0
pot_pow = 2.0
rs = 7.0
warmup_epochs = 20
n_epochs =  1000
batch_size = 128
save_every_n_epochs = 100
skip_validation = no
; Settings for the LC stage: classifier output size, optimizer, schedule and
; input preprocessing.
; NOTE(review): "LC" presumably stands for linear classification -- confirm.
[LC]
output_dim = 10
optimizer = Adam
use_batch_norm = no
; Comma-separated list of learning rates.
; NOTE(review): presumably each value is tried in turn -- confirm against the caller.
lr_sweep = 0.3,0.1,0.05
lr_scale = linear
lr_scheduler = cosine
weight_decay = 0.0
; momentum is a dummy value here: it is not used with Adam.
momentum = -1
loss_function = CrossEntropyLoss
n_epochs = 100
save_every_n_epochs = 50
batch_size = 1024
apply_simple_augmentations = yes
standardize_to_imagenet = no
; Notes:
; - solo-learn is a good reference for hyperparameters.
; - Rule of thumb: lr ~ batch_size / 256.

