# Hydra-style defaults list: this config is composed from one option of the
# `model` config group and one option of the `data` config group.
defaults:
  - model: small_rtt
  - data: sine
  # `_self_` is listed last, so the values defined in this file are merged
  # after (and take precedence over) the group configs above.
  - _self_

# Core training hyperparameters.
# `lr` is the single source of truth for the learning rate: `optim.kwargs.lr`
# reads it through the `${lr}` interpolation.
# NOTE(review): `3e-4` has no decimal point; stock PyYAML (YAML 1.1) reads such
# scalars as strings, so this relies on the loader (presumably Hydra/OmegaConf)
# resolving it as a float — confirm.
lr: 3e-4
n_epochs: 500
batch_size: 64

# Data loading and evaluation cadence.
# NOTE(review): `null` presumably means "do not cap the number of samples" —
# confirm against the data-loading code.
n_samples: null
num_workers: 8
# NOTE(review): unit is presumably epochs (cf. `n_epochs`) — confirm.
eval_every: 10

# Model compilation settings.
# NOTE(review): `compile` presumably toggles torch.compile and `compile_kwargs`
# is forwarded to it as keyword arguments — confirm against the training code.
compile: false
compile_kwargs:
  # mode: reduce-overhead
  mode: null
  options:
    # NOTE(review): verify this option key is accepted by the installed
    # PyTorch version's compile backend.
    matmul-padding: true

# Optimizer specification: `cls` is the dotted path of the optimizer class and
# `kwargs` holds its keyword arguments (presumably passed to the constructor by
# the training code — confirm).
optim:
  cls: torch.optim.AdamW
  kwargs:
    # Interpolates the top-level `lr` so the learning rate is set in one place.
    lr: ${lr}
    weight_decay: 5e-4
    amsgrad: true
    fused: false

# Learning-rate scheduler.
# NOTE(review): `use_scheduler` presumably gates whether `scheduler` below is
# instantiated at all — confirm against the training code.
use_scheduler: false
scheduler:
  # Dotted path to a scheduler class; `lr_scheduler` is not a torch-qualified
  # path, so it is presumably a project-local module — confirm.
  cls: lr_scheduler.CosLRScheduler
  kwargs:
    warmup_steps: 4000
    decay_steps: 80000
    
# Checkpoint to load.
# NOTE(review): `null` presumably means "start without loading one" — confirm.
load_ckpt: null

# Mixed-precision settings. `use_amp` is the single switch: both
# `gradscaler.enabled` and `autocast.enabled` interpolate it, so the two can
# never disagree.
use_amp: false
gradscaler:
  enabled: ${use_amp}
autocast:
  device_type: cuda
  enabled: ${use_amp}
  # NOTE(review): this is a plain string, not a dtype object; presumably the
  # training code maps it to the corresponding torch dtype — confirm.
  dtype: float16

# Gradient clipping: on/off switch plus the norm threshold.
# NOTE(review): presumably applied as clip-by-global-norm — confirm.
clip_grad: true
clip_grad_max_norm: 1.0

# Run bookkeeping: RNG seed, GPU selection, output location and experiment
# tracking identifiers.
seed: 42
# NOTE(review): presumably a CUDA device index — confirm.
gpu: 0
save_path: ./output
wandb:
  project: ssl_sine
  # NOTE(review): `null` presumably defers to the tracker's defaults for the
  # account/team and the run name — confirm.
  entity: null
  name: null

# Backend numeric/performance knobs.
# NOTE(review): presumably forwarded to torch.set_float32_matmul_precision and
# torch.backends.cudnn.benchmark respectively — confirm against the training code.
matmul_precision: high
cudnn_benchmark: false

# Debug switch.
# NOTE(review): what it changes is not visible in this file — confirm.
debug: false

# Representation-learning settings.
# NOTE(review): `temperature` is presumably the softmax temperature of a
# contrastive/SSL loss — confirm.
embedding_dim: 16
temperature: 0.1
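# Disabled alternative. `eval_every` is already set earlier in this file, so
# edit that entry rather than uncommenting this line (doing so would create a
# duplicate key).
# eval_every: 1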

# Disabled override: would set the model's `d_out` kwarg to `embedding_dim`.
# model:
#   kwargs:
#     d_out: ${embedding_dim}