# Settings for the training loop driver.
# `_target_` names the class to build; the remaining keys are presumably
# forwarded to it as keyword arguments (Hydra-style instantiation — confirm
# against the code that loads this file).
_target_: pytorch_lightning.Trainer

max_epochs: 1000

# `accelerator: cpu` cannot be used, because the embedding layer needs
# xformer, which is CUDA-only.
accelerator: cuda
devices: [0]
strategy: ddp
# NOTE(review): newer Lightning releases may prefer the spelling "16-mixed";
# verify against the installed version before changing.
precision: 16

deterministic: false
detect_anomaly: false
enable_progress_bar: true

# Explicit null (previously a bare empty value, which parses to the same
# thing): no value is chosen here, so the Trainer's own handling of None
# applies.
accumulate_grad_batches: null

val_check_interval: 2000
log_every_n_steps: 1
num_sanity_val_steps: 2
limit_val_batches: 20
# Explicit null, same reasoning as accumulate_grad_batches above.
limit_test_batches: null

# OmegaConf interpolation using the built-in `oc.dict.values` resolver on the
# `callback` node; that node is defined outside this file.
callbacks: "${oc.dict.values: callback}"
