# Experiment configuration with trainer, model, data, optimizer and
# lr_scheduler sections.
# NOTE(review): how experiment_name is consumed is not visible here - confirm.
experiment_name: rms_hyperparam_sweep
# Trainer settings. The class_path values name lightning.pytorch classes and
# init_args holds the constructor arguments passed to each of them.
trainer:
  # Upper bound on the training batches used per epoch (an integer count).
  limit_train_batches: 100
  max_epochs: 200
  logger:
    - class_path: lightning.pytorch.loggers.CSVLogger
      init_args:
        # NOTE(review): absolute, user-specific scratch path; adjust per host.
        save_dir: '/scratch/nia4240/attention-scratch/csv_logs'
  strategy:
    # Single-device strategy pinned to the first CUDA device.
    class_path: lightning.pytorch.strategies.SingleDeviceStrategy
    init_args:
      device: 'cuda:0'
model:
  dim_feedforward: 2048
  # The keys below are commented out, so this file does not set them.
  # NOTE(review): presumably supplied by sweep overrides or model defaults;
  # confirm against the launcher.
  # bias:
  # nheads:
  # num_layers:
  # positional_dim:
  # width_multiplier:
data:
  # Batch size 1024 was rated for up to dim=16, num_points=2, num_queries=-1.
  # NOTE(review): num_points below is 8, beyond that rated configuration;
  # confirm this batch size is still appropriate.
  batch_size: 1024
  dataset_class: task.NearestPointDataset
  # dim:
  num_points: 8
  # NOTE(review): -1 looks like a sentinel value; its meaning is not defined
  # in this file - confirm against the dataset class.
  num_queries: -1
  num_workers: 7
# Optimizer hyperparameters.
# NOTE(review): the optimizer class is not named in this section; lr, betas
# and weight_decay are presumably forwarded to an Adam-style optimizer -
# confirm against the training entry point.
optimizer:
  lr: 0.01
  # Floats carry a leading zero (0.9, not .9): same parsed value, easier to
  # read, and accepted by yamllint's float-values rule.
  betas:
    - 0.9
    - 0.95
  weight_decay: 0
lr_scheduler:
