#!/bin/bash
#SBATCH --job-name=ssl
#SBATCH --output=output_%j.log             
#SBATCH --error=error_%j.log               
#SBATCH --nodes=NUM_NODES                  
#SBATCH --ntasks-per-node=NTASKS_PER_NODE                
#SBATCH --cpus-per-task=CPUS_PER_TASK      
#SBATCH --gres=GRES:GPUS_PER_NODE                        
#SBATCH --time=TIME                        
#SBATCH --mem=MEM_PER_NODE 
##SBATCH --constraint=a100
##SBATCH --constraint=rtx8000|a100
                

## comments:
## see https://lightning.ai/docs/pytorch/stable/clouds/cluster_advanced.html

## NOTE: the commands must be launched through srun so that SLURM assigns the requested resources to them.
## See the troubleshooting section of
## https://lightning.ai/docs/pytorch/latest/clouds/cluster_advanced.html#troubleshooting
## To restrict the job to specific machines, enable one of the directives below
## by changing its leading '##' to a single '#':
##SBATCH --constraint=rtx8000|a100
##SBATCH --partition=chemistry_a100_2
##SBATCH --nodelist=ga029,ga031
##SBATCH --constraint=a100


## Run the whole pipeline (pretrain -> linear probe -> semi-supervised) inside
## the Singularity container as a single srun step.
##   --nv       enable NVIDIA GPU support inside the container
##   --overlay  attach the ImageNet train/val squashfs images and the ext3
##              overlay, all read-only (:ro); /ext3/env.sh is presumably
##              provided by the ext3 overlay -- confirm if the overlay changes
## CONDA_ENV and ARG1 ARG2 are template placeholders, like the upper-case
## values in the #SBATCH header; they are expected to be substituted before
## submission.
## The stages are chained with '&&' so that a failure in environment setup or
## in an earlier stage stops the pipeline immediately and is reported as the
## job's exit status, instead of running later stages on missing or stale
## outputs.
srun singularity exec --nv \
    --overlay /vast/work/public/ml-datasets/imagenet/imagenet-train.sqf:ro \
    --overlay /vast/work/public/ml-datasets/imagenet/imagenet-val.sqf:ro \
    --overlay /scratch/gz2241/sig-ml/overlay-15GB-500K.ext3:ro \
    /scratch/work/public/singularity/cuda12.1.1-cudnn8.9.0-devel-ubuntu22.04.2.sif \
    /bin/bash -c "source /ext3/env.sh &&
        conda activate CONDA_ENV &&
        python pretrain.py ARG1 ARG2 &&
        python linear_probe.py ARG1 ARG2 &&
        python semi_sl.py ARG1 ARG2"
