# --- Toolchain ---------------------------------------------------------------
# CUDA_PATH uses ?= so a value already set in the environment or on the command
# line wins. NVCC is expanded once, immediately, from CUDA_PATH as it is here.
CUDA_PATH ?= /usr/local/cuda
NVCC      := $(CUDA_PATH)/bin/nvcc

# --- Build knobs (overridable, e.g. `make ARCH=... NVCCFLAGS=...`) -----------
# NVCCFLAGS is passed to every nvcc invocation; ARCH is passed as -arch=$(ARCH).
NVCCFLAGS ?= -O3 --use_fast_math -maxrregcount=64
ARCH      ?= sm_86

# --- Build products ----------------------------------------------------------
# Paths of the two artifacts built by `all`.
CUBIN_PATH := lib/kernel.cubin
PTX_PATH   := lib/kernel.ptx

# Delete a target whose recipe exits non-zero, so a partially written output
# left behind by a failed nvcc run is never mistaken for an up-to-date file.
.DELETE_ON_ERROR:

# `all` and `clean` name actions, not files; declaring them phony keeps a stray
# file called `all` or `clean` from masking the rule.
.PHONY: all clean

# First ordinary rule in this file, hence the default goal: build both the PTX
# and the cubin form of the kernel.
all: $(PTX_PATH) $(CUBIN_PATH)

# Output directory for the build products. The compile rules list it as an
# order-only prerequisite, so it only has to exist; its timestamp is ignored.
# `mkdir -p` succeeds even if the directory is already there.
lib: ; @mkdir -p $@

# Compile the kernel source to PTX (nvcc -ptx) for the architecture in ARCH.
# Rebuilds when the .cu file or its header changes. `lib` is order-only: it
# must exist before the recipe runs, but its mtime never triggers a rebuild.
# $< expands to the first prerequisite (the .cu file), so the source path is
# written once in the prerequisite list and cannot drift from the recipe.
$(PTX_PATH): src/langevin_kernel.cu include/langevin_kernel.h | lib
	$(NVCC) $(NVCCFLAGS) -ptx -arch=$(ARCH) -Iinclude -o $@ $<

# Compile the kernel source to a cubin (nvcc -cubin) for the architecture in
# ARCH. Rebuilds when the .cu file or its header changes. `lib` is order-only:
# it must exist before the recipe runs, but its mtime never triggers a rebuild.
# $< expands to the first prerequisite (the .cu file), so the source path is
# written once in the prerequisite list and cannot drift from the recipe.
$(CUBIN_PATH): src/langevin_kernel.cu include/langevin_kernel.h | lib
	$(NVCC) $(NVCCFLAGS) -cubin -arch=$(ARCH) -Iinclude -o $@ $<

# Remove everything the build produced by deleting the whole output directory.
# $(RM) is make's predefined `rm -f`, so a missing `lib` is not an error.
clean:
	$(RM) -r lib
