#ifndef _STACK_CUDA_UTILS_H
#define _STACK_CUDA_UTILS_H

#include <cmath>

// Expands to the integer literal 256. Judging by the name, this is presumably
// the number of threads per block used for kernel launches in files that
// include this header -- no use is visible here, so confirm against callers.
#define THREADS_PER_BLOCK 256
// DIVUP(m, n): the truncated quotient m / n, plus one whenever m % n leaves a
// strictly positive remainder. For a positive divisor n this is m / n rounded
// up to the next integer.
// Each argument appears twice in the expansion, so do not pass expressions
// with side effects (e.g. DIVUP(i++, n)).
#define DIVUP(m, n) \
    (((m) / (n)) + ((((m) % (n)) > 0) ? 1 : 0))

#endif
