
# Pull in the nested build directories, in this fixed order.
foreach(m_npeff_subdir IN ITEMS containers preprocess util)
    add_subdirectory(${m_npeff_subdir})
endforeach()


# Files that make up the m_npeff library, grouped by directory. Paths are
# written relative to this directory; headers are listed next to their
# implementation files. Everything is appended to m_npeff_src_files in order.

# coeff_fitting/
list(APPEND m_npeff_src_files
    coeff_fitting/host_context.cc
    coeff_fitting/config.cc
)

# containers/
list(APPEND m_npeff_src_files
    containers/dense_matrix.h
    containers/sparse_matrix.h
)

# factorizations/
list(APPEND m_npeff_src_files
    factorizations/dn_lrm_factorization/compute_tr_xx.cc
    factorizations/dn_lrm_factorization/device_worker.cc
    factorizations/dn_lrm_factorization/io_util.cc
    factorizations/dn_lrm_factorization/manager.cc
    factorizations/dn_lrm_factorization2/worker.cc
    factorizations/dn_lrm_factorization2/io_util.cc
    factorizations/dn_lrm_factorization2/manager.cc
    factorizations/lvrm_coeff_fitting/config.cc
    factorizations/lvrm_coeff_fitting/host_context.cc
)

# gpu/
list(APPEND m_npeff_src_files
    gpu/gpu_info.cc
    gpu/types.cc
    gpu/containers/dense_matrix.cc
    gpu/containers/transfers.cc
    gpu/contexts/device_context.cc
    gpu/ops/dndn_matmul.cc
    gpu/ops/custom/ag_to_numerator.cc
    gpu/ops/custom/compute_w_ag.cc
    gpu/ops/custom/elwise_square.cc
    gpu/ops/custom/gradient_descent.cc
    gpu/ops/custom/hadamard_product.cc
    gpu/ops/custom/multiplicative_update.cc
    gpu/ops/custom/multiply_and_add_identity.cc
)

# inputs/
list(APPEND m_npeff_src_files
    inputs/dn_pefs/cl_bert_dn_lrm_pefs_loader.cc
    inputs/dn_pefs/dn_lrm_pefs.cc
    inputs/dn_pefs/dn_lrm_pefs_loader.cc
    inputs/lrm_pefs.h
    inputs/lrm_pefs.cc
    inputs/lvrm_pefs.h
    inputs/lvrm_pefs.cc
    inputs/lrm_npeff_decomposition.h
    inputs/lrm_npeff_decomposition.cc
)

# outputs/
list(APPEND m_npeff_src_files
    outputs/lrm_npeff_decomposition.cc
    outputs/W_partitions.cc
)

# preprocess/
list(APPEND m_npeff_src_files
    preprocess/construct_csr_matrix.h
    preprocess/construct_csr_matrix.cc
    preprocess/column_pruning.h
    preprocess/construct_dn_pefs_partition_matrices.cc
    preprocess/construct_dn_lrm_pefs_datawise_partitions.cc
    preprocess/pef_normalization.cc
)

# util/
list(APPEND m_npeff_src_files
    util/array_util.h
    util/flag_util.cc
    util/h5_util.h
    util/h5_util.cc
)

# Placeholder files; they exist only so that Sublime Text works properly
# with these directories.
list(APPEND m_npeff_src_files
    expansion/dummy.cc
    factorization/dummy.cc
    factorizations/lvrm_factorization/dummy.cc
    factorizations/stiefel/dummy.cc
    factorizations/util/dummy.cc
    gpu/ops/custom/dummy.cc
    gpu/ops/debugging/dummy.cc
    outputs/dummy.cc
)

# Static library built from every file collected in m_npeff_src_files.
add_library(m_npeff STATIC ${m_npeff_src_files})

# Make the sibling "src" directory an include root, both for m_npeff itself
# and (PUBLIC) for any target that links against it.
target_include_directories(
    m_npeff
    PUBLIC
    "${CMAKE_CURRENT_SOURCE_DIR}/../src"
)

# When SET_TRUE_WHEN_ACTUALLY_BUILDING is enabled, compile the library's
# sources as CUDA regardless of their file extension.
# NOTE(review): the flag is presumably left off for editor/tooling-only
# configures -- confirm with whoever drives the real build.
if(SET_TRUE_WHEN_ACTUALLY_BUILDING)
    # Setting LANGUAGE on a file tells CMake to compile it, so header files
    # must be excluded: otherwise every listed header would be built as its
    # own CUDA translation unit.
    set(m_npeff_cuda_src_files ${m_npeff_src_files})
    list(FILTER m_npeff_cuda_src_files EXCLUDE REGEX "\\.(h|hh|hpp|hxx|cuh)$")

    # Skip the call entirely if nothing is left after filtering.
    if(m_npeff_cuda_src_files)
        set_source_files_properties(
            ${m_npeff_cuda_src_files}
            PROPERTIES LANGUAGE CUDA
        )
    endif()
endif()

# HDF5: header and library search paths come from HDF5_INCLUDE_DIR and
# HDF5_LIB_DIR; the libraries themselves are linked by name. Everything is
# PUBLIC, so targets linking m_npeff inherit these settings.
target_include_directories(
    m_npeff
    PUBLIC
    ${HDF5_INCLUDE_DIR}
)
target_link_directories(
    m_npeff
    PUBLIC
    ${HDF5_LIB_DIR}
)
target_link_libraries(
    m_npeff
    PUBLIC
    hdf5
    hdf5_cpp
)

# CUDA: header and library search paths come from CUDA_INCLUDE_DIR and
# CUDA_LIB_DIR; the libraries are linked by name, in the order listed.
# Everything is PUBLIC, so targets linking m_npeff inherit these settings.
target_include_directories(
    m_npeff
    PUBLIC
    ${CUDA_INCLUDE_DIR}
)
target_link_directories(
    m_npeff
    PUBLIC
    ${CUDA_LIB_DIR}
)
target_link_libraries(
    m_npeff
    PUBLIC
    nccl
    cublas
    curand
    cusparse
    cusolver
)

# Compiler flags applied only when building m_npeff itself (PRIVATE, so they
# are not propagated to targets that link against it).
target_compile_options(
    m_npeff
    PRIVATE
    -O3
    -fno-math-errno
    -fno-trapping-math
)
