# NOTE: the gluon headers are added with the directory-scoped
# include_directories() on purpose: target_link_libraries() does not work
# with cuda_add_library() in old versions of CMake, so the include path
# cannot be propagated through a link dependency there.
include_directories(${CMAKE_SOURCE_DIR}/libgluon/include)

# Static library shared by the applications declared with distApp() and
# distAppNoGPU() below. Its include/ directory is PUBLIC, so it is also added
# to the include path of every target that links against distbench.
add_library(distbench STATIC
  src/DistBenchStart.cpp
  src/DistributedGraphLoader.cpp
)
target_include_directories(distbench PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
target_link_libraries(distbench galois_cusp galois_gluon)

if(ENABLE_HETERO_GALOIS)
  # Turn on CUDA for distbench itself as well.
  target_compile_definitions(distbench PRIVATE __GALOIS_HET_CUDA__=1)

  # Debugging aid. This is a directory-scoped definition, so it applies to
  # the targets created in this directory and in the subdirectories added
  # further down.
  add_definitions(-D__GALOIS_CUDA_CHECK_ERROR__)

  if(CMAKE_BUILD_TYPE MATCHES "Debug")
    # Debug builds: append -lineinfo to the nvcc flag list and enable the
    # worklist debugging macro (directory-scoped, like the one above).
    list(APPEND CUDA_NVCC_FLAGS -lineinfo)
    add_definitions(-D__GALOIS_DEBUG_WORKLIST__)
  endif()
endif()

# CuSP uses DGAccumulator, which uses LCI when it is enabled, so the LCI
# headers have to be visible to distbench and to everything linking it.
if(USE_LCI)
  target_include_directories(distbench PUBLIC ${LWCI_INCLUDE})
endif()

# distApp(<name>)
#
# Declares the application <name> from every <name>*.cpp file in the current
# source directory (through app(... DISTSAFE)) and links it against
# distbench.
#
# Per-application compile definitions, each added only when the
# corresponding variable is true:
#   REPORT_PER_ROUND_STATS -> DIST_PER_ROUND_TIMER=1
#   REPORT_COMM_STATS      -> MORE_COMM_STATS=1
#   USE_BARE_MPI           -> __GALOIS_BARE_MPI_COMMUNICATION__=1
#
# When ENABLE_HETERO_GALOIS is true, the application is additionally compiled
# with __GALOIS_HET_CUDA__=1 and linked against <name>_cuda, a library built
# with cuda_add_library() from every <name>*.cu file and linked against
# galois_gpu.
function(distApp name)
  file(GLOB host_sources ${name}*.cpp)
  app(${name} ${host_sources} DISTSAFE)
  target_link_libraries(${name} distbench)

  if(REPORT_PER_ROUND_STATS)
    target_compile_definitions(${name} PRIVATE DIST_PER_ROUND_TIMER=1)
  endif()

  if(REPORT_COMM_STATS)
    target_compile_definitions(${name} PRIVATE MORE_COMM_STATS=1)
  endif()

  if(USE_BARE_MPI)
    target_compile_definitions(${name}
      PRIVATE __GALOIS_BARE_MPI_COMMUNICATION__=1)
  endif()

  # Everything below is only needed for heterogeneous (CUDA) builds.
  if(NOT ENABLE_HETERO_GALOIS)
    return()
  endif()

  set(gpu_lib ${name}_cuda)
  target_compile_definitions(${name} PRIVATE __GALOIS_HET_CUDA__=1)
  # Linking by name before the library target exists is fine: the name is
  # resolved to the target created just below when the build is generated.
  target_link_libraries(${name} ${gpu_lib})

  file(GLOB gpu_sources ${name}*.cu)
  cuda_add_library(${gpu_lib} ${gpu_sources})
  target_link_libraries(${gpu_lib} galois_gpu)
endfunction()

# distAppNoGPU(<name>)
#
# CPU-only counterpart of distApp(): declares the application <name> from
# every <name>*.cpp file in the current source directory (through
# app(... DISTSAFE)) and links it against distbench. No CUDA library is
# created, whatever the value of ENABLE_HETERO_GALOIS.
#
# The only optional definition handled here is
# __GALOIS_BARE_MPI_COMMUNICATION__=1, added when USE_BARE_MPI is true.
function(distAppNoGPU name)
  file(GLOB host_sources ${name}*.cpp)
  app(${name} ${host_sources} DISTSAFE)
  target_link_libraries(${name} distbench)

  if(USE_BARE_MPI)
    target_compile_definitions(${name}
      PRIVATE __GALOIS_BARE_MPI_COMMUNICATION__=1)
  endif()
endfunction()

# Script invoked by the verify-* tests registered in testDist().
set(RESULT_CHECKER ${CMAKE_SOURCE_DIR}/scripts/result_checker.py)
# Host name of the configuring machine; it is part of the log file pattern
# handed to the result checker.
cmake_host_system_information(RESULT HOSTNAME QUERY HOSTNAME)

# testDist(<app> <input> <type> <part> <N> <np> [extra app arguments...])
#
# Registers the CTest test
#   run-<app>-<type>-<input>-<part>-<np>
# which launches ./<app> through mpiexec with <np> processes, the extra
# arguments, -t=<N / np> (integer division) and -partition=<part>.
# The test runs with GALOIS_DO_NOT_BIND_THREADS=1 in its environment.
#
# If ${BASEOUTPUT}/<input>.<app_id> exists, where <app_id> is the part of
# <app> before its first underscore, the application is also given -verify
# and a second test
#   verify-<app>-<type>-<input>-<part>-<np>
# is registered that runs the result checker on that file and on
# output_<hostname>_*.log. The verify test is declared to DEPEND on the run
# test so that CTest never schedules the checker before (or, with
# `ctest -j`, concurrently with) the run it is meant to check.
#
# NOTE(review): add_test() does not run commands through a shell, so the
# `*` in the log pattern reaches the checker unexpanded; presumably the
# script expands it itself - confirm.
function(testDist app input type part N np)
  math(EXPR t "(${N} / ${np})")
  string(REPLACE "_" ";" app_list ${app})
  list(GET app_list 0 app_id)
  set(OUTPUT ${BASEOUTPUT}/${input}.${app_id})

  # Common tail of both test names.
  set(test_suffix ${app}-${type}-${input}-${part}-${np})

  if(EXISTS ${OUTPUT})
    add_test(run-${test_suffix} mpiexec -n=${np} ./${app}  ${ARGN} -t=${t} -partition=${part} -verify)
    add_test(verify-${test_suffix} python ${RESULT_CHECKER} -t=0.01 -sort=1 -delete=1 ${OUTPUT} output_${HOSTNAME}_*.log)
    # DEPENDS only orders the two tests; it does not require the run test
    # to pass before the checker is started.
    set_tests_properties(verify-${test_suffix} PROPERTIES DEPENDS run-${test_suffix})
  else()
    add_test(run-${test_suffix} mpiexec -n=${np} ./${app}  ${ARGN} -t=${t} -partition=${part})
  endif()
  set_tests_properties(run-${test_suffix} PROPERTIES ENVIRONMENT GALOIS_DO_NOT_BIND_THREADS=1)
endfunction()

# testDistPartition(<app> <input> <type> <test_gpu> <part> [extra args...])
#
# Registers, through testDist(), all runs of <app> on <input> for the single
# partitioning policy <part>.
#
# The thread budget is NUM_TEST_THREADS when that variable is true, otherwise
# the number of physical cores reported by CMake (a reported 0 is treated
# as 1).
#
# CPU runs (test type "<type>-cpu"):
#   * budget below 8: one run for every process count from 1 to the budget;
#   * otherwise: process counts of 1/4, 2/4, 3/4 and 4/4 of the budget, plus
#     a single-process run when the 1/4 count is not already 1.
#
# GPU runs are added only when ENABLE_HETERO_GALOIS is true, <test_gpu>
# equals 1 and 0 < NUM_TEST_GPUS < budget:
#   * "<type>-gpu": for k = 1..NUM_TEST_GPUS, k processes with
#     -num_nodes=1 and -pset= followed by k times "g";
#   * "<type>-cpugpu": NUM_TEST_GPUS + 1 processes with the same -pset
#     string followed by "c", plus -scalegpu=3.
function(testDistPartition app input type test_gpu part)
  if(NOT NUM_TEST_THREADS)
    cmake_host_system_information(RESULT total_threads QUERY NUMBER_OF_PHYSICAL_CORES)
  else()
    set(total_threads ${NUM_TEST_THREADS})
  endif()
  if(total_threads EQUAL 0)
    set(total_threads 1)
  endif()

  set(cpu_type ${type}-cpu)
  if(NOT total_threads LESS 8)
    foreach(quarter RANGE 1 4)
      math(EXPR procs "(${total_threads} * ${quarter})/ 4")
      if((quarter EQUAL 1) AND NOT (procs EQUAL 1))
        testDist(${app} ${input} ${cpu_type} ${part} ${total_threads} 1 ${ARGN})
      endif()
      testDist(${app} ${input} ${cpu_type} ${part} ${total_threads} ${procs} ${ARGN})
    endforeach()
  else()
    foreach(procs RANGE 1 ${total_threads})
      testDist(${app} ${input} ${cpu_type} ${part} ${total_threads} ${procs} ${ARGN})
    endforeach()
  endif()

  # The remainder registers GPU runs; leave early when they do not apply.
  if(NOT (ENABLE_HETERO_GALOIS AND (test_gpu EQUAL 1)))
    return()
  endif()

  set(gpu_count ${NUM_TEST_GPUS})
  if(NOT ((0 LESS gpu_count) AND (gpu_count LESS total_threads)))
    return()
  endif()

  # Grow the -pset string by one "g" per additional process.
  set(pset_arg "-pset=")
  foreach(procs RANGE 1 ${gpu_count})
    set(pset_arg "${pset_arg}g")
    testDist(${app} ${input} ${type}-gpu ${part} ${total_threads} ${procs} ${ARGN} -num_nodes=1 ${pset_arg})
  endforeach()

  # Mixed run: every "g" entry plus one trailing "c" entry.
  set(pset_arg "${pset_arg}c")
  math(EXPR procs "(${gpu_count} + 1)")
  testDist(${app} ${input} ${type}-cpugpu ${part} ${total_threads} ${procs} ${ARGN} -num_nodes=1 ${pset_arg} -scalegpu=3)
endfunction()

# testDistExec(<app> <input> <type> <test_gpu> [extra args...])
#
# Calls testDistPartition() once per partitioning policy, in this order:
# oec, iec, cvc, cvc-iec, hovc, hivc. All other arguments are forwarded
# unchanged.
function(testDistExec app input type test_gpu)
  foreach(part_policy oec iec cvc cvc-iec hovc hivc)
    testDistPartition(${app} ${input} ${type} ${test_gpu} ${part_policy} ${ARGN})
  endforeach()
endfunction()

# testDistSyncOnlyNoGPUApp(<app> <input> [extra args...])
#
# Registers the tests of <app> on <input> under the type label "sync" with
# the test_gpu flag set to 0, i.e. without GPU runs. No -exec option is
# appended to the extra arguments.
function(testDistSyncOnlyNoGPUApp app input)
  testDistExec(
    ${app} ${input}
    sync 0
    ${ARGN})
endfunction()

# testDistSyncOnlyApp(<app> <input> [extra args...])
#
# Registers the tests of <app> on <input> under the type label "sync" with
# the test_gpu flag set to 1, so GPU runs are requested as well. No -exec
# option is appended to the extra arguments.
function(testDistSyncOnlyApp app input)
  testDistExec(
    ${app} ${input}
    sync 1
    ${ARGN})
endfunction()

# testDistApp(<app> <input> [extra args...])
#
# Registers two families of tests for <app> on <input>, both with the
# test_gpu flag set to 1:
#   * type label "sync",  with -exec=Sync  appended after the extra args;
#   * type label "async", with -exec=Async appended after the extra args.
function(testDistApp app input)
  testDistExec(
    ${app} ${input}
    sync 1
    ${ARGN} -exec=Sync)
  testDistExec(
    ${app} ${input}
    async 1
    ${ARGN} -exec=Async)
endfunction()

# Subdirectories processed after everything above, so the functions and
# directory-scoped settings defined in this file are visible to them.
# Each one must provide its own CMakeLists.txt.
add_subdirectory(bc)
add_subdirectory(bfs)
add_subdirectory(cc)
add_subdirectory(kcore)
add_subdirectory(pagerank)
add_subdirectory(sgd)
add_subdirectory(sssp)

# Kept apart from the list above in the original layout.
add_subdirectory(partition)
