# CMakeLists.txt

cmake_minimum_required(VERSION 3.18)

# Default the CUDA architecture to that of the build machine's GPU(s) unless the
# user already chose one (-DCMAKE_CUDA_ARCHITECTURES=... or the CUDAARCHS
# environment variable).
#
# This must run BEFORE project(): enabling the CUDA language initializes
# CMAKE_CUDA_ARCHITECTURES on its own when it is still unset, so a default
# applied after project() would never take effect.  The special value "native"
# is only understood by CMake 3.24 and newer; older versions keep CMake's own
# default.  When configuring on a machine without a GPU, pass an explicit
# -DCMAKE_CUDA_ARCHITECTURES=<list> instead.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES
   AND NOT DEFINED ENV{CUDAARCHS}
   AND CMAKE_VERSION VERSION_GREATER_EQUAL 3.24)
  message(STATUS "CMAKE_CUDA_ARCHITECTURES not defined, setting to 'native'")
  set(CMAKE_CUDA_ARCHITECTURES native)
endif()

project(auction_kernel LANGUAGES C CXX CUDA)

# Find the required Python package
find_package(Python COMPONENTS Interpreter Development.Module REQUIRED)

# Locate the XLA FFI headers.  An XLA_DIR environment variable takes precedence;
# otherwise the Python interpreter found above is asked where JAX ships them.
if(DEFINED ENV{XLA_DIR})
  set(XLA_DIR "$ENV{XLA_DIR}")
else()
  execute_process(
    COMMAND "${Python_EXECUTABLE}"
            "-c" "from jax import ffi; print(ffi.include_dir())"
    RESULT_VARIABLE xla_query_result
    OUTPUT_VARIABLE XLA_DIR
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  # Stop at configure time instead of continuing with an empty include path,
  # which would only surface later as missing-header compile errors.
  if(NOT xla_query_result EQUAL 0 OR "${XLA_DIR}" STREQUAL "")
    message(FATAL_ERROR
      "Could not determine the XLA include directory by querying jax.ffi with "
      "${Python_EXECUTABLE} (result: ${xla_query_result}). "
      "Install JAX for this interpreter or set the XLA_DIR environment variable.")
  endif()
endif()
message(STATUS "XLA include directory: ${XLA_DIR}")

# Build the shared library from auction_batch.cu
add_library(auction_kernel SHARED auction_batch.cu)

# Expose the headers in XLA_DIR to this target and to anything linking it.
target_include_directories(auction_kernel PUBLIC "${XLA_DIR}")

# Require C++17.  cxx_std_17 only governs C++ sources (and C++ consumers of this
# target); the .cu file is compiled as CUDA, whose language standard is selected
# separately, so cuda_std_17 is requested as well.  It is PRIVATE so that the
# usage requirements seen by consumers stay exactly as before.
target_compile_features(auction_kernel PUBLIC cxx_std_17)
target_compile_features(auction_kernel PRIVATE cuda_std_17)

# CUDA specific options
set_target_properties(auction_kernel PROPERTIES
  # Enable separable compilation of device code.
  CUDA_SEPARABLE_COMPILATION ON
  # Alternative (disabled): compile for all supported architectures.
  # CUDA_ARCHITECTURES "all"
)

# Extra flags applied only when compiling CUDA sources.  Each flag is wrapped in
# its own quoted generator expression so it is passed as a single argument.
target_compile_options(auction_kernel PUBLIC
  # Add line info to the generated code (useful for debugging).
  "$<$<COMPILE_LANGUAGE:CUDA>:-lineinfo>"
  # Hand -fPIC to the host compiler: position-independent code for the shared library.
  "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-fPIC>"
)


# Install the built shared library into the directory that contains this
# CMakeLists.txt (CMAKE_CURRENT_LIST_DIR), i.e. alongside the sources.
# NOTE(review): presumably so the Python side can load it from there - confirm.
install(
  TARGETS auction_kernel
  LIBRARY DESTINATION "${CMAKE_CURRENT_LIST_DIR}"
)

# add_executable(auction auction_batch.cu)
# target_include_directories(auction PUBLIC ${XLA_DIR})
# target_compile_features(auction PUBLIC cxx_std_17)