# Makefile for FPGA-GPU P2P Transfer Demo
#
# Build targets:
#   make simple          - Build simple demo (no xclbin needed) - RECOMMENDED
#   make host            - Build host application (needs an xclbin at run time)
#   make fpga_kernel     - Build FPGA xclbin (hardware, takes hours)
#   make fpga_kernel_emu - Build FPGA xclbin for hardware emulation
#   make clean           - Clean build artifacts
#   make help            - Show the list of targets

# ============================================================================
# Configuration
# ============================================================================

# Platform name handed to v++ through --platform
PLATFORM := xilinx_u55c_gen3x16_xdma_3_202210_1

# Build flavour handed to v++ through --target; it also names the output
# directory, so each flavour gets its own build_<TARGET> tree.
TARGET ?= hw
BUILD_DIR := build_$(TARGET)

# Tool install roots. ?= keeps a value that is already set (environment or
# command line) and only falls back to these defaults otherwise.
ROCM_PATH ?= /opt/rocm
XILINX_XRT ?= /opt/xilinx/xrt
XILINX_VITIS ?= /opt/xilinx/Vitis/2024.2

# Compiler drivers (the rules visible in this file invoke HIPCC and VPP;
# CXX is defined but not referenced by any of them)
CXX := g++
HIPCC := $(ROCM_PATH)/bin/hipcc
VPP := $(XILINX_VITIS)/bin/v++

# ============================================================================
# Flags
# ============================================================================

# Flags applied to every host-side compile
CXXFLAGS := -std=c++17 -O2 -g -Wall

# XRT: header search paths (top-level include/ plus its CL/ subdirectory)
XRT_INC := -I$(XILINX_XRT)/include
XRT_INC += -I$(XILINX_XRT)/include/CL

# XRT: library search path followed by the libraries to link
XRT_LIB := -L$(XILINX_XRT)/lib
XRT_LIB += -lxrt_coreutil -lxilinxopencl -luuid -lpthread

# ROCm/HIP: header search path, library search path and runtime library
HIP_INC := -I$(ROCM_PATH)/include
HIP_LIB := -L$(ROCM_PATH)/lib
HIP_LIB += -lamdhip64

# What the host executables are built with: XRT entries first, then HIP
HOST_INC := $(XRT_INC) $(HIP_INC)
HOST_LIB := $(XRT_LIB) $(HIP_LIB)

# ============================================================================
# Source files
# ============================================================================

# FPGA kernel source, compiled by v++ into the per-kernel .xo files
FPGA_SRCS := fpga_kernel.cpp
# HIP kernel source and host source, passed together to hipcc for the full host app
GPU_SRCS := gpu_kernel.hip
HOST_SRCS := host.cpp

# ============================================================================
# Output files
# ============================================================================

# Host executables, written to the current directory
SIMPLE_EXE := p2p_simple
HOST_EXE := p2p_transfer

# Object path for the GPU kernel; no rule visible in this file builds or uses it
GPU_OBJ := $(BUILD_DIR)/gpu_kernel.o

# FPGA artefacts under $(BUILD_DIR): one .xo per kernel, linked into one xclbin
XO_COPY := $(BUILD_DIR)/fpga_memcpy.xo
XO_READ := $(BUILD_DIR)/fpga_read_verify.xo
XO_WRITE := $(BUILD_DIR)/fpga_write_pattern.xo
FPGA_XCLBIN := $(BUILD_DIR)/p2p_demo.xclbin

# ============================================================================
# Build targets
# ============================================================================

# Command-style targets: always considered out of date, never matched to a file
.PHONY: all clean help \
        host simple \
        fpga_kernel fpga_kernel_emu

# Default goal: the demo that needs no xclbin
all: simple

# Print the available targets and a quick-start command line.
# One printf emits every line; '%s\n' is reused for each argument.
help:
	@printf '%s\n' \
		"FPGA-GPU P2P Transfer Demo - Build System" \
		"" \
		"Targets:" \
		"  make simple          - Build simple demo (no xclbin needed) [RECOMMENDED]" \
		"  make host            - Build full host application (needs xclbin)" \
		"  make fpga_kernel     - Build FPGA xclbin (takes 2-4 hours)" \
		"  make fpga_kernel_emu - Build for hardware emulation (~30 min)" \
		"  make clean           - Clean build artifacts" \
		"" \
		"Quick start:" \
		"  source env.sh && make simple && ./p2p_simple --fpga 81:00.1"

# ============================================================================
# Build directory
# ============================================================================

# Create the build tree: the directory itself plus the temp/, logs/ and
# reports/ subdirectories that the v++ rules point their output options at.
$(BUILD_DIR):
	mkdir -p $@ $@/temp $@/logs $@/reports

# ============================================================================
# Host applications
# ============================================================================

# Short aliases for the two host executables
simple: $(SIMPLE_EXE)

host: $(HOST_EXE)

# Simple demo: a single source file compiled and linked by hipcc in one step.
# $(BUILD_DIR) is order-only: it is created when missing, but its timestamp
# never triggers a rebuild of the executable.
$(SIMPLE_EXE): p2p_simple.cpp | $(BUILD_DIR)
	$(HIPCC) $(CXXFLAGS) $(HOST_INC) $^ $(HOST_LIB) -o $@
	@printf '%s\n' \
		"" \
		"=====================================================" \
		"Simple P2P demo built: $@" \
		"Run with: ./$@ --fpga 81:00.1 --gpu 0" \
		"====================================================="

# Full host application (OpenCL API): the host source and the GPU kernel source
# are handed to hipcc in a single invocation, with both OpenCL target-version
# macros defined as 120. $^ expands to the normal prerequisites only, i.e. the
# source files and not the order-only build directory.
$(HOST_EXE): $(HOST_SRCS) $(GPU_SRCS) | $(BUILD_DIR)
	$(HIPCC) $(CXXFLAGS) $(HOST_INC) \
		-DCL_HPP_TARGET_OPENCL_VERSION=120 \
		-DCL_TARGET_OPENCL_VERSION=120 \
		$^ $(HOST_LIB) -o $@
	@printf '%s\n' \
		"" \
		"=====================================================" \
		"Full P2P demo built: $@" \
		"Run with: ./$@ --fpga 81:00.1 --gpu 0 --xclbin <path/to/your.xclbin>" \
		"=====================================================" \
		"" \
		"Host application built: $@"

# ============================================================================
# FPGA kernel (Vitis compilation)
# ============================================================================

# Options shared by every v++ invocation (kernel compile and link)
VPP_COMMON := --platform $(PLATFORM)
VPP_COMMON += --target $(TARGET) --save-temps

# Build the xclbin for the current TARGET, then copy it into the current
# directory under a fixed name.
fpga_kernel: $(FPGA_XCLBIN)
	cp $< p2p_demo.xclbin
	@printf '%s\n' \
		"" \
		"=====================================================" \
		"FPGA bitstream built: p2p_demo.xclbin" \
		"====================================================="

# Hardware-emulation build: re-enter make with TARGET overridden, so the
# sub-make uses build_hw_emu and passes --target hw_emu to v++.
fpga_kernel_emu:
	$(MAKE) TARGET=hw_emu fpga_kernel

# Compile each kernel separately (Vitis requires one -k per invocation).
#
# A single static pattern rule replaces three copy-pasted rules that differed
# only in the kernel name. The stem matched by % ($*) is used both as the .xo
# base name and as the kernel name given to -k, so every .xo listed here must
# be named $(BUILD_DIR)/<kernel_name>.xo. A listed target that does not match
# the pattern makes GNU Make stop with an error instead of silently building
# the wrong kernel.
#
#   $*  kernel name (e.g. fpga_write_pattern)
#   $<  the FPGA kernel source file
#   $@  the .xo being produced
#
# $(BUILD_DIR) is order-only: it must exist, but its timestamp never forces a
# kernel recompile.
$(XO_WRITE) $(XO_READ) $(XO_COPY): $(BUILD_DIR)/%.xo: $(FPGA_SRCS) | $(BUILD_DIR)
	$(VPP) $(VPP_COMMON) -c -k $* \
		--temp_dir $(BUILD_DIR)/temp \
		--log_dir $(BUILD_DIR)/logs \
		--report_dir $(BUILD_DIR)/reports \
		-o $@ $<

# Link step: combine the three kernel objects into the xclbin, using
# connectivity.cfg as the v++ --config file. The prerequisite list is split by
# suffix so the .cfg goes to --config and the .xo files become the link inputs
# (in the order they are listed); $(@D) is the directory of the xclbin, i.e.
# the build directory.
$(FPGA_XCLBIN): $(XO_WRITE) $(XO_READ) $(XO_COPY) connectivity.cfg
	$(VPP) $(VPP_COMMON) -l --config $(filter %.cfg,$^) \
		--temp_dir $(@D)/temp \
		--log_dir $(@D)/logs \
		--report_dir $(@D)/reports \
		-o $@ $(filter %.xo,$^)

# Generate the connectivity configuration consumed by the v++ link step.
#
# The content is written to $@.tmp and only renamed to $@ once it is complete.
# Appending line by line straight into $@ would leave a truncated file behind
# if the recipe were interrupted, and make would then treat that file as up to
# date. On failure the temp file is removed and the recipe exits non-zero.
#
# File layout (see the v++ connectivity options for the exact semantics):
#   sp=<instance>.<argument>:<memory>   one line per kernel argument mapping
#   nk=<kernel>:<count>                 one line per kernel
connectivity.cfg:
	@printf '%s\n' \
		'[connectivity]' \
		'sp=fpga_write_pattern_1.out:HBM[0]' \
		'sp=fpga_read_verify_1.in:HBM[0]' \
		'sp=fpga_read_verify_1.error_count:HBM[1]' \
		'sp=fpga_memcpy_1.in:HBM[0]' \
		'sp=fpga_memcpy_1.out:HBM[1]' \
		'nk=fpga_write_pattern:1' \
		'nk=fpga_read_verify:1' \
		'nk=fpga_memcpy:1' \
		> $@.tmp \
		|| { rm -f $@.tmp; exit 1; }
	@mv -f $@.tmp $@

# ============================================================================
# Clean
# ============================================================================

# Remove the host executables, every build tree, the generated connectivity
# file, and the by-products matched by the globs below.
#
# $(BUILD_DIR) is added to the fixed directory list so that a non-standard
# TARGET (make clean TARGET=<name>) also has its build_<name> tree removed;
# $(sort) only drops the duplicate entry when TARGET is one of the standard
# three.
#
# NOTE: the globs match by suffix in the current directory, so they also remove
# any *.xclbin, *.csv, *.log, ... placed there by hand.
.PHONY: clean

clean:
	rm -rf $(HOST_EXE) $(SIMPLE_EXE)
	rm -rf $(sort build_hw build_hw_emu build_sw_emu $(BUILD_DIR))
	rm -rf connectivity.cfg emconfig.json
	rm -rf *.xclbin *.xo *.log *.info
	rm -rf _x .Xil .run *.ltx
	rm -rf *.csv *.protoinst *.wdb *.wcfg
