from torch.utils import cpp_extension
import pathlib
import os
import subprocess


def _get_cuda_bare_metal_version(cuda_dir):
    """Run ``nvcc -V`` from ``cuda_dir`` and extract the CUDA release version.

    Args:
        cuda_dir: Root directory of the CUDA toolkit; the compiler is invoked
            as ``<cuda_dir>/bin/nvcc``.

    Returns:
        A ``(raw_output, major, minor)`` tuple. ``raw_output`` is the complete
        text printed by ``nvcc -V``; ``major`` and ``minor`` are the release
        components as strings (e.g. ``"11"`` and ``"8"``).

    Raises:
        subprocess.CalledProcessError: If ``nvcc`` exits with a non-zero status.
        OSError: If the ``nvcc`` binary cannot be executed.
        ValueError: If the output contains no ``release`` token.
        IndexError: If the token after ``release`` has no ``.`` separator.
    """
    raw_output = subprocess.check_output(
        [os.path.join(cuda_dir, "bin", "nvcc"), "-V"], universal_newlines=True
    )
    tokens = raw_output.split()
    # The token following "release" looks like "11.8," (note trailing comma).
    release = tokens[tokens.index("release") + 1].split(".")
    bare_metal_major = release[0]

    # Keep every leading digit of the minor component rather than only its
    # first character, so a multi-digit minor ("12.10,") is not truncated.
    minor_token = release[1]
    digit_count = 0
    while digit_count < len(minor_token) and minor_token[digit_count].isdigit():
        digit_count += 1
    bare_metal_minor = minor_token[:digit_count]

    return raw_output, bare_metal_major, bare_metal_minor


def _create_build_dir(buildpath):
    """Make sure ``buildpath`` exists as a directory, on a best-effort basis.

    A single directory level is created with ``os.mkdir``. If that raises
    ``OSError`` and the path is already a directory, nothing further happens;
    otherwise a message is printed. No exception is propagated to the caller.

    Args:
        buildpath: Path of the directory to create.
    """
    try:
        os.mkdir(buildpath)
        return
    except OSError:
        directory_present = os.path.isdir(buildpath)

    # mkdir failed: only complain when the directory is genuinely missing.
    if directory_present:
        return
    print(f"Creation of the build directory {buildpath} failed")


# Pick additional -gencode targets according to the installed CUDA toolkit:
# compute capability 8.0 (sm_80) is enabled from CUDA 11.0 and compute
# capability 9.0 (sm_90) from CUDA 11.8, the first release whose nvcc
# accepts compute_90.
cc_flag = []
_, bare_metal_major, bare_metal_minor = _get_cuda_bare_metal_version(
    cpp_extension.CUDA_HOME
)
# Compare (major, minor) as one tuple. Testing the minor number on its own
# would wrongly drop sm_90 on CUDA 12.0-12.6, whose minor is below the
# 11.x threshold even though the toolkit is newer.
_cuda_version = (int(bare_metal_major), int(bare_metal_minor))
if _cuda_version >= (11, 0):
    cc_flag.extend(["-gencode", "arch=compute_80,code=sm_80"])
if _cuda_version >= (11, 8):
    cc_flag.extend(["-gencode", "arch=compute_90,code=sm_90"])

# Build artifacts are written to a "build" directory located next to this
# source file; the directory is created up front on a best-effort basis.
srcpath = pathlib.Path(__file__).parent.absolute()
buildpath = srcpath.joinpath("build")
_create_build_dir(buildpath)


def _cpp_extention_load_helper(name, sources, extra_cuda_flags):
    """Build and load an extension through ``cpp_extension.load``.

    The extension is built inside the module-level ``buildpath`` directory
    with ``-O3`` for the host compiler. The CUDA compiler receives the base
    flags below, then ``extra_cuda_flags``, then the module-level ``cc_flag``
    entries, in that order.

    Args:
        name: Name passed to the loader for the extension.
        sources: Source file paths passed to the loader.
        extra_cuda_flags: List of additional CUDA compiler flags.

    Returns:
        The value returned by ``cpp_extension.load``.
    """
    host_flags = ["-O3"]
    base_cuda_flags = [
        "-O3",
        "-gencode",
        "arch=compute_70,code=sm_70",
        "--use_fast_math",
    ]
    cuda_flags = base_cuda_flags + extra_cuda_flags + cc_flag

    load_options = {
        "name": name,
        "sources": sources,
        "build_directory": buildpath,
        "extra_cflags": host_flags,
        "extra_cuda_cflags": cuda_flags,
        "verbose": 1,
    }
    return cpp_extension.load(**load_options)


extra_flags = []

# Resolve the kernel sources against this module's directory when they are
# present there, so the extension builds regardless of the working directory
# the process was started from (the build directory is already anchored at
# the module directory). If a file is not found next to the module, the
# original working-directory-relative path is kept as a fallback.
cache_autogptq_cuda_256_sources = [
    str(srcpath / filename)
    if (srcpath / filename).is_file()
    else f"./{filename}"
    for filename in (
        "cache_autogptq_cuda_256.cpp",
        "cache_autogptq_cuda_kernel_256.cu",
    )
]
cache_autogptq_cuda_256 = _cpp_extention_load_helper(
    "cache_autogptq_cuda_256", cache_autogptq_cuda_256_sources, extra_flags
)
