
// This file is automatically generated
#include "kernels/kernel_v21.cuh"
#include <cuda_bf16.h>
#include <cuda_fp16.h>

// Explicit instantiation of v21::async_reasoning_attention_gpu for `float`.
// Only compiled when ASYNC_REASONING_ENABLE_FLOAT is defined, so the float
// variant is emitted from this translation unit on request.
#if defined(ASYNC_REASONING_ENABLE_FLOAT)
template cudaError_t v21::async_reasoning_attention_gpu<float>(
    float* out,
    float scale,
    const int* locations,
    const float* queries,
    const int* fragment_lengths,
    const float** key_fragments,
    const float** value_fragments,
    Shape shape);
#endif  // ASYNC_REASONING_ENABLE_FLOAT

// Explicit instantiation of v21::async_reasoning_attention_gpu for `half`.
// Only compiled when ASYNC_REASONING_ENABLE_HALF is defined. Note that `scale`
// stays `float` even though the data pointers use `half`.
#if defined(ASYNC_REASONING_ENABLE_HALF)
template cudaError_t v21::async_reasoning_attention_gpu<half>(
    half* out,
    float scale,
    const int* locations,
    const half* queries,
    const int* fragment_lengths,
    const half** key_fragments,
    const half** value_fragments,
    Shape shape);
#endif  // ASYNC_REASONING_ENABLE_HALF

// Explicit instantiation of v21::async_reasoning_attention_gpu for
// `nv_bfloat16`. Only compiled when ASYNC_REASONING_ENABLE_BFLOAT is defined.
// Note that `scale` stays `float` even though the data pointers use
// `nv_bfloat16`.
#if defined(ASYNC_REASONING_ENABLE_BFLOAT)
template cudaError_t v21::async_reasoning_attention_gpu<nv_bfloat16>(
    nv_bfloat16* out,
    float scale,
    const int* locations,
    const nv_bfloat16* queries,
    const int* fragment_lengths,
    const nv_bfloat16** key_fragments,
    const nv_bfloat16** value_fragments,
    Shape shape);
#endif  // ASYNC_REASONING_ENABLE_BFLOAT

