|
Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
|
Implements a software-pipelined efficient batched reduction. D = alpha * Reduction(A) + beta * C. More...
#include <cuda.h>
#include "cutlass/coord.h"
#include "cutlass/util/platform.h"
#include "cutlass/fragment.h"
Go to the source code of this file.
Classes | |
| struct | cutlass::reduction::BatchedReduction< BatchedReductionTraits_ > |
Namespaces | |
| cutlass | |
| cutlass::reduction | |
Functions | |
| template<typename batched_reduction_ > | |
| __global__ | cutlass::reduction::__launch_bounds__ (batched_reduction_::Traits::kThreads, 1) void batched_reduction_kernel(typename batched_reduction_ |
1.8.14