Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
|
#include <hgemm_traits.h>
Additional Inherited Members | |
Public Types (inherited) | |
typedef GemmTraits< Helper_::GemmConfig, Helper_::GlobalLoadStreamA, Helper_::GlobalLoadStreamB, Helper_::SharedLoadStreamA, Helper_::SharedLoadStreamB, Helper_::Epilogue, IdentityBlockSwizzle, Index_, Helper_::ClearAccumulators > | This_ |
This traits type, i.e. the GemmTraits specialization itself. More... | |
typedef cutlass::gemm::Gemm< This_ > | KernelClass |
The struct that consumes this Traits. More... | |
typedef Helper_::GemmConfig | GemmConfig |
The configuration. More... | |
typedef GemmConfig::OutputTile | OutputTile |
The output tile. More... | |
typedef Helper_::GlobalLoadStreamA | GlobalLoadStreamA |
The stream to load A from global memory to shared memory. More... | |
typedef Helper_::GlobalLoadStreamA::Scalar | ScalarA |
The scalar for A. More... | |
typedef Helper_::GlobalLoadStreamB | GlobalLoadStreamB |
The stream to load B from global memory to shared memory. More... | |
typedef Helper_::GlobalLoadStreamB::Scalar | ScalarB |
The scalar for B. More... | |
typedef Helper_::SharedLoadStreamA | SharedLoadStreamA |
The iterator for A to load from shared memory. More... | |
typedef Helper_::SharedLoadStreamB | SharedLoadStreamB |
The iterator for B to load from shared memory. More... | |
typedef GemmConfig::MultiplyAdd | MultiplyAdd |
The multiply-add functor. More... | |
typedef Helper_::Epilogue | Epilogue |
The epilogue. More... | |
typedef Epilogue::ScalarC | ScalarC |
The scalars in the epilogue. More... | |
typedef Epilogue::ScalarD | ScalarD |
typedef IdentityBlockSwizzle | BlockSwizzle |
The block swizzle to reorganize the grid. More... | |
typedef Index_ | Index |
The index. More... | |
typedef Helper_::ClearAccumulators | ClearAccumulators |
Clear the accumulators. More... | |
typedef GlobalLoadStreamPair< GlobalLoadStreamA, GlobalLoadStreamB, GemmConfig::kResidueInProlog > | GlobalLoadStream |
Assemble the global load streams for A/B. More... | |
typedef GlobalLoadStream::ThreadblockTileStorage | ThreadblockTileStorage |
Memory needed to store the threadblock-scoped GEMM tile. More... | |
typedef SharedStreamPair< SharedLoadStreamA, SharedLoadStreamB > | SharedStream |
Assemble the shared load streams for A/B. More... | |
Static Public Member Functions (inherited) | |
static CUTLASS_DEVICE void | shared_load_fence (bool in_loop) |
The memory fence for shared loads. More... | |
static CUTLASS_DEVICE void | shared_store_fence (bool in_loop) |
The memory fence for shared stores. More... | |
Static Public Attributes (inherited) | |
static MatrixLayout::Kind const | kLayoutA |
The layout of A. More... | |
static MatrixLayout::Kind const | kLayoutB |
The layout of B. More... | |