Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
|
#include <gemm_epilogue.h>
Public Types | |
typedef GemmEpilogueTraits_ | Traits |
The traits class. More... | |
typedef Traits::Params | Params |
The params. More... | |
typedef Traits::SharedStorage | SharedStorage |
The shared storage. More... | |
typedef Traits::OutputTile | OutputTile |
The output tile. More... | |
typedef Traits::Iterations | Iterations |
The number of iterations. More... | |
typedef Traits::Accumulators | Accumulators |
The accumulators. More... | |
typedef Traits::Scalar | Scalar |
The scalar. More... | |
typedef Traits::Functor | Functor |
The functor in charge of the math. More... | |
typedef Traits::GlobalLoadIteratorC | GlobalLoadIteratorC |
The iterator for C in global memory. 3D and 4D shapes are not supported. More... | |
typedef Traits::GlobalTransformerC | GlobalTransformerC |
The transformer for C. More... | |
typedef Traits::GlobalTransformerD | GlobalTransformerD |
The transformer for D. More... | |
typedef Traits::GlobalStoreIteratorD | GlobalStoreIteratorD |
The iterator for D in global memory. More... | |
typedef Traits::SharedStoreIteratorD | SharedStoreIteratorD |
The iterator to store D in shared memory. More... | |
typedef Traits::SharedStoreTransformerD | SharedStoreTransformerD |
The shared store transformer for D. More... | |
typedef Traits::SharedLoadStreamD | SharedLoadStreamD |
The iterator to load D in shared memory. More... | |
typedef Traits::Index | Index |
The index. More... | |
typedef GlobalLoadIteratorC::Scalar | ScalarC |
The scalar for C. More... | |
typedef GlobalStoreIteratorD::Scalar | ScalarD |
The scalar for D. More... | |
Public Member Functions | |
CUTLASS_DEVICE | GemmEpilogue (Params const &params_, SharedStorage &shared_storage_, Coord< 3 > const &_problem_size) |
Ctor. More... | |
CUTLASS_DEVICE void | epilogue (Accumulators &accumulators, Coord< 3 > const &block=make_Coord(0, 0, 0), int batch_id=0) |
Execute the epilogue. More... | |
template<bool kSourceRequired> | |
CUTLASS_DEVICE void | epilogue_with_or_without_beta (Accumulators &accumulators, Coord< 3 > const &block, int batch_id) |
CUTLASS_DEVICE void | shared_load_fence () |
The memory fence for shared loads. More... | |
CUTLASS_DEVICE void | shared_store_fence () |
The memory fence for shared stores. More... | |
Public Attributes | |
Params const & | params |
The params. More... | |
SharedStorage & | shared_storage |
The shared storage. More... | |
Coord< 3 > | problem_size |
The dimensions of the GEMM. More... | |
Functor | functor |
typedef Traits::Accumulators cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Accumulators |
typedef Traits::Functor cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Functor |
typedef Traits::GlobalLoadIteratorC cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::GlobalLoadIteratorC |
The iterator for C in global memory.
typedef Traits::GlobalStoreIteratorD cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::GlobalStoreIteratorD |
typedef Traits::GlobalTransformerC cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::GlobalTransformerC |
typedef Traits::GlobalTransformerD cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::GlobalTransformerD |
typedef Traits::Index cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Index |
typedef Traits::Iterations cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Iterations |
typedef Traits::OutputTile cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::OutputTile |
typedef Traits::Params cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Params |
typedef Traits::Scalar cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Scalar |
typedef GlobalLoadIteratorC::Scalar cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::ScalarC |
typedef GlobalStoreIteratorD::Scalar cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::ScalarD |
typedef Traits::SharedLoadStreamD cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::SharedLoadStreamD |
typedef Traits::SharedStorage cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::SharedStorage |
typedef Traits::SharedStoreIteratorD cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::SharedStoreIteratorD |
typedef Traits::SharedStoreTransformerD cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::SharedStoreTransformerD |
typedef GemmEpilogueTraits_ cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::Traits |
|
inline |
|
inline |
|
inline |
|
inline |
|
inline |
Functor cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::functor |
Params const& cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::params |
Coord<3> cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::problem_size |
SharedStorage& cutlass::gemm::GemmEpilogue< GemmEpilogueTraits_ >::shared_storage |