|
Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ > Template Reference
#include <gemm_epilogue_traits.h>
Public Types | |
| typedef EpilogueFunctor_::Scalar | Scalar |
| The scalar. More... | |
| typedef GemmConfig_::OutputTile | OutputTile |
| The output tile. More... | |
| typedef Shape< 1, GemmConfig_::MultiplyAdd::AccumulatorsPerThread::kH/GemmConfig_::kAccumulatorsPerLdsB, GemmConfig_::kAccumulatorsPerLdsB > | Iterations |
| The number of iterations in the epilogue. More... | |
| typedef Shape< 0, GemmConfig_::kAccumulatorsPerLdsB *(GemmConfig_::Warps::kH *GemmConfig_::MultiplyAdd::ThreadsPerWarp::kH - 1), 0 > | Delta |
| The iteration deltas (strides) of the epilogue; only the H component is non-zero. | |
| typedef EpilogueFunctor_ | Functor |
| The functor to do the math in the epilogue. More... | |
| typedef GemmSharedStoreTileDTraits< typename Functor::ScalarAccum, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::kScalarsPerStsD, 128/sizeof(typename GemmConfig_::ScalarD)/GemmConfig_::kScalarsPerStsD/2 *GemmConfig_::kScalarsPerStsD > | SharedStoreTileTraits |
| The traits class to build the iterator to store to shared memory for D. More... | |
| typedef TileStoreIterator< SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > | SharedStoreIteratorD |
| The iterator to store D to shared memory. More... | |
| typedef Copy< typename SharedStoreIteratorD::Fragment > | SharedStoreTransformerD |
| The shared store transformer for D. More... | |
| typedef GemmSharedLoadTileDTraits< typename Functor::ScalarAccum, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::kScalarsPerLdsD, SharedStoreTileTraits::kSkew > | SharedLoadTileTraits |
| The traits class to build the iterator to load from shared memory for D. More... | |
| typedef TileLoadIterator< SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > | SharedLoadIteratorD |
| The iterator to load D from shared memory. More... | |
| typedef SharedLoadStream< SharedLoadIteratorD > | SharedLoadStreamD |
| The stream to load D. More... | |
| typedef GemmGlobalTileCdTraits< typename GemmConfig_::ScalarC const, Shape< 1, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, Iterations::kW, GemmConfig_::kScalarsPerLdgC > | GlobalLoadTileTraits |
| The traits class to build the iterator to load data from global memory for C^N. More... | |
| typedef GemmGlobalIteratorCd< GlobalLoadTileTraits, Index_ > | GlobalLoadIteratorC |
| The iterator to load C. More... | |
| typedef Copy< typename GlobalLoadIteratorC::Fragment > | GlobalTransformerC |
| The transformer for C. More... | |
| typedef GemmGlobalTileCdTraits< typename GemmConfig_::ScalarD, Shape< 1, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, Iterations::kW, GemmConfig_::kScalarsPerStgD > | GlobalStoreTileTraits |
| The traits class to build the iterator to store data to global memory for D^N. More... | |
| typedef GemmGlobalIteratorCd< GlobalStoreTileTraits, Index_ > | GlobalStoreIteratorD |
| The iterator to store D. More... | |
| typedef Copy< typename GlobalStoreIteratorD::Fragment > | GlobalTransformerD |
| The transformer for D. More... | |
Member Typedef Documentation
| typedef Shape<0, GemmConfig_::kAccumulatorsPerLdsB*( GemmConfig_::Warps::kH* GemmConfig_::MultiplyAdd::ThreadsPerWarp::kH - 1), 0> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::Delta |
| typedef EpilogueFunctor_ cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::Functor |
| typedef GemmGlobalIteratorCd<GlobalLoadTileTraits, Index_> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalLoadIteratorC |
| typedef GemmGlobalTileCdTraits< typename GemmConfig_::ScalarC const, Shape<1, GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount, GemmConfig_::OutputTile::kW>, Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>, Iterations::kW, GemmConfig_::kScalarsPerLdgC> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalLoadTileTraits |
| typedef GemmGlobalIteratorCd<GlobalStoreTileTraits, Index_> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalStoreIteratorD |
| typedef GemmGlobalTileCdTraits< typename GemmConfig_::ScalarD, Shape<1, GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount, GemmConfig_::OutputTile::kW>, Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>, Iterations::kW, GemmConfig_::kScalarsPerStgD> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalStoreTileTraits |
| typedef Copy<typename GlobalLoadIteratorC::Fragment> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalTransformerC |
| typedef Copy<typename GlobalStoreIteratorD::Fragment> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalTransformerD |
| typedef Shape<1, GemmConfig_::MultiplyAdd::AccumulatorsPerThread::kH / GemmConfig_::kAccumulatorsPerLdsB, GemmConfig_::kAccumulatorsPerLdsB> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::Iterations |
| typedef GemmConfig_::OutputTile cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::OutputTile |
| typedef EpilogueFunctor_::Scalar cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::Scalar |
| typedef TileLoadIterator<SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedLoadIteratorD |
| typedef SharedLoadStream<SharedLoadIteratorD> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedLoadStreamD |
| typedef GemmSharedLoadTileDTraits< typename Functor::ScalarAccum, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount, GemmConfig_::kScalarsPerLdsD, SharedStoreTileTraits::kSkew> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedLoadTileTraits |
| typedef TileStoreIterator<SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreIteratorD |
| typedef GemmSharedStoreTileDTraits< typename Functor::ScalarAccum, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::kScalarsPerStsD, 128 / sizeof(typename GemmConfig_::ScalarD) / GemmConfig_::kScalarsPerStsD / 2 * GemmConfig_::kScalarsPerStsD> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreTileTraits |
| typedef Copy<typename SharedStoreIteratorD::Fragment> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreTransformerD |
Generated by Doxygen 1.8.14