Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
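cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ > Struct Template Reference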
#include <gemm_epilogue_traits.h>
Public Types
typedef EpilogueFunctor_::Scalar | Scalar |
The scalar type of the epilogue functor.
typedef GemmConfig_::OutputTile | OutputTile |
The output tile of the GEMM configuration.
typedef Shape< 1, GemmConfig_::MultiplyAdd::AccumulatorsPerThread::kH/GemmConfig_::kAccumulatorsPerLdsB, GemmConfig_::kAccumulatorsPerLdsB > | Iterations |
The number of iterations in the epilogue.
typedef Shape< 0, GemmConfig_::kAccumulatorsPerLdsB *(GemmConfig_::Warps::kH *GemmConfig_::MultiplyAdd::ThreadsPerWarp::kH - 1), 0 > | Delta |
The iteration strides in the H/W dimension.
typedef EpilogueFunctor_ | Functor |
The functor that does the math in the epilogue.
typedef GemmSharedStoreTileDTraits< typename Functor::ScalarAccum, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::kScalarsPerStsD, 128/sizeof(typename GemmConfig_::ScalarD)/GemmConfig_::kScalarsPerStsD/2 *GemmConfig_::kScalarsPerStsD > | SharedStoreTileTraits |
The traits class to build the iterator to store to shared memory for D.
typedef TileStoreIterator< SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > | SharedStoreIteratorD |
The iterator to store D to shared memory.
typedef Copy< typename SharedStoreIteratorD::Fragment > | SharedStoreTransformerD |
The shared store transformer for D.
typedef GemmSharedLoadTileDTraits< typename Functor::ScalarAccum, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::kScalarsPerLdsD, SharedStoreTileTraits::kSkew > | SharedLoadTileTraits |
The traits class to build the iterator to load from shared memory for D.
typedef TileLoadIterator< SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared > | SharedLoadIteratorD |
The iterator to load D from shared memory.
typedef SharedLoadStream< SharedLoadIteratorD > | SharedLoadStreamD |
The stream to load D from shared memory.
typedef GemmGlobalTileCdTraits< typename GemmConfig_::ScalarC const, Shape< 1, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, Iterations::kW, GemmConfig_::kScalarsPerLdgC > | GlobalLoadTileTraits |
The traits class to build the iterator to load data from global memory for C^N.
typedef GemmGlobalIteratorCd< GlobalLoadTileTraits, Index_ > | GlobalLoadIteratorC |
The iterator to load C from global memory.
typedef Copy< typename GlobalLoadIteratorC::Fragment > | GlobalTransformerC |
The transformer for C.
typedef GemmGlobalTileCdTraits< typename GemmConfig_::ScalarD, Shape< 1, GemmConfig_::OutputTile::kH/ShapeCount< Iterations >::kCount, GemmConfig_::OutputTile::kW >, Shape< 1, ShapeCount< typename GemmConfig_::Warps >::kCount, GemmConfig_::kWarpSize >, Iterations::kW, GemmConfig_::kScalarsPerStgD > | GlobalStoreTileTraits |
The traits class to build the iterator to store data to global memory for D^N.
typedef GemmGlobalIteratorCd< GlobalStoreTileTraits, Index_ > | GlobalStoreIteratorD |
The iterator to store D to global memory.
typedef Copy< typename GlobalStoreIteratorD::Fragment > | GlobalTransformerD |
The transformer for D.
typedef Shape<0, GemmConfig_::kAccumulatorsPerLdsB*( GemmConfig_::Warps::kH* GemmConfig_::MultiplyAdd::ThreadsPerWarp::kH - 1), 0> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::Delta |
typedef EpilogueFunctor_ cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::Functor |
typedef GemmGlobalIteratorCd<GlobalLoadTileTraits, Index_> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalLoadIteratorC |
typedef GemmGlobalTileCdTraits< typename GemmConfig_::ScalarC const, Shape<1, GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount, GemmConfig_::OutputTile::kW>, Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>, Iterations::kW, GemmConfig_::kScalarsPerLdgC> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalLoadTileTraits |
typedef GemmGlobalIteratorCd<GlobalStoreTileTraits, Index_> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalStoreIteratorD |
typedef GemmGlobalTileCdTraits< typename GemmConfig_::ScalarD, Shape<1, GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount, GemmConfig_::OutputTile::kW>, Shape<1, ShapeCount<typename GemmConfig_::Warps>::kCount, GemmConfig_::kWarpSize>, Iterations::kW, GemmConfig_::kScalarsPerStgD> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalStoreTileTraits |
typedef Copy<typename GlobalLoadIteratorC::Fragment> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalTransformerC |
typedef Copy<typename GlobalStoreIteratorD::Fragment> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::GlobalTransformerD |
typedef Shape<1, GemmConfig_::MultiplyAdd::AccumulatorsPerThread::kH / GemmConfig_::kAccumulatorsPerLdsB, GemmConfig_::kAccumulatorsPerLdsB> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::Iterations |
typedef GemmConfig_::OutputTile cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::OutputTile |
typedef EpilogueFunctor_::Scalar cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::Scalar |
typedef TileLoadIterator<SharedLoadTileTraits, typename SharedLoadTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedLoadIteratorD |
typedef SharedLoadStream<SharedLoadIteratorD> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedLoadStreamD |
typedef GemmSharedLoadTileDTraits< typename Functor::ScalarAccum, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::OutputTile::kH / ShapeCount<Iterations>::kCount, GemmConfig_::kScalarsPerLdsD, SharedStoreTileTraits::kSkew> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedLoadTileTraits |
typedef TileStoreIterator<SharedStoreTileTraits, typename SharedStoreTileTraits::Scalar, IteratorAdvance::kH, MemorySpace::kShared> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreIteratorD |
typedef GemmSharedStoreTileDTraits< typename Functor::ScalarAccum, typename GemmConfig_::OutputTile, typename GemmConfig_::Warps, typename GemmConfig_::MultiplyAdd::ThreadsPerWarp, GemmConfig_::kScalarsPerStsD, 128 / sizeof(typename GemmConfig_::ScalarD) / GemmConfig_::kScalarsPerStsD / 2 * GemmConfig_::kScalarsPerStsD> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreTileTraits |
typedef Copy<typename SharedStoreIteratorD::Fragment> cutlass::gemm::GemmEpilogueTraitsHelper< GemmConfig_, EpilogueFunctor_, Index_ >::SharedStoreTransformerD |