Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
Classes | Public Types | Static Public Attributes | List of all members
cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ > Struct Template Reference

#include <gemm_shared_tile.h>

Classes

struct  ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 

Public Types

typedef platform::remove_const< Scalar_ >::type Scalar
 The scalar. More...
 
typedef Scalar_ * Pointer
 The pointer. More...
 
typedef OutputTile_ OutputTile
 The dimension of the output tile. More...
 
typedef Warps_ Warps
 The warps in the tile. More...
 
typedef ThreadsPerWarp_ ThreadsPerWarp
 The threads in the warps. More...
 
typedef Shape< 1, 2, kScalarsPerRow/kAccessSize, kAccessSize > Tile
 The tile. More...
 
typedef Shape< 1, 1, kScalarsPerThread/kAccessSize > Iterations
 The number of iterations needed to store the tile. More...
 
typedef Shape< 0, 0, Warps::kW *ThreadsPerWarp::kW *kAccessSize > Delta
 The strides in each dimension between different loads/stores. More...
 
typedef Shape< 0, 0, Warps::kW *ThreadsPerWarp::kW *kAccessSize > ImmediateOffsetStrides
 The strides in each dimension between different loads/stores. More...
 

Static Public Attributes

static int const kAccessSize = kScalarsPerSts_
 The number of scalars per shared-memory store (STS). More...
 
static int const kSkew = kSkew_
 The skew. More...
 
static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
 The memory space. More...
 
static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
 The number of scalars per thread. More...
 
static int const kThreads = ShapeCount<Warps>::kCount * kWarpSize
 The number of threads. More...
 
static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
 The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts). More...
 

Member Typedef Documentation

◆ Delta

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
typedef Shape<0, 0, Warps::kW * ThreadsPerWarp::kW * kAccessSize> cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Delta

◆ ImmediateOffsetStrides

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
typedef Shape<0, 0, Warps::kW * ThreadsPerWarp::kW * kAccessSize> cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ImmediateOffsetStrides

◆ Iterations

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
typedef Shape<1, 1, kScalarsPerThread / kAccessSize> cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Iterations

◆ OutputTile

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
typedef OutputTile_ cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::OutputTile

◆ Pointer

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
typedef Scalar_* cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Pointer

◆ Scalar

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
typedef platform::remove_const<Scalar_>::type cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Scalar

◆ ThreadsPerWarp

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::ThreadsPerWarp

◆ Tile

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
typedef Shape<1, 2, kScalarsPerRow / kAccessSize, kAccessSize> cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Tile

◆ Warps

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
typedef Warps_ cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::Warps

Member Data Documentation

◆ kAccessSize

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kAccessSize = kScalarsPerSts_
static

◆ kMemorySpace

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
MemorySpace::Kind const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kMemorySpace = MemorySpace::kShared
static

◆ kScalarsPerRow

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
static

◆ kScalarsPerThread

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
static

◆ kSkew

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kSkew = kSkew_
static

◆ kThreads

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kScalarsPerSts_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedStoreTileDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kScalarsPerSts_, kSkew_ >::kThreads = ShapeCount<Warps>::kCount * kWarpSize
static

The documentation for this struct was generated from the following file: