Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
Classes | Public Types | Static Public Attributes | List of all members
cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ > Struct Template Reference

#include <gemm_shared_tile.h>

Classes

struct  ThreadOffset
 Computes the thread offset in (H, W) based on thread ID. More...
 

Public Types

typedef nv_std::remove_const< Scalar_ >::type Scalar
 The scalar. More...
 
typedef Scalar_ * Pointer
 The pointer. More...
 
typedef OutputTile_ OutputTile
 The dimension of the output tile. More...
 
typedef Warps_ Warps
 The warps in the tile. More...
 
typedef ThreadsPerWarp_ ThreadsPerWarp
 The threads in the warps. More...
 
typedef Shape< 1, 2, kScalarsPerRow/kScalarsPerLds, kScalarsPerLds > Tile
 The tile. More...
 
typedef Shape< kIterationsD, kIterationsH, OutputTile::kW/kWarpSize/kScalarsPerLds > Iterations
 The number of iterations needed to load the tile. More...
 
typedef Shape< OutputTile::kW, kScalarsPerRow, kWarpSize *kScalarsPerLds > Delta
 The strides in each dimension between different loads/stores. More...
 

Static Public Attributes

static int const kScalarsPerLds = kScalarsPerLds_
 The number of scalars per LDS. More...
 
static int const kSkew = kSkew_
 The skew. More...
 
static MemorySpace::Kind const kMemorySpace = MemorySpace::kShared
 The memory space. More...
 
static int const kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
 The number of scalars per thread. More...
 
static int const kThreads = ShapeCount<Warps>::kCount * kWarpSize
 The number of threads. More...
 
static int const kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
 The number of scalars per row. We build a tile with 2 rows (to avoid bank conflicts). More...
 
static int const kIterationsInHPerWarp = kTileH_ / ShapeCount<Warps>::kCount
 
static int const kIterationsH = kIterationsInHPerWarp == 1 ? 1 : 2
 
static int const kIterationsD = kIterationsInHPerWarp / kIterationsH
 

Member Typedef Documentation

◆ Delta

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
typedef Shape<OutputTile::kW, kScalarsPerRow, kWarpSize * kScalarsPerLds> cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Delta

◆ Iterations

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
typedef Shape<kIterationsD, kIterationsH, OutputTile::kW / kWarpSize / kScalarsPerLds> cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Iterations

◆ OutputTile

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
typedef OutputTile_ cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::OutputTile

◆ Pointer

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
typedef Scalar_* cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Pointer

◆ Scalar

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
typedef nv_std::remove_const<Scalar_>::type cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Scalar

◆ ThreadsPerWarp

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
typedef ThreadsPerWarp_ cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::ThreadsPerWarp

◆ Tile

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
typedef Shape<1, 2, kScalarsPerRow / kScalarsPerLds, kScalarsPerLds> cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Tile

◆ Warps

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
typedef Warps_ cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::Warps

Member Data Documentation

◆ kIterationsD

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kIterationsD = kIterationsInHPerWarp / kIterationsH
static

◆ kIterationsH

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kIterationsH = kIterationsInHPerWarp == 1 ? 1 : 2
static

◆ kIterationsInHPerWarp

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kIterationsInHPerWarp = kTileH_ / ShapeCount<Warps>::kCount
static

◆ kMemorySpace

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
MemorySpace::Kind const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kMemorySpace = MemorySpace::kShared
static

◆ kScalarsPerLds

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kScalarsPerLds = kScalarsPerLds_
static

◆ kScalarsPerRow

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kScalarsPerRow = kThreads / 2 * kScalarsPerThread + kSkew
static

◆ kScalarsPerThread

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kScalarsPerThread = OutputTile_::kW / Warps::kW / ThreadsPerWarp::kW
static

◆ kSkew

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kSkew = kSkew_
static

◆ kThreads

template<typename Scalar_ , typename OutputTile_ , typename Warps_ , typename ThreadsPerWarp_ , int kTileH_, int kScalarsPerLds_, int kSkew_ = 0>
int const cutlass::gemm::GemmSharedLoadIteratorDTraits< Scalar_, OutputTile_, Warps_, ThreadsPerWarp_, kTileH_, kScalarsPerLds_, kSkew_ >::kThreads = ShapeCount<Warps>::kCount * kWarpSize
static

The documentation for this struct was generated from the following file: