Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Public Member Functions | Static Public Member Functions | Public Attributes | Static Public Attributes | List of all members
cutlass::gemm::Gemm< GemmTraits_ > Struct Template Reference

#include <gemm.h>

Public Types

typedef Gemm< GemmTraits_ > This_
 This class. More...
 
typedef GemmTraits_ Traits
 The traits. More...
 
typedef Traits::SharedStorage SharedStorage
 The shared storage. More...
 
typedef Traits::ScalarA ScalarA
 The scalar for A. More...
 
typedef Traits::ScalarB ScalarB
 The scalar for B. More...
 
typedef Traits::Epilogue::Scalar ScalarEpilogue
 The scalar in the epilogue. More...
 
typedef Traits::Epilogue::ScalarC ScalarC
 The scalar for C. More...
 
typedef Traits::Epilogue::ScalarD ScalarD
 The scalar for D. More...
 
typedef Traits::Index Index
 The index. More...
 
typedef Traits::MultiplyAdd MultiplyAdd
 The multiply-add type taken from the traits; supplies the Accumulators and InstructionShape types used by the main loop. More...
 
typedef Traits::Params Params
 Use the params object defined in traits. More...
 

Public Member Functions

CUTLASS_DEVICE Gemm (Params const &params_, SharedStorage &shared_storage_)
 Ctor. More...
 
template<bool Residue, bool LastIteration>
CUTLASS_DEVICE void consume_tile (typename Traits::GlobalLoadStream &global_to_shared_stream, typename Traits::SharedStream &shared_load_stream, typename MultiplyAdd::Accumulators &accumulators, Index outer_k)
 Computes a warp-level GEMM on data held in shared memory. More...
 
CUTLASS_DEVICE void multiply_add ()
 Do the GEMM. More...
 

Static Public Member Functions

static __host__ cudaError_t launch (Params const &params, cudaStream_t stream=cudaStreamDefault)
 Launch the kernel. More...
 
static __host__ cudaError_t launch (CUfunction kernel, Params const &params, CUstream stream=CU_STREAM_LEGACY)
 Launch the kernel. More...
 

Public Attributes

Params const & params
 The params. More...
 
SharedStorage & shared_storage
 The shared storage. More...
 

Static Public Attributes

static int const kThreads = Traits::GemmConfig::kThreads
 The number of threads. More...
 
static Index const kWarpGemmSteps
 

Member Typedef Documentation

◆ Index

template<typename GemmTraits_ >
typedef Traits::Index cutlass::gemm::Gemm< GemmTraits_ >::Index

◆ MultiplyAdd

template<typename GemmTraits_ >
typedef Traits::MultiplyAdd cutlass::gemm::Gemm< GemmTraits_ >::MultiplyAdd

◆ Params

template<typename GemmTraits_ >
typedef Traits::Params cutlass::gemm::Gemm< GemmTraits_ >::Params

◆ ScalarA

template<typename GemmTraits_ >
typedef Traits::ScalarA cutlass::gemm::Gemm< GemmTraits_ >::ScalarA

◆ ScalarB

template<typename GemmTraits_ >
typedef Traits::ScalarB cutlass::gemm::Gemm< GemmTraits_ >::ScalarB

◆ ScalarC

template<typename GemmTraits_ >
typedef Traits::Epilogue::ScalarC cutlass::gemm::Gemm< GemmTraits_ >::ScalarC

◆ ScalarD

template<typename GemmTraits_ >
typedef Traits::Epilogue::ScalarD cutlass::gemm::Gemm< GemmTraits_ >::ScalarD

◆ ScalarEpilogue

template<typename GemmTraits_ >
typedef Traits::Epilogue::Scalar cutlass::gemm::Gemm< GemmTraits_ >::ScalarEpilogue

◆ SharedStorage

template<typename GemmTraits_ >
typedef Traits::SharedStorage cutlass::gemm::Gemm< GemmTraits_ >::SharedStorage

◆ This_

template<typename GemmTraits_ >
typedef Gemm<GemmTraits_> cutlass::gemm::Gemm< GemmTraits_ >::This_

◆ Traits

template<typename GemmTraits_ >
typedef GemmTraits_ cutlass::gemm::Gemm< GemmTraits_ >::Traits

Constructor & Destructor Documentation

◆ Gemm()

template<typename GemmTraits_ >
CUTLASS_DEVICE cutlass::gemm::Gemm< GemmTraits_ >::Gemm ( Params const &  params_,
SharedStorage &  shared_storage_ 
)
inline

Member Function Documentation

◆ consume_tile()

template<typename GemmTraits_ >
template<bool Residue, bool LastIteration>
CUTLASS_DEVICE void cutlass::gemm::Gemm< GemmTraits_ >::consume_tile ( typename Traits::GlobalLoadStream &  global_to_shared_stream,
typename Traits::SharedStream &  shared_load_stream,
typename MultiplyAdd::Accumulators &  accumulators,
Index  outer_k 
)
inline

◆ launch() [1/2]

template<typename GemmTraits_ >
static __host__ cudaError_t cutlass::gemm::Gemm< GemmTraits_ >::launch ( Params const &  params,
cudaStream_t  stream = cudaStreamDefault 
)
inline static

Launch the kernel.

◆ launch() [2/2]

template<typename GemmTraits_ >
static __host__ cudaError_t cutlass::gemm::Gemm< GemmTraits_ >::launch ( CUfunction  kernel,
Params const &  params,
CUstream  stream = CU_STREAM_LEGACY 
)
inline static

◆ multiply_add()

template<typename GemmTraits_ >
CUTLASS_DEVICE void cutlass::gemm::Gemm< GemmTraits_ >::multiply_add ( )
inline

Member Data Documentation

◆ kThreads

template<typename GemmTraits_ >
int const cutlass::gemm::Gemm< GemmTraits_ >::kThreads = Traits::GemmConfig::kThreads
static

◆ kWarpGemmSteps

template<typename GemmTraits_ >
Index const cutlass::gemm::Gemm< GemmTraits_ >::kWarpGemmSteps
static
Initial value:
=
Traits::GemmConfig::AccumulatorsPerWarp::kD / MultiplyAdd::InstructionShape::kD

◆ params

template<typename GemmTraits_ >
Params const& cutlass::gemm::Gemm< GemmTraits_ >::params

◆ shared_storage

template<typename GemmTraits_ >
SharedStorage& cutlass::gemm::Gemm< GemmTraits_ >::shared_storage

The documentation for this struct was generated from the following file: