Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
Public Types | Public Member Functions | List of all members
cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float > Struct Template Reference

Template performing matrix multiply-add operation within a thread.

#include <fp16_sgemm_multiply_add.h>

Public Types

typedef Shape< 1, 1, 1, 1 > InstructionShape
 The shape of the instruction. More...
 
typedef ThreadGemmShape_ ThreadGemmShape
 The shape of a thread-level matrix multiply accumulate. More...
 
typedef ThreadGemmShape AccumulatorsPerThread
 Aliased to "AccumulatorsPerThread" for compatibility. Expected to be renamed in CUTLASS v2.0. More...
 
typedef ThreadsPerWarp_ ThreadsPerWarp
 The number of threads per warp. More...
 
typedef ShapeMul< ThreadGemmShape, ThreadsPerWarp >::Shape AccumulatorsPerWarp
 The number of accumulators per warp. More...
 
typedef half ScalarA
 The type for A, specialized to half. More...
 
typedef Fragment< ScalarA, AccumulatorsPerThread::kW > FragmentA
 The fragment for A. More...
 
typedef half ScalarB
 The type for B, specialized to half. More...
 
typedef Fragment< ScalarB, AccumulatorsPerThread::kH > FragmentB
 The fragment for B. More...
 
typedef float ScalarC
 The type for C and D, specialized to float. More...
 
typedef Fragment< ScalarC, AccumulatorsPerThread::kH * AccumulatorsPerThread::kW, 16 > Accumulators
 The accumulators. More...
 

Public Member Functions

CUTLASS_DEVICE ThreadMultiplyAdd ()
 Ctor. More...
 
CUTLASS_DEVICE void multiply_add (FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d)
 Multiply-add: d = a*b + c. More...
 

Member Typedef Documentation

◆ Accumulators

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef Fragment<ScalarC, AccumulatorsPerThread::kH * AccumulatorsPerThread::kW, 16> cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::Accumulators

◆ AccumulatorsPerThread

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef ThreadGemmShape cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::AccumulatorsPerThread

◆ AccumulatorsPerWarp

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef ShapeMul<ThreadGemmShape, ThreadsPerWarp>::Shape cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::AccumulatorsPerWarp

◆ FragmentA

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef Fragment<ScalarA, AccumulatorsPerThread::kW> cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::FragmentA

◆ FragmentB

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef Fragment<ScalarB, AccumulatorsPerThread::kH> cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::FragmentB

◆ InstructionShape

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef Shape<1, 1, 1, 1> cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::InstructionShape

◆ ScalarA

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef half cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::ScalarA

◆ ScalarB

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef half cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::ScalarB

◆ ScalarC

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef float cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::ScalarC

◆ ThreadGemmShape

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef ThreadGemmShape_ cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::ThreadGemmShape

◆ ThreadsPerWarp

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
typedef ThreadsPerWarp_ cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::ThreadsPerWarp

Constructor & Destructor Documentation

◆ ThreadMultiplyAdd()

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
CUTLASS_DEVICE cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::ThreadMultiplyAdd ( )
inline

Member Function Documentation

◆ multiply_add()

template<typename ThreadGemmShape_ , typename ThreadsPerWarp_ >
CUTLASS_DEVICE void cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::multiply_add ( FragmentA const &  a,
FragmentB const &  b,
Accumulators const &  c,
Accumulators &  d 
)
inline

The documentation for this struct was generated from the following file: