|
Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
|
Template performing matrix multiply-add operation within a thread.
#include <fp16_sgemm_multiply_add.h>
Public Types | |
| typedef Shape< 1, 1, 1, 1 > | InstructionShape |
| The shape of the instruction. More... | |
| typedef ThreadGemmShape_ | ThreadGemmShape |
| The shape of a thread-level matrix multiply accumulate. More... | |
| typedef ThreadGemmShape | AccumulatorsPerThread |
| Alias of ThreadGemmShape, kept under the name "AccumulatorsPerThread" for compatibility. Expected to be renamed in CUTLASS v2.0. More... | |
| typedef ThreadsPerWarp_ | ThreadsPerWarp |
| The number of threads per warp. More... | |
| typedef ShapeMul< ThreadGemmShape, ThreadsPerWarp >::Shape | AccumulatorsPerWarp |
| The number of accumulators per warp. More... | |
| typedef half | ScalarA |
| The type for A, specialized to half. More... | |
| typedef Fragment< ScalarA, AccumulatorsPerThread::kW > | FragmentA |
| The fragment for A. More... | |
| typedef half | ScalarB |
| The type for B, specialized to half. More... | |
| typedef Fragment< ScalarB, AccumulatorsPerThread::kH > | FragmentB |
| The fragment for B. More... | |
| typedef float | ScalarC |
| The type for C and D, specialized to float. More... | |
| typedef Fragment< ScalarC, AccumulatorsPerThread::kH *AccumulatorsPerThread::kW, 16 > | Accumulators |
| The accumulators. More... | |
Public Member Functions | |
| CUTLASS_DEVICE | ThreadMultiplyAdd () |
| Ctor. More... | |
| CUTLASS_DEVICE void | multiply_add (FragmentA const &a, FragmentB const &b, Accumulators const &c, Accumulators &d) |
| Multiply-add: d = a*b + c. More... | |
| typedef Fragment<ScalarC, AccumulatorsPerThread::kH * AccumulatorsPerThread::kW, 16> cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::Accumulators |
| typedef ThreadGemmShape cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::AccumulatorsPerThread |
| typedef ShapeMul<ThreadGemmShape, ThreadsPerWarp>::Shape cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::AccumulatorsPerWarp |
| typedef Fragment<ScalarA, AccumulatorsPerThread::kW> cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::FragmentA |
| typedef Fragment<ScalarB, AccumulatorsPerThread::kH> cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::FragmentB |
| typedef Shape<1, 1, 1, 1> cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::InstructionShape |
| typedef half cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::ScalarA |
| typedef half cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::ScalarB |
| typedef float cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::ScalarC |
| typedef ThreadGemmShape_ cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::ThreadGemmShape |
| typedef ThreadsPerWarp_ cutlass::gemm::ThreadMultiplyAdd< ThreadGemmShape_, ThreadsPerWarp_, half, half, float >::ThreadsPerWarp |
|
inline |
|
inline |
1.8.14