53 template <
typename Scalar_,
57 typename FragmentElement_ = Scalar_,
59 size_t size = (
sizeof(Scalar_) * kAccessSize)>
66 dst = *
reinterpret_cast<AccessType const*
>(pointer + offset);
74 template <
typename Scalar_,
int kAccessSize, MemorySpace::Kind Memory_>
81 reinterpret_cast<uint16_t&
>(dst) = reinterpret_cast<uint16_t const*>(&pointer[offset])[0];
87 template <
typename Scalar_,
int kAccessSize, MemorySpace::Kind Memory_,
int kStride>
94 dst.
registers[0] =
reinterpret_cast<uint32_t const*
>(&pointer[offset])[0];
101 template <
typename Scalar_,
int kAccessSize, MemorySpace::Kind Memory_,
int kStride>
108 uint2 tmp =
reinterpret_cast<uint2 const*
>(&pointer[offset])[0];
116 template <MemorySpace::Kind Memory_,
int kStride>
123 double2 tmp =
reinterpret_cast<double2 const*
>(&pointer[offset])[0];
131 #if defined(__CUDACC_VERSION_MAJOR) && __CUDACC_VERSION_MAJOR < 10
133 template <MemorySpace::Kind Memory_,
int kStride>
134 struct Load<half, 8, Memory_, FragmentElementType::kScalar, half, kStride, 16> {
140 int2 tmp =
reinterpret_cast<int2 const*
>(&pointer[offset])[0];
141 dst.registers[0] = tmp.x;
142 dst.registers[1] = tmp.y;
144 tmp =
reinterpret_cast<int2 const*
>(&pointer[offset + 4])[0];
145 dst.registers[2] = tmp.x;
146 dst.registers[3] = tmp.y;
154 template <
typename Scalar_,
int kAccessSize, MemorySpace::Kind Memory_,
int kStride>
161 uint4 tmp =
reinterpret_cast<uint4 const*
>(&pointer[offset])[0];
171 template <
typename Scalar_,
175 typename FragmentElement_ = Scalar_,
177 size_t size = (
sizeof(Scalar_) * kAccessSize)>
184 pointer[offset] = *
reinterpret_cast<Scalar_ const*
>(&src);
190 template <
typename Scalar_,
int kAccessSize, MemorySpace::Kind Memory_>
197 uint16_t* addr =
reinterpret_cast<uint16_t*
>(&pointer[offset]);
198 addr[0] =
reinterpret_cast<uint16_t const&
>(src);
204 template <
typename Scalar_,
int kAccessSize, MemorySpace::Kind Memory_,
int kStride>
211 uint32_t* addr =
reinterpret_cast<uint32_t*
>(&pointer[offset]);
218 template <
typename Scalar_,
int kAccessSize, MemorySpace::Kind Memory_,
int kStride>
225 uint2* addr =
reinterpret_cast<uint2*
>(&pointer[offset]);
232 template <MemorySpace::Kind Memory_,
int kStride>
239 double2* addr =
reinterpret_cast<double2*
>(&pointer[offset]);
240 addr[0] = make_double2(src[0], src[1]);
246 template <
typename Scalar_,
int kAccessSize, MemorySpace::Kind Memory_,
int kStride>
253 uint4* addr =
reinterpret_cast<uint4*
>(&pointer[offset]);
260 template <
typename Scalar_,
263 typename FragmentElement_,
278 value.load(&pointer[offset], kStride);
284 template <
int kAccessSize,
286 typename FragmentElement_,
302 value.load(&pointer[offset], kStride * 32);
308 template <
int kAccessSize,
310 typename FragmentElement_,
326 value.load(&pointer[offset], kStride * 8);
332 template <
int kAccessSize,
334 typename FragmentElement_,
350 value.load(&pointer[offset], kStride * 8);
355 template <
typename Scalar_,
358 typename FragmentElement_,
373 value.store(&pointer[offset], kStride);
static CUTLASS_HOST_DEVICE void load(AccessType &value, Vector< bin1_t, 32 > const *pointer, int offset)
The load function.
Definition: load_store.h:300
Vectorize< Scalar_, kAccessSize >::Type AccessType
The output type.
Definition: load_store.h:157
Vectorize< Scalar_, kAccessSize >::Type AccessType
The output type.
Definition: load_store.h:77
static CUTLASS_HOST_DEVICE void store(AccessType const &src, double *pointer, int offset)
The store function.
Definition: load_store.h:238
static CUTLASS_HOST_DEVICE void load(AccessType &value, Vector< int4_t, 8 > const *pointer, int offset)
The load function.
Definition: load_store.h:324
Definition: load_store.h:41
FragmentElement_ AccessType
The output type.
Definition: load_store.h:321
Definition: numeric_types.h:39
Enum to specify which memory space data resides in.
Definition: load_store.h:38
static CUTLASS_HOST_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:196
static CUTLASS_HOST_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:252
FragmentElement_ AccessType
The output type.
Definition: load_store.h:297
Specifies whether an iterator's storage fragment consists of scalar values or a WMMA matrix.
Definition: load_store.h:47
Definition: load_store.h:42
Definition: load_store.h:48
FragmentElement_ AccessType
The output type.
Definition: load_store.h:345
Vectorize< double, 2 >::Type AccessType
The output type.
Definition: load_store.h:119
Vectorize< FragmentElement_, kAccessSize >::Type AccessType
The input type.
Definition: load_store.h:180
Kind
Definition: load_store.h:39
Definition: load_store.h:178
static CUTLASS_HOST_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:160
uint32_t registers[kRegisters]
The data in registers.
Definition: vector.h:81
static CUTLASS_HOST_DEVICE void load(AccessType &value, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:277
Vectorize< Scalar_, kAccessSize >::Type AccessType
The input type.
Definition: load_store.h:193
Vectorize< Scalar_, kAccessSize >::Type AccessType
The output type.
Definition: load_store.h:104
Kind
Definition: load_store.h:48
Definition: load_store.h:40
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
Vectorize< Scalar_, kAccessSize >::Type AccessType
The output type.
Definition: load_store.h:62
static CUTLASS_HOST_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:107
Definition: load_store.h:60
FragmentElement_ AccessType
The input type.
Definition: load_store.h:369
static CUTLASS_HOST_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:93
Definition: load_store.h:48
Vector< Element_, kLanes_ > Type
Definition: vector.h:271
Defines a 1D vector of elements held in the registers of each thread.
Vectorize< Scalar_, kAccessSize >::Type AccessType
The input type.
Definition: load_store.h:249
static CUTLASS_HOST_DEVICE void load(AccessType &value, Vector< uint4_t, 8 > const *pointer, int offset)
The load function.
Definition: load_store.h:348
Definition: numeric_types.h:43
Vectorize< Scalar_, kAccessSize >::Type AccessType
The output type.
Definition: load_store.h:90
static CUTLASS_HOST_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:183
static CUTLASS_HOST_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:224
Vectorize< Scalar_, kAccessSize >::Type AccessType
The input type.
Definition: load_store.h:221
Vectorize< Scalar_, kAccessSize >::Type AccessType
The input type.
Definition: load_store.h:207
static CUTLASS_HOST_DEVICE void store(AccessType const &value, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:372
Definition: numeric_types.h:41
FragmentElement_ AccessType
The output type.
Definition: load_store.h:274
static CUTLASS_HOST_DEVICE void store(AccessType const &src, Scalar_ *pointer, int offset)
The store function.
Definition: load_store.h:210
Vectorize< double, 2 >::Type AccessType
The input type.
Definition: load_store.h:235
static CUTLASS_HOST_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:65
static CUTLASS_HOST_DEVICE void load(AccessType &dst, Scalar_ const *pointer, int offset)
The load function.
Definition: load_store.h:80
static CUTLASS_HOST_DEVICE void load(AccessType &dst, double const *pointer, int offset)
The load function.
Definition: load_store.h:122