Cutlass
CUDA Templates for Linear Algebra Subroutines and Solvers
iterator_access.h
Go to the documentation of this file.
1 /***************************************************************************************************
2  * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without modification, are permitted
5  * provided that the following conditions are met:
6  * * Redistributions of source code must retain the above copyright notice, this list of
7  * conditions and the following disclaimer.
8  * * Redistributions in binary form must reproduce the above copyright notice, this list of
9  * conditions and the following disclaimer in the documentation and/or other materials
10  * provided with the distribution.
11  * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
12  * to endorse or promote products derived from this software without specific prior written
13  * permission.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
17  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
19  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  **************************************************************************************************/
28 #pragma once
29 
30 #include "cutlass/load_store.h"
32 #include "cutlass/shape.h"
33 
34 namespace cutlass {
35 
37 // Used by convolution
38 template <typename InputIterator, typename Fragment>
39 CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragment) {
40  typename InputIterator::FragmentIterator frag_iterator(fragment);
41  for (int d = 0; d < InputIterator::Iterations::kD; ++d) {
42  for (int h = 0; h < InputIterator::Iterations::kH; ++h) {
43  for (int w = 0; w < InputIterator::Iterations::kW; ++w) {
44  for (int c = 0; c < InputIterator::Iterations::kC; ++c) {
45  if (iterator.valid(d, h, w, c)) {
46  iterator.load_element(reinterpret_cast<typename InputIterator::AccessType &>(
47  frag_iterator.at(d, h, w, c)),
48  d,
49  h,
50  w,
51  c);
52  }
53  }
54  if (w < InputIterator::Iterations::kW - 1) {
55  iterator.inc_w();
56  }
57  }
58  if (h < InputIterator::Iterations::kH - 1) {
59  iterator.inc_h();
60  }
61  }
62  if (d < InputIterator::Iterations::kD - 1) {
63  iterator.inc_d();
64  }
65  }
66  iterator.inc_advance();
67 }
68 
69 template <typename OutputIterator, typename Fragment>
70 CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment) {
71  typename OutputIterator::FragmentIterator frag_iterator(fragment);
72  for (int d = 0; d < OutputIterator::Iterations::kD; ++d) {
73  for (int h = 0; h < OutputIterator::Iterations::kH; ++h) {
74  for (int w = 0; w < OutputIterator::Iterations::kW; ++w) {
75  for (int c = 0; c < OutputIterator::Iterations::kC; ++c) {
76  if (iterator.valid(d, h, w, c)) {
77  iterator.store_element(reinterpret_cast<typename OutputIterator::AccessType &>(
78  frag_iterator.at(d, h, w, c)),
79  d,
80  h,
81  w,
82  c);
83  }
84  }
85  if (w < OutputIterator::Iterations::kW - 1) {
86  iterator.inc_w();
87  }
88  }
89  if (h < OutputIterator::Iterations::kH - 1) {
90  iterator.inc_h();
91  }
92  }
93  if (d < OutputIterator::Iterations::kD - 1) {
94  iterator.inc_d();
95  }
96  }
97  iterator.inc_advance();
98 }
100 
101 } // namespace cutlass
Definition: convert.h:33
A template defining Fragment Concept.
Definition: fragment.h:99
Defines container classes and iterators for managing a statically sized vector of boolean predicates...
Defines abstractions for efficiently loading and storing vectors to memory.
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:46
CUTLASS_HOST_DEVICE void iterator_store(OutputIterator &iterator, Fragment &fragment)
Definition: iterator_access.h:70
CUTLASS_HOST_DEVICE void iterator_load(InputIterator &iterator, Fragment &fragment)
Definition: iterator_access.h:39
Defines Shape implementing the Layout concept for representing a 4D hypercube of objects.