OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
tex_obj_input_iterator.cuh
1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  ******************************************************************************/
28 
34 #pragma once
35 
36 #include <iterator>
37 #include <iostream>
38 
39 #include "../thread/thread_load.cuh"
40 #include "../thread/thread_store.cuh"
41 #include "../util_device.cuh"
42 #include "../util_debug.cuh"
43 #include "../util_namespace.cuh"
44 
45 #if (THRUST_VERSION >= 100700)
46  // This iterator is compatible with Thrust API 1.7 and newer
47  #include <thrust/iterator/iterator_facade.h>
48  #include <thrust/iterator/iterator_traits.h>
49 #endif // THRUST_VERSION
50 
51 
53 CUB_NS_PREFIX
54 
56 namespace cub {
57 
108 template <
109  typename T,
110  typename OffsetT = ptrdiff_t>
111 class TexObjInputIterator
112 {
113 public:
114 
115  // Required iterator traits
116  typedef TexObjInputIterator self_type;
117  typedef OffsetT difference_type;
118  typedef T value_type;
119  typedef T* pointer;
120  typedef T reference;
121 
122 #if (THRUST_VERSION >= 100700)
123  // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods
124  typedef typename thrust::detail::iterator_facade_category<
125  thrust::device_system_tag,
126  thrust::random_access_traversal_tag,
127  value_type,
128  reference
129  >::type iterator_category;
130 #else
131  typedef std::random_access_iterator_tag iterator_category;
132 #endif // THRUST_VERSION
133 
134 private:
135 
136  // Largest texture word we can use in device
137  typedef typename UnitWord<T>::TextureWord TextureWord;
138 
139  // Number of texture words per T
140  enum {
141  TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord)
142  };
143 
144 private:
145 
146  T* ptr;
147  difference_type tex_offset;
148  cudaTextureObject_t tex_obj;
149 
150 public:
151 
153  __host__ __device__ __forceinline__ TexObjInputIterator()
154  :
155  ptr(NULL),
156  tex_offset(0),
157  tex_obj(0)
158  {}
159 
161  template <typename QualifiedT>
162  cudaError_t BindTexture(
163  QualifiedT *ptr,
164  size_t bytes = size_t(-1),
165  size_t tex_offset = 0)
166  {
167  this->ptr = const_cast<typename RemoveQualifiers<QualifiedT>::Type *>(ptr);
168  this->tex_offset = tex_offset;
169 
170  cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc<TextureWord>();
171  cudaResourceDesc res_desc;
172  cudaTextureDesc tex_desc;
173  memset(&res_desc, 0, sizeof(cudaResourceDesc));
174  memset(&tex_desc, 0, sizeof(cudaTextureDesc));
175  res_desc.resType = cudaResourceTypeLinear;
176  res_desc.res.linear.devPtr = this->ptr;
177  res_desc.res.linear.desc = channel_desc;
178  res_desc.res.linear.sizeInBytes = bytes;
179  tex_desc.readMode = cudaReadModeElementType;
180  return cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL);
181  }
182 
184  cudaError_t UnbindTexture()
185  {
186  return cudaDestroyTextureObject(tex_obj);
187  }
188 
190  __host__ __device__ __forceinline__ self_type operator++(int)
191  {
192  self_type retval = *this;
193  tex_offset++;
194  return retval;
195  }
196 
198  __host__ __device__ __forceinline__ self_type operator++()
199  {
200  tex_offset++;
201  return *this;
202  }
203 
205  __host__ __device__ __forceinline__ reference operator*() const
206  {
207 #if (CUB_PTX_ARCH == 0)
208  // Simply dereference the pointer on the host
209  return ptr[tex_offset];
210 #else
211  // Move array of uninitialized words, then alias and assign to return value
212  TextureWord words[TEXTURE_MULTIPLE];
213 
214  #pragma unroll
215  for (int i = 0; i < TEXTURE_MULTIPLE; ++i)
216  {
217  words[i] = tex1Dfetch<TextureWord>(
218  tex_obj,
219  (tex_offset * TEXTURE_MULTIPLE) + i);
220  }
221 
222  // Load from words
223  return *reinterpret_cast<T*>(words);
224 #endif
225  }
226 
228  template <typename Distance>
229  __host__ __device__ __forceinline__ self_type operator+(Distance n) const
230  {
231  self_type retval;
232  retval.ptr = ptr;
233  retval.tex_obj = tex_obj;
234  retval.tex_offset = tex_offset + n;
235  return retval;
236  }
237 
239  template <typename Distance>
240  __host__ __device__ __forceinline__ self_type& operator+=(Distance n)
241  {
242  tex_offset += n;
243  return *this;
244  }
245 
247  template <typename Distance>
248  __host__ __device__ __forceinline__ self_type operator-(Distance n) const
249  {
250  self_type retval;
251  retval.ptr = ptr;
252  retval.tex_obj = tex_obj;
253  retval.tex_offset = tex_offset - n;
254  return retval;
255  }
256 
258  template <typename Distance>
259  __host__ __device__ __forceinline__ self_type& operator-=(Distance n)
260  {
261  tex_offset -= n;
262  return *this;
263  }
264 
266  __host__ __device__ __forceinline__ difference_type operator-(self_type other) const
267  {
268  return tex_offset - other.tex_offset;
269  }
270 
272  template <typename Distance>
273  __host__ __device__ __forceinline__ reference operator[](Distance n) const
274  {
275  self_type offset = (*this) + n;
276  return *offset;
277  }
278 
280  __host__ __device__ __forceinline__ pointer operator->()
281  {
282  return &(*(*this));
283  }
284 
286  __host__ __device__ __forceinline__ bool operator==(const self_type& rhs)
287  {
288  return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset) && (tex_obj == rhs.tex_obj));
289  }
290 
292  __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs)
293  {
294  return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset) || (tex_obj != rhs.tex_obj));
295  }
296 
298  friend std::ostream& operator<<(std::ostream& os, const self_type& itr)
299  {
300  return os;
301  }
302 
303 };
304 
305 
306  // end group UtilIterator
308 
309 } // CUB namespace
310 CUB_NS_POSTFIX // Optional outer namespace(s)
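
A minimal usage sketch, not part of the original header: it assumes the bundled CUB headers are reachable on the include path and uses illustrative names (copy_through_texture, d_in, d_out). It shows the typical lifecycle: bind the texture on the host, read through the iterator in device code, then unbind.

#include <cub/iterator/tex_obj_input_iterator.cuh>
#include <cuda_runtime.h>

// Hypothetical kernel: copy num_items floats out through the texture-backed iterator.
__global__ void copy_through_texture(cub::TexObjInputIterator<float> in,
                                     float *out, int num_items)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < num_items)
        out[i] = in[i];   // operator[] fetches via tex1Dfetch on the device
}

int main()
{
    const int num_items = 1024;
    float *d_in = NULL, *d_out = NULL;
    cudaMalloc(&d_in,  num_items * sizeof(float));
    cudaMalloc(&d_out, num_items * sizeof(float));

    cub::TexObjInputIterator<float> itr;
    itr.BindTexture(d_in, num_items * sizeof(float));   // creates the cudaTextureObject_t

    copy_through_texture<<<(num_items + 255) / 256, 256>>>(itr, d_out, num_items);
    cudaDeviceSynchronize();

    itr.UnbindTexture();                                 // destroys the texture object
    cudaFree(d_in);
    cudaFree(d_out);
    return 0;
}

BindTexture and UnbindTexture are host-side calls (they create and destroy a cudaTextureObject_t), while the dereference operators work in both host and device code.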
Member documentation for cub::TexObjInputIterator:

A random-access input wrapper for dereferencing array values through texture cache.

TexObjInputIterator self_type
My own type.

OffsetT difference_type
Type to express the result of subtracting one iterator from another.

T value_type
The type of the element the iterator can point to.

T * pointer
The type of a pointer to an element the iterator can point to.

T reference
The type of a reference to an element the iterator can point to.

std::random_access_iterator_tag iterator_category
The iterator category.

__host__ __device__ __forceinline__ TexObjInputIterator()
Constructor.

cudaError_t BindTexture(QualifiedT *ptr, size_t bytes=size_t(-1), size_t tex_offset=0)
Use this iterator to bind ptr with a texture reference.

cudaError_t UnbindTexture()
Unbind this iterator from its texture reference.

__host__ __device__ __forceinline__ reference operator*() const
Indirection.

__host__ __device__ __forceinline__ self_type operator++(int)
Postfix increment.

__host__ __device__ __forceinline__ self_type operator++()
Prefix increment.

__host__ __device__ __forceinline__ self_type operator+(Distance n) const
Addition.

__host__ __device__ __forceinline__ self_type & operator+=(Distance n)
Addition assignment.

__host__ __device__ __forceinline__ self_type operator-(Distance n) const
Subtraction.

__host__ __device__ __forceinline__ self_type & operator-=(Distance n)
Subtraction assignment.

__host__ __device__ __forceinline__ difference_type operator-(self_type other) const
Distance.

__host__ __device__ __forceinline__ reference operator[](Distance n) const
Array subscript.

__host__ __device__ __forceinline__ pointer operator->()
Structure dereference.

__host__ __device__ __forceinline__ bool operator==(const self_type &rhs)
Equal to.

__host__ __device__ __forceinline__ bool operator!=(const self_type &rhs)
Not equal to.

friend std::ostream & operator<<(std::ostream &os, const self_type &itr)
ostream operator.
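
Because the iterator exposes the random-access traits listed above, it can also be handed directly to device-wide CUB algorithms. The sketch below is illustrative rather than taken from the file: it assumes <cub/cub.cuh> is available and that d_in already holds num_items integers, and it sums them through the texture path with cub::DeviceReduce::Sum using the usual two-phase temporary-storage pattern.

#include <cub/cub.cuh>
#include <cuda_runtime.h>

// Illustrative helper: reduce num_items ints read through the texture cache.
cudaError_t sum_through_texture(int *d_in, int *d_sum, int num_items)
{
    cub::TexObjInputIterator<int> itr;
    cudaError_t error = itr.BindTexture(d_in, num_items * sizeof(int));
    if (error != cudaSuccess) return error;

    // First call sizes the temporary storage, second call runs the reduction.
    void  *d_temp_storage = NULL;
    size_t temp_storage_bytes = 0;
    cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, itr, d_sum, num_items);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, itr, d_sum, num_items);

    cudaFree(d_temp_storage);
    return itr.UnbindTexture();
}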