OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
tex_ref_input_iterator.cuh
/******************************************************************************
 * Copyright (c) 2011, Duane Merrill.  All rights reserved.
 * Copyright (c) 2011-2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ******************************************************************************/

#pragma once

#include <iterator>
#include <iostream>

#include "../thread/thread_load.cuh"
#include "../thread/thread_store.cuh"
#include "../util_device.cuh"
#include "../util_debug.cuh"
#include "../util_namespace.cuh"

#if (CUDA_VERSION >= 5050) || defined(DOXYGEN_ACTIVE)  // This iterator is compatible with CUDA 5.5 and newer

#if (THRUST_VERSION >= 100700)    // This iterator is compatible with Thrust API 1.7 and newer
    #include <thrust/iterator/iterator_facade.h>
    #include <thrust/iterator/iterator_traits.h>
#endif // THRUST_VERSION

/// Optional outer namespace(s)
CUB_NS_PREFIX

/// CUB namespace
namespace cub {

/******************************************************************************
 * Static file-scope Tesla/Fermi-style texture references
 *****************************************************************************/

#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document

// Anonymous namespace
namespace {

/// Global texture reference specialized by type
template <typename T>
struct IteratorTexRef
{
    /// And by unique ID
    template <int UNIQUE_ID>
    struct TexId
    {
        // Largest texture word we can use in device
        typedef typename UnitWord<T>::DeviceWord DeviceWord;
        typedef typename UnitWord<T>::TextureWord TextureWord;

        // Number of texture words per T
        enum {
            DEVICE_MULTIPLE  = sizeof(T) / sizeof(DeviceWord),
            TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord)
        };

        // Texture reference type
        typedef texture<TextureWord> TexRef;

        // Texture reference
        static TexRef ref;

        /// Bind texture
        static cudaError_t BindTexture(void *d_in, size_t &offset)
        {
            if (d_in)
            {
                cudaChannelFormatDesc tex_desc = cudaCreateChannelDesc<TextureWord>();
                ref.channelDesc = tex_desc;
                return (CubDebug(cudaBindTexture(&offset, ref, d_in)));
            }

            return cudaSuccess;
        }

        /// Unbind texture
        static cudaError_t UnbindTexture()
        {
            return CubDebug(cudaUnbindTexture(ref));
        }

        /// Fetch element: read TEXTURE_MULTIPLE texture words and reassemble them into one T
        template <typename Distance>
        static __device__ __forceinline__ T Fetch(Distance tex_offset)
        {
            DeviceWord temp[DEVICE_MULTIPLE];
            TextureWord *words = reinterpret_cast<TextureWord*>(temp);

            #pragma unroll
            for (int i = 0; i < TEXTURE_MULTIPLE; ++i)
            {
                words[i] = tex1Dfetch(ref, (tex_offset * TEXTURE_MULTIPLE) + i);
            }

            return reinterpret_cast<T&>(temp);
        }
    };
};

// Texture reference definitions
template <typename T>
template <int UNIQUE_ID>
typename IteratorTexRef<T>::template TexId<UNIQUE_ID>::TexRef IteratorTexRef<T>::template TexId<UNIQUE_ID>::ref = 0;


} // Anonymous namespace
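
// How TexId::Fetch reassembles a value (illustrative walkthrough; not part of the
// original header): texture references can only fetch word-sized channel types, so a
// larger T is read as TEXTURE_MULTIPLE consecutive TextureWord fetches and then
// reinterpreted.  For example, assuming the UnitWord mappings in util_type.cuh:
//
//   T = double4 (32 bytes)  ->  TextureWord = uint4 (16 bytes), TEXTURE_MULTIPLE = 2
//
//   // Element i is then reconstructed from two uint4 texture fetches:
//   //   words[0] = tex1Dfetch(ref, i * 2);
//   //   words[1] = tex1Dfetch(ref, i * 2 + 1);
//   //   return reinterpret_cast<double4&>(temp);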


#endif // DOXYGEN_SHOULD_SKIP_THIS


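/**
 * \brief A random-access input wrapper for dereferencing array values through texture
 * cache.  Uses older Tesla/Fermi-style texture references.
 *
 * \par Overview
 * - TexRefInputIterator wraps a native device pointer of type <tt>T*</tt>; references
 *   to elements are loaded through the texture cache.
 * - It can be exchanged and manipulated within and between host and device functions,
 *   but can only be bound/unbound on the host and dereferenced within device functions.
 * - The \p UNIQUE_ID template parameter statically names the underlying texture
 *   reference, so only one iterator instance per (T, UNIQUE_ID) combination can be
 *   bound at a given time.
 *
 * \par Snippet
 * The snippet below sketches typical usage; the array contents are example data.
 * \code
 * #include <cub/cub.cuh>
 *
 * // Declare, allocate, and initialize a device array
 * int num_items;   // e.g., 8
 * double *d_in;    // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0, 8.0]
 *
 * // Create an iterator wrapper (__COUNTER__ supplies a unique ID)
 * cub::TexRefInputIterator<double, __COUNTER__> itr;
 * itr.BindTexture(d_in, sizeof(double) * num_items);
 *
 * // Within device code:
 * printf("%f\n", itr[0]);  // 8.0
 *
 * // After device usage:
 * itr.UnbindTexture();
 * \endcode
 */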
template <
    typename    T,
    int         UNIQUE_ID,
    typename    OffsetT = ptrdiff_t>
class TexRefInputIterator
{
public:

    // Required iterator traits
    typedef TexRefInputIterator             self_type;          ///< My own type
    typedef OffsetT                         difference_type;    ///< Type to express the result of subtracting one iterator from another
    typedef T                               value_type;         ///< The type of the element the iterator can point to
    typedef T*                              pointer;            ///< The type of a pointer to an element the iterator can point to
    typedef T                               reference;          ///< The type of a reference to an element the iterator can point to

#if (THRUST_VERSION >= 100700)
    // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods
    typedef typename thrust::detail::iterator_facade_category<
        thrust::device_system_tag,
        thrust::random_access_traversal_tag,
        value_type,
        reference
      >::type iterator_category;                                ///< The iterator category
#else
    typedef std::random_access_iterator_tag iterator_category;  ///< The iterator category
#endif  // THRUST_VERSION

private:

    T*              ptr;
    difference_type tex_offset;

    // Texture reference wrapper (old Tesla/Fermi-style textures)
    typedef typename IteratorTexRef<T>::template TexId<UNIQUE_ID> TexId;

public:
/*
    /// Constructor (disabled in this copy; the compiler-generated default constructor is used)
    __host__ __device__ __forceinline__ TexRefInputIterator()
    :
        ptr(NULL),
        tex_offset(0)
    {}
*/
    /// Use this iterator to bind \p ptr with a texture reference
    template <typename QualifiedT>
    cudaError_t BindTexture(
        QualifiedT      *ptr,                   ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment
        size_t          bytes = size_t(-1),     ///< Number of bytes in the range (unused)
        size_t          tex_offset = 0)         ///< OffsetT (in items) from \p ptr denoting the position of the iterator
    {
        this->ptr = const_cast<typename RemoveQualifiers<QualifiedT>::Type *>(ptr);
        size_t offset;
        cudaError_t retval = TexId::BindTexture(this->ptr + tex_offset, offset);
        this->tex_offset = (difference_type) (offset / sizeof(QualifiedT));
        return retval;
    }

    /// Unbind this iterator from its texture reference
    cudaError_t UnbindTexture()
    {
        return TexId::UnbindTexture();
    }
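
    // Note (illustrative; follows from the static TexId reference above): because the
    // texture reference is a static member per (T, UNIQUE_ID) instantiation, binding a
    // second iterator with the same type and ID rebinds the same texture reference and
    // invalidates the first.  Distinct IDs (e.g., via __COUNTER__) yield independent
    // references:
    //
    //   cub::TexRefInputIterator<float, __COUNTER__> a;   // e.g., TexId<0>
    //   cub::TexRefInputIterator<float, __COUNTER__> b;   // e.g., TexId<1>
    //   a.BindTexture(d_a);   // independent bindings
    //   b.BindTexture(d_b);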

    /// Postfix increment
    __host__ __device__ __forceinline__ self_type operator++(int)
    {
        self_type retval = *this;
        tex_offset++;
        return retval;
    }

    /// Prefix increment
    __host__ __device__ __forceinline__ self_type operator++()
    {
        tex_offset++;
        return *this;
    }

    /// Indirection
    __host__ __device__ __forceinline__ reference operator*() const
    {
#if (CUB_PTX_ARCH == 0)
        // Simply dereference the pointer on the host
        return ptr[tex_offset];
#else
        // Use the texture reference on the device
        return TexId::Fetch(tex_offset);
#endif
    }

    /// Addition
    template <typename Distance>
    __host__ __device__ __forceinline__ self_type operator+(Distance n) const
    {
        self_type retval;
        retval.ptr = ptr;
        retval.tex_offset = tex_offset + n;
        return retval;
    }

    /// Addition assignment
    template <typename Distance>
    __host__ __device__ __forceinline__ self_type& operator+=(Distance n)
    {
        tex_offset += n;
        return *this;
    }

    /// Subtraction
    template <typename Distance>
    __host__ __device__ __forceinline__ self_type operator-(Distance n) const
    {
        self_type retval;
        retval.ptr = ptr;
        retval.tex_offset = tex_offset - n;
        return retval;
    }

    /// Subtraction assignment
    template <typename Distance>
    __host__ __device__ __forceinline__ self_type& operator-=(Distance n)
    {
        tex_offset -= n;
        return *this;
    }

    /// Distance
    __host__ __device__ __forceinline__ difference_type operator-(self_type other) const
    {
        return tex_offset - other.tex_offset;
    }

    /// Array subscript
    template <typename Distance>
    __host__ __device__ __forceinline__ reference operator[](Distance n) const
    {
        self_type offset = (*this) + n;
        return *offset;
    }

    /// Structure dereference
    __host__ __device__ __forceinline__ pointer operator->()
    {
        return &(*(*this));
    }

    /// Equal to
    __host__ __device__ __forceinline__ bool operator==(const self_type& rhs)
    {
        return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset));
    }

    /// Not equal to
    __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs)
    {
        return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset));
    }

    /// ostream operator (no-op: nothing is printed for this iterator)
    friend std::ostream& operator<<(std::ostream& os, const self_type& itr)
    {
        return os;
    }

};
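
// A minimal end-to-end sketch (assuming CUB's DeviceReduce API and example data):
// feeding a TexRefInputIterator to a device-wide reduction so that inputs are loaded
// through texture cache.
//
//   double *d_in;    // e.g., 8 items: [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0, 8.0]
//   double *d_out;   // 1-item device buffer for the result
//
//   cub::TexRefInputIterator<double, __COUNTER__> itr;
//   itr.BindTexture(d_in, sizeof(double) * 8);
//
//   // Query temporary storage requirements, allocate, then reduce
//   void   *d_temp_storage     = NULL;
//   size_t  temp_storage_bytes = 0;
//   cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, itr, d_out, 8);
//   cudaMalloc(&d_temp_storage, temp_storage_bytes);
//   cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, itr, d_out, 8);
//
//   // d_out[0] <-- 46.0
//   itr.UnbindTexture();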


/** @} */       // end group UtilIterator

}               // CUB namespace
CUB_NS_POSTFIX  // Optional outer namespace(s)

#endif // CUDA_VERSION