OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
 
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Loading...
Searching...
No Matches
tex_ref_input_iterator.cuh
Go to the documentation of this file.
1/******************************************************************************
2 * Copyright (c) 2011, Duane Merrill. All rights reserved.
3 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of the NVIDIA CORPORATION nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 ******************************************************************************/
28
34#pragma once
35
36#include <iterator>
37#include <iostream>
38
39#include "../thread/thread_load.cuh"
40#include "../thread/thread_store.cuh"
41#include "../util_device.cuh"
42#include "../util_debug.cuh"
43#include "../util_namespace.cuh"
44
45#if (CUDA_VERSION >= 5050) || defined(DOXYGEN_ACTIVE) // This iterator is compatible with CUDA 5.5 and newer
46
47#if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer
48 #include <thrust/iterator/iterator_facade.h>
49 #include <thrust/iterator/iterator_traits.h>
50#endif // THRUST_VERSION
51
52
54CUB_NS_PREFIX
55
57namespace cub {
58
59
60/******************************************************************************
61 * Static file-scope Tesla/Fermi-style texture references
62 *****************************************************************************/
63
64#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
65
66// Anonymous namespace
67namespace {
68
70template <typename T>
71struct IteratorTexRef
72{
74 template <int UNIQUE_ID>
75 struct TexId
76 {
77 // Largest texture word we can use in device
78 typedef typename UnitWord<T>::DeviceWord DeviceWord;
79 typedef typename UnitWord<T>::TextureWord TextureWord;
80
81 // Number of texture words per T
82 enum {
83 DEVICE_MULTIPLE = sizeof(T) / sizeof(DeviceWord),
84 TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord)
85 };
86
87 // Texture reference type
88 typedef texture<TextureWord> TexRef;
89
90 // Texture reference
91 static TexRef ref;
92
94 static cudaError_t BindTexture(void *d_in, size_t &offset)
95 {
96 if (d_in)
97 {
98 cudaChannelFormatDesc tex_desc = cudaCreateChannelDesc<TextureWord>();
99 ref.channelDesc = tex_desc;
100 return (CubDebug(cudaBindTexture(&offset, ref, d_in)));
101 }
102
103 return cudaSuccess;
104 }
105
107 static cudaError_t UnbindTexture()
108 {
109 return CubDebug(cudaUnbindTexture(ref));
110 }
111
113 template <typename Distance>
114 static __device__ __forceinline__ T Fetch(Distance tex_offset)
115 {
116 DeviceWord temp[DEVICE_MULTIPLE];
117 TextureWord *words = reinterpret_cast<TextureWord*>(temp);
118
119 #pragma unroll
120 for (int i = 0; i < TEXTURE_MULTIPLE; ++i)
121 {
122 words[i] = tex1Dfetch(ref, (tex_offset * TEXTURE_MULTIPLE) + i);
123 }
124
125 return reinterpret_cast<T&>(temp);
126 }
127 };
128};
129
130// Texture reference definitions
131template <typename T>
132template <int UNIQUE_ID>
133typename IteratorTexRef<T>::template TexId<UNIQUE_ID>::TexRef IteratorTexRef<T>::template TexId<UNIQUE_ID>::ref = 0;
134
135
136} // Anonymous namespace
137
138
139#endif // DOXYGEN_SHOULD_SKIP_THIS
140
141
142
200template <
201 typename T,
202 int UNIQUE_ID,
203 typename OffsetT = ptrdiff_t>
204class TexRefInputIterator
205{
206public:
207
208 // Required iterator traits
209 typedef TexRefInputIterator self_type;
210 typedef OffsetT difference_type;
211 typedef T value_type;
212 typedef T* pointer;
213 typedef T reference;
214
215#if (THRUST_VERSION >= 100700)
216 // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods
217 typedef typename thrust::detail::iterator_facade_category<
218 thrust::device_system_tag,
219 thrust::random_access_traversal_tag,
220 value_type,
221 reference
222 >::type iterator_category;
223#else
224 typedef std::random_access_iterator_tag iterator_category;
225#endif // THRUST_VERSION
226
227private:
228
229 T* ptr;
230 difference_type tex_offset;
231
232 // Texture reference wrapper (old Tesla/Fermi-style textures)
233 typedef typename IteratorTexRef<T>::template TexId<UNIQUE_ID> TexId;
234
235public:
236/*
238 __host__ __device__ __forceinline__ TexRefInputIterator()
239 :
240 ptr(NULL),
241 tex_offset(0)
242 {}
243*/
245 template <typename QualifiedT>
246 cudaError_t BindTexture(
247 QualifiedT *ptr,
248 size_t bytes = size_t(-1),
249 size_t tex_offset = 0)
250 {
251 this->ptr = const_cast<typename RemoveQualifiers<QualifiedT>::Type *>(ptr);
252 size_t offset;
253 cudaError_t retval = TexId::BindTexture(this->ptr + tex_offset, offset);
254 this->tex_offset = (difference_type) (offset / sizeof(QualifiedT));
255 return retval;
256 }
257
259 cudaError_t UnbindTexture()
260 {
261 return TexId::UnbindTexture();
262 }
263
265 __host__ __device__ __forceinline__ self_type operator++(int)
266 {
267 self_type retval = *this;
268 tex_offset++;
269 return retval;
270 }
271
273 __host__ __device__ __forceinline__ self_type operator++()
274 {
275 tex_offset++;
276 return *this;
277 }
278
280 __host__ __device__ __forceinline__ reference operator*() const
281 {
282#if (CUB_PTX_ARCH == 0)
283 // Simply dereference the pointer on the host
284 return ptr[tex_offset];
285#else
286 // Use the texture reference
287 return TexId::Fetch(tex_offset);
288#endif
289 }
290
292 template <typename Distance>
293 __host__ __device__ __forceinline__ self_type operator+(Distance n) const
294 {
295 self_type retval;
296 retval.ptr = ptr;
297 retval.tex_offset = tex_offset + n;
298 return retval;
299 }
300
302 template <typename Distance>
303 __host__ __device__ __forceinline__ self_type& operator+=(Distance n)
304 {
305 tex_offset += n;
306 return *this;
307 }
308
310 template <typename Distance>
311 __host__ __device__ __forceinline__ self_type operator-(Distance n) const
312 {
313 self_type retval;
314 retval.ptr = ptr;
315 retval.tex_offset = tex_offset - n;
316 return retval;
317 }
318
320 template <typename Distance>
321 __host__ __device__ __forceinline__ self_type& operator-=(Distance n)
322 {
323 tex_offset -= n;
324 return *this;
325 }
326
328 __host__ __device__ __forceinline__ difference_type operator-(self_type other) const
329 {
330 return tex_offset - other.tex_offset;
331 }
332
334 template <typename Distance>
335 __host__ __device__ __forceinline__ reference operator[](Distance n) const
336 {
337 self_type offset = (*this) + n;
338 return *offset;
339 }
340
342 __host__ __device__ __forceinline__ pointer operator->()
343 {
344 return &(*(*this));
345 }
346
348 __host__ __device__ __forceinline__ bool operator==(const self_type& rhs)
349 {
350 return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset));
351 }
352
354 __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs)
355 {
356 return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset));
357 }
358
360 friend std::ostream& operator<<(std::ostream& os, const self_type& itr)
361 {
362 return os;
363 }
364
365};
366
367
368 // end group UtilIterator
370
371} // CUB namespace
372CUB_NS_POSTFIX // Optional outer namespace(s)
373
374#endif // CUDA_VERSION
#define CubDebug(e)
Debug macro.
Optional outer namespace(s)
OffsetT OffsetT
[in] Total number of input data items