OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
 
Loading...
Searching...
No Matches
tex_obj_input_iterator.cuh
Go to the documentation of this file.
1/******************************************************************************
2 * Copyright (c) 2011, Duane Merrill. All rights reserved.
3 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of the NVIDIA CORPORATION nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 *
27 ******************************************************************************/
28
34#pragma once
35
36#include <iterator>
37#include <iostream>
38
39#include "../thread/thread_load.cuh"
40#include "../thread/thread_store.cuh"
41#include "../util_device.cuh"
42#include "../util_debug.cuh"
43#include "../util_namespace.cuh"
44
45#if (THRUST_VERSION >= 100700)
46 // This iterator is compatible with Thrust API 1.7 and newer
47 #include <thrust/iterator/iterator_facade.h>
48 #include <thrust/iterator/iterator_traits.h>
49#endif // THRUST_VERSION
50
51
53CUB_NS_PREFIX
54
56namespace cub {
57
/**
 * \brief A random-access input wrapper for dereferencing array values through texture cache.
 *
 * Wraps a native device pointer so that reads go through the texture cache
 * (Kepler-style texture objects).  Host code binds/unbinds the texture via
 * BindTexture()/UnbindTexture(); device code dereferences through
 * tex1Dfetch.  On the host (CUB_PTX_ARCH == 0) dereferencing falls back to
 * a plain pointer read.
 *
 * \tparam T        The value type of the wrapped input array
 * \tparam OffsetT  The difference type of this iterator (default: \p ptrdiff_t)
 */
template <
    typename    T,
    typename    OffsetT = ptrdiff_t>
class TexObjInputIterator
{
public:

    // Required iterator traits
    typedef TexObjInputIterator                 self_type;              ///< My own type
    typedef OffsetT                             difference_type;        ///< Type to express the result of subtracting one iterator from another
    typedef T                                   value_type;             ///< The type of the element the iterator can point to
    typedef T*                                  pointer;                ///< The type of a pointer to an element the iterator can point to
    typedef T                                   reference;              ///< The type of a reference to an element the iterator can point to

#if (THRUST_VERSION >= 100700)
    // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods
    typedef typename thrust::detail::iterator_facade_category<
        thrust::device_system_tag,
        thrust::random_access_traversal_tag,
        value_type,
        reference
      >::type iterator_category;                                        ///< The iterator category
#else
    typedef std::random_access_iterator_tag     iterator_category;      ///< The iterator category
#endif  // THRUST_VERSION

private:

    // Largest texture word we can use in device
    typedef typename UnitWord<T>::TextureWord TextureWord;

    // Number of texture words per T
    enum {
        TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord)
    };

private:

    T*                  ptr;        // Wrapped native pointer (host-side fallback and rebinding)
    difference_type     tex_offset; // Logical offset (in items of T) of this iterator into the bound range
    cudaTextureObject_t tex_obj;    // Texture object through which device-side reads are served

public:

    /// Constructor
    __host__ __device__ __forceinline__ TexObjInputIterator()
    :
        ptr(NULL),
        tex_offset(0),
        tex_obj(0)
    {}

    /// Use this iterator to bind \p ptr with a texture reference.
    /// Returns the \p cudaError_t from cudaCreateTextureObject (host-only).
    template <typename QualifiedT>
    cudaError_t BindTexture(
        QualifiedT      *ptr,                   ///< Native pointer to wrap
        size_t          bytes = size_t(-1),     ///< Number of bytes in the range
        size_t          tex_offset = 0)         ///< OffsetT (in items) from \p ptr denoting the position of the iterator
    {
        // Strip const/volatile so the same iterator type can wrap qualified pointers
        this->ptr = const_cast<typename RemoveQualifiers<QualifiedT>::Type *>(ptr);
        this->tex_offset = tex_offset;

        cudaChannelFormatDesc   channel_desc = cudaCreateChannelDesc<TextureWord>();
        cudaResourceDesc        res_desc;
        cudaTextureDesc         tex_desc;
        memset(&res_desc, 0, sizeof(cudaResourceDesc));
        memset(&tex_desc, 0, sizeof(cudaTextureDesc));
        res_desc.resType                  = cudaResourceTypeLinear;
        res_desc.res.linear.devPtr        = this->ptr;
        res_desc.res.linear.desc          = channel_desc;
        res_desc.res.linear.sizeInBytes   = bytes;
        tex_desc.readMode                 = cudaReadModeElementType;
        return cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL);
    }

    /// Unbind this iterator from its texture reference (host-only)
    cudaError_t UnbindTexture()
    {
        return cudaDestroyTextureObject(tex_obj);
    }

    /// Postfix increment
    __host__ __device__ __forceinline__ self_type operator++(int)
    {
        self_type retval = *this;
        tex_offset++;
        return retval;
    }

    /// Prefix increment
    __host__ __device__ __forceinline__ self_type operator++()
    {
        tex_offset++;
        return *this;
    }

    /// Indirection
    __host__ __device__ __forceinline__ reference operator*() const
    {
#if (CUB_PTX_ARCH == 0)
        // Simply dereference the pointer on the host
        return ptr[tex_offset];
#else
        // Move array of uninitialized words, then alias and assign to return value
        TextureWord words[TEXTURE_MULTIPLE];

        #pragma unroll
        for (int i = 0; i < TEXTURE_MULTIPLE; ++i)
        {
            // Fetch each texture word of the T at tex_offset through the texture cache
            words[i] = tex1Dfetch<TextureWord>(
                tex_obj,
                (tex_offset * TEXTURE_MULTIPLE) + i);
        }

        // Load from words
        return *reinterpret_cast<T*>(words);
#endif
    }

    /// Addition
    template <typename Distance>
    __host__ __device__ __forceinline__ self_type operator+(Distance n) const
    {
        self_type retval;
        retval.ptr          = ptr;
        retval.tex_obj      = tex_obj;
        retval.tex_offset   = tex_offset + n;
        return retval;
    }

    /// Addition assignment
    template <typename Distance>
    __host__ __device__ __forceinline__ self_type& operator+=(Distance n)
    {
        tex_offset += n;
        return *this;
    }

    /// Subtraction
    template <typename Distance>
    __host__ __device__ __forceinline__ self_type operator-(Distance n) const
    {
        self_type retval;
        retval.ptr          = ptr;
        retval.tex_obj      = tex_obj;
        retval.tex_offset   = tex_offset - n;
        return retval;
    }

    /// Subtraction assignment
    template <typename Distance>
    __host__ __device__ __forceinline__ self_type& operator-=(Distance n)
    {
        tex_offset -= n;
        return *this;
    }

    /// Distance
    __host__ __device__ __forceinline__ difference_type operator-(self_type other) const
    {
        return tex_offset - other.tex_offset;
    }

    /// Array subscript
    template <typename Distance>
    __host__ __device__ __forceinline__ reference operator[](Distance n) const
    {
        self_type offset = (*this) + n;
        return *offset;
    }

    /// Structure dereference
    __host__ __device__ __forceinline__ pointer operator->()
    {
        return &(*(*this));
    }

    /// Equal to (compares pointer, offset, and texture object)
    __host__ __device__ __forceinline__ bool operator==(const self_type& rhs)
    {
        return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset) && (tex_obj == rhs.tex_obj));
    }

    /// Not equal to
    __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs)
    {
        return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset) || (tex_obj != rhs.tex_obj));
    }

    /// ostream operator (intentionally prints nothing)
    friend std::ostream& operator<<(std::ostream& os, const self_type& itr)
    {
        return os;
    }

};
304
305
306 // end group UtilIterator
308
309} // CUB namespace
310CUB_NS_POSTFIX // Optional outer namespace(s)
BlockRadixRank provides operations for ranking unsigned integer types within a CUDA thread block.
A random-access input wrapper for dereferencing array values through texture cache. Uses newer Kepler-style texture objects.
__host__ __device__ __forceinline__ self_type operator-(Distance n) const
Subtraction.
cudaError_t BindTexture(QualifiedT *ptr, size_t bytes=size_t(-1), size_t tex_offset=0)
Use this iterator to bind ptr with a texture reference.
__host__ __device__ __forceinline__ TexObjInputIterator()
Constructor.
cudaError_t UnbindTexture()
Unbind this iterator from its texture reference.
__host__ __device__ __forceinline__ bool operator==(const self_type &rhs)
Equal to.
T value_type
The type of the element the iterator can point to.
__host__ __device__ __forceinline__ self_type operator+(Distance n) const
Addition.
std::random_access_iterator_tag iterator_category
The iterator category.
T reference
The type of a reference to an element the iterator can point to.
__host__ __device__ __forceinline__ self_type & operator+=(Distance n)
Addition assignment.
__host__ __device__ __forceinline__ reference operator[](Distance n) const
Array subscript.
OffsetT difference_type
Type to express the result of subtracting one iterator from another.
__host__ __device__ __forceinline__ self_type operator++(int)
Postfix increment.
friend std::ostream & operator<<(std::ostream &os, const self_type &itr)
ostream operator
__host__ __device__ __forceinline__ pointer operator->()
Structure dereference.
T * pointer
The type of a pointer to an element the iterator can point to.
TexObjInputIterator self_type
My own type.
__host__ __device__ __forceinline__ difference_type operator-(self_type other) const
Distance.
__host__ __device__ __forceinline__ self_type operator++()
Prefix increment.
__host__ __device__ __forceinline__ self_type & operator-=(Distance n)
Subtraction assignment.
__host__ __device__ __forceinline__ reference operator*() const
Indirection.
__host__ __device__ __forceinline__ bool operator!=(const self_type &rhs)
Not equal to.
Optional outer namespace(s)
OffsetT OffsetT
[in] Total number of input data items
Up Type
Type without const and volatile qualifiers.