OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
util_type.cuh
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  ******************************************************************************/
28 
34 #pragma once
35 
36 #include <iostream>
37 #include <limits>
38 #include <cfloat>
39 
40 #if (__CUDACC_VER_MAJOR__ >= 9)
41  #include <cuda_fp16.h>
42 #endif
43 
44 #include "util_macro.cuh"
45 #include "util_arch.cuh"
46 #include "util_namespace.cuh"
47 
48 
49 
51 CUB_NS_PREFIX
52 
54 namespace cub {
55 
56 
64 /******************************************************************************
65  * Type equality
66  ******************************************************************************/
67 
71 template <bool IF, typename ThenType, typename ElseType>
72 struct If
73 {
75  typedef ThenType Type; // true
76 };
77 
78 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
79 
80 template <typename ThenType, typename ElseType>
81 struct If<false, ThenType, ElseType>
82 {
83  typedef ElseType Type; // false
84 };
85 
86 #endif // DOXYGEN_SHOULD_SKIP_THIS
87 
88 
89 
90 /******************************************************************************
91  * Conditional types
92  ******************************************************************************/
93 
97 template <typename A, typename B>
98 struct Equals
99 {
100  enum {
101  VALUE = 0,
102  NEGATE = 1
103  };
104 };
105 
106 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
107 
108 template <typename A>
109 struct Equals <A, A>
110 {
111  enum {
112  VALUE = 1,
113  NEGATE = 0
114  };
115 };
116 
117 #endif // DOXYGEN_SHOULD_SKIP_THIS
118 
119 
120 /******************************************************************************
121  * Static math
122  ******************************************************************************/
123 
131 template <int N, int CURRENT_VAL = N, int COUNT = 0>
132 struct Log2
133 {
135  enum { VALUE = Log2<N, (CURRENT_VAL >> 1), COUNT + 1>::VALUE }; // Inductive case
136 };
137 
138 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
139 
140 template <int N, int COUNT>
141 struct Log2<N, 0, COUNT>
142 {
143  enum {VALUE = (1 << (COUNT - 1) < N) ? // Base case
144  COUNT :
145  COUNT - 1 };
146 };
147 
148 #endif // DOXYGEN_SHOULD_SKIP_THIS
149 
150 
154 template <int N>
156 {
157  enum { VALUE = ((N & (N - 1)) == 0) };
158 };
159 
160 
161 
162 /******************************************************************************
163  * Pointer vs. iterator detection
164  ******************************************************************************/
165 
169 template <typename Tp>
170 struct IsPointer
171 {
172  enum { VALUE = 0 };
173 };
174 
175 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
176 
177 template <typename Tp>
178 struct IsPointer<Tp*>
179 {
180  enum { VALUE = 1 };
181 };
182 
183 #endif // DOXYGEN_SHOULD_SKIP_THIS
184 
185 
186 
187 /******************************************************************************
188  * Qualifier detection
189  ******************************************************************************/
190 
194 template <typename Tp>
196 {
197  enum { VALUE = 0 };
198 };
199 
200 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
201 
202 template <typename Tp>
203 struct IsVolatile<Tp volatile>
204 {
205  enum { VALUE = 1 };
206 };
207 
208 #endif // DOXYGEN_SHOULD_SKIP_THIS
209 
210 
211 /******************************************************************************
212  * Qualifier removal
213  ******************************************************************************/
214 
221 template <typename Tp, typename Up = Tp>
223 {
225  typedef Up Type;
226 };
227 
228 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
229 
230 template <typename Tp, typename Up>
231 struct RemoveQualifiers<Tp, volatile Up>
232 {
233  typedef Up Type;
234 };
235 
236 template <typename Tp, typename Up>
237 struct RemoveQualifiers<Tp, const Up>
238 {
239  typedef Up Type;
240 };
241 
242 template <typename Tp, typename Up>
243 struct RemoveQualifiers<Tp, const volatile Up>
244 {
245  typedef Up Type;
246 };
247 
248 
249 /******************************************************************************
250  * Marker types
251  ******************************************************************************/
252 
256 struct NullType
257 {
258 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
259 
260  template <typename T>
261  __host__ __device__ __forceinline__ NullType& operator =(const T&) { return *this; }
262 
263  __host__ __device__ __forceinline__ bool operator ==(const NullType&) { return true; }
264 
265  __host__ __device__ __forceinline__ bool operator !=(const NullType&) { return false; }
266 
267 #endif // DOXYGEN_SHOULD_SKIP_THIS
268 };
269 
270 
274 template <int A>
275 struct Int2Type
276 {
277  enum {VALUE = A};
278 };
279 
280 
281 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
282 
283 
284 /******************************************************************************
285  * Size and alignment
286  ******************************************************************************/
287 
289 template <typename T>
291 {
292  struct Pad
293  {
294  T val;
295  char byte;
296  };
297 
298  enum
299  {
301  ALIGN_BYTES = sizeof(Pad) - sizeof(T)
302  };
303 
305  typedef T Type;
306 };
307 
308 // Specializations where host C++ compilers (e.g., 32-bit Windows) may disagree
309 // with device C++ compilers (EDG) on types passed as template parameters through
310 // kernel functions
311 
312 #define __CUB_ALIGN_BYTES(t, b) \
313  template <> struct AlignBytes<t> \
314  { enum { ALIGN_BYTES = b }; typedef __align__(b) t Type; };
315 
316 __CUB_ALIGN_BYTES(short4, 8)
317 __CUB_ALIGN_BYTES(ushort4, 8)
318 __CUB_ALIGN_BYTES(int2, 8)
319 __CUB_ALIGN_BYTES(uint2, 8)
320 __CUB_ALIGN_BYTES(long long, 8)
321 __CUB_ALIGN_BYTES(unsigned long long, 8)
322 __CUB_ALIGN_BYTES(float2, 8)
323 __CUB_ALIGN_BYTES(double, 8)
324 #ifdef _WIN32
325  __CUB_ALIGN_BYTES(long2, 8)
326  __CUB_ALIGN_BYTES(ulong2, 8)
327 #else
328  __CUB_ALIGN_BYTES(long2, 16)
329  __CUB_ALIGN_BYTES(ulong2, 16)
330 #endif
331 __CUB_ALIGN_BYTES(int4, 16)
332 __CUB_ALIGN_BYTES(uint4, 16)
333 __CUB_ALIGN_BYTES(float4, 16)
334 __CUB_ALIGN_BYTES(long4, 16)
335 __CUB_ALIGN_BYTES(ulong4, 16)
336 __CUB_ALIGN_BYTES(longlong2, 16)
337 __CUB_ALIGN_BYTES(ulonglong2, 16)
338 __CUB_ALIGN_BYTES(double2, 16)
339 __CUB_ALIGN_BYTES(longlong4, 16)
340 __CUB_ALIGN_BYTES(ulonglong4, 16)
341 __CUB_ALIGN_BYTES(double4, 16)
342 
343 template <typename T> struct AlignBytes<volatile T> : AlignBytes<T> {};
344 template <typename T> struct AlignBytes<const T> : AlignBytes<T> {};
345 template <typename T> struct AlignBytes<const volatile T> : AlignBytes<T> {};
346 
347 
349 template <typename T>
350 struct UnitWord
351 {
352  enum {
353  ALIGN_BYTES = AlignBytes<T>::ALIGN_BYTES
354  };
355 
356  template <typename Unit>
357  struct IsMultiple
358  {
359  enum {
360  UNIT_ALIGN_BYTES = AlignBytes<Unit>::ALIGN_BYTES,
361  IS_MULTIPLE = (sizeof(T) % sizeof(Unit) == 0) && (ALIGN_BYTES % UNIT_ALIGN_BYTES == 0)
362  };
363  };
364 
366  typedef typename If<IsMultiple<int>::IS_MULTIPLE,
367  unsigned int,
369  unsigned short,
370  unsigned char>::Type>::Type ShuffleWord;
371 
373  typedef typename If<IsMultiple<long long>::IS_MULTIPLE,
374  unsigned long long,
376 
378  typedef typename If<IsMultiple<longlong2>::IS_MULTIPLE,
379  ulonglong2,
381 
383  typedef typename If<IsMultiple<int4>::IS_MULTIPLE,
384  uint4,
386  uint2,
387  ShuffleWord>::Type>::Type TextureWord;
388 };
389 
390 
391 // float2 specialization workaround (for SM10-SM13)
392 template <>
393 struct UnitWord <float2>
394 {
395  typedef int ShuffleWord;
396 #if (CUB_PTX_ARCH > 0) && (CUB_PTX_ARCH <= 130)
397  typedef float VolatileWord;
398  typedef uint2 DeviceWord;
399 #else
400  typedef unsigned long long VolatileWord;
401  typedef unsigned long long DeviceWord;
402 #endif
403  typedef float2 TextureWord;
404 };
405 
406 // float4 specialization workaround (for SM10-SM13)
407 template <>
408 struct UnitWord <float4>
409 {
410  typedef int ShuffleWord;
411 #if (CUB_PTX_ARCH > 0) && (CUB_PTX_ARCH <= 130)
412  typedef float VolatileWord;
413  typedef uint4 DeviceWord;
414 #else
415  typedef unsigned long long VolatileWord;
416  typedef ulonglong2 DeviceWord;
417 #endif
418  typedef float4 TextureWord;
419 };
420 
421 
422 // char2 specialization workaround (for SM10-SM13)
423 template <>
424 struct UnitWord <char2>
425 {
426  typedef unsigned short ShuffleWord;
427 #if (CUB_PTX_ARCH > 0) && (CUB_PTX_ARCH <= 130)
428  typedef unsigned short VolatileWord;
429  typedef short DeviceWord;
430 #else
431  typedef unsigned short VolatileWord;
432  typedef unsigned short DeviceWord;
433 #endif
434  typedef unsigned short TextureWord;
435 };
436 
437 
438 template <typename T> struct UnitWord<volatile T> : UnitWord<T> {};
439 template <typename T> struct UnitWord<const T> : UnitWord<T> {};
440 template <typename T> struct UnitWord<const volatile T> : UnitWord<T> {};
441 
442 
443 #endif // DOXYGEN_SHOULD_SKIP_THIS
444 
445 
446 
447 /******************************************************************************
448  * Vector type inference utilities.
449  ******************************************************************************/
450 
454 template <typename T, int vec_elements> struct CubVector;
455 
456 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
457 
458 enum
459 {
462 };
463 
464 
468 template <typename T>
469 struct CubVector<T, 1>
470 {
471  T x;
472 
473  typedef T BaseType;
474  typedef CubVector<T, 1> Type;
475 };
476 
480 template <typename T>
481 struct CubVector<T, 2>
482 {
483  T x;
484  T y;
485 
486  typedef T BaseType;
487  typedef CubVector<T, 2> Type;
488 };
489 
493 template <typename T>
494 struct CubVector<T, 3>
495 {
496  T x;
497  T y;
498  T z;
499 
500  typedef T BaseType;
501  typedef CubVector<T, 3> Type;
502 };
503 
507 template <typename T>
508 struct CubVector<T, 4>
509 {
510  T x;
511  T y;
512  T z;
513  T w;
514 
515  typedef T BaseType;
516  typedef CubVector<T, 4> Type;
517 };
518 
519 
523 #define CUB_DEFINE_VECTOR_TYPE(base_type,short_type) \
524  \
525  template<> struct CubVector<base_type, 1> : short_type##1 \
526  { \
527  typedef base_type BaseType; \
528  typedef short_type##1 Type; \
529  __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \
530  CubVector retval; \
531  retval.x = x + other.x; \
532  return retval; \
533  } \
534  __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \
535  CubVector retval; \
536  retval.x = x - other.x; \
537  return retval; \
538  } \
539  }; \
540  \
541  template<> struct CubVector<base_type, 2> : short_type##2 \
542  { \
543  typedef base_type BaseType; \
544  typedef short_type##2 Type; \
545  __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \
546  CubVector retval; \
547  retval.x = x + other.x; \
548  retval.y = y + other.y; \
549  return retval; \
550  } \
551  __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \
552  CubVector retval; \
553  retval.x = x - other.x; \
554  retval.y = y - other.y; \
555  return retval; \
556  } \
557  }; \
558  \
559  template<> struct CubVector<base_type, 3> : short_type##3 \
560  { \
561  typedef base_type BaseType; \
562  typedef short_type##3 Type; \
563  __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \
564  CubVector retval; \
565  retval.x = x + other.x; \
566  retval.y = y + other.y; \
567  retval.z = z + other.z; \
568  return retval; \
569  } \
570  __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \
571  CubVector retval; \
572  retval.x = x - other.x; \
573  retval.y = y - other.y; \
574  retval.z = z - other.z; \
575  return retval; \
576  } \
577  }; \
578  \
579  template<> struct CubVector<base_type, 4> : short_type##4 \
580  { \
581  typedef base_type BaseType; \
582  typedef short_type##4 Type; \
583  __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \
584  CubVector retval; \
585  retval.x = x + other.x; \
586  retval.y = y + other.y; \
587  retval.z = z + other.z; \
588  retval.w = w + other.w; \
589  return retval; \
590  } \
591  __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \
592  CubVector retval; \
593  retval.x = x - other.x; \
594  retval.y = y - other.y; \
595  retval.z = z - other.z; \
596  retval.w = w - other.w; \
597  return retval; \
598  } \
599  };
600 
601 
602 
603 // Expand CUDA vector types for built-in primitives
604 CUB_DEFINE_VECTOR_TYPE(char, char)
605 CUB_DEFINE_VECTOR_TYPE(signed char, char)
606 CUB_DEFINE_VECTOR_TYPE(short, short)
607 CUB_DEFINE_VECTOR_TYPE(int, int)
608 CUB_DEFINE_VECTOR_TYPE(long, long)
609 CUB_DEFINE_VECTOR_TYPE(long long, longlong)
610 CUB_DEFINE_VECTOR_TYPE(unsigned char, uchar)
611 CUB_DEFINE_VECTOR_TYPE(unsigned short, ushort)
612 CUB_DEFINE_VECTOR_TYPE(unsigned int, uint)
613 CUB_DEFINE_VECTOR_TYPE(unsigned long, ulong)
614 CUB_DEFINE_VECTOR_TYPE(unsigned long long, ulonglong)
615 CUB_DEFINE_VECTOR_TYPE(float, float)
616 CUB_DEFINE_VECTOR_TYPE(double, double)
617 CUB_DEFINE_VECTOR_TYPE(bool, uchar)
618 
619 // Undefine macros
620 #undef CUB_DEFINE_VECTOR_TYPE
621 
622 #endif // DOXYGEN_SHOULD_SKIP_THIS
623 
624 
625 
626 /******************************************************************************
627  * Wrapper types
628  ******************************************************************************/
629 
633 template <typename T>
635 {
638 
639  enum
640  {
641  WORDS = sizeof(T) / sizeof(DeviceWord)
642  };
643 
646 
648  __host__ __device__ __forceinline__ T& Alias()
649  {
650  return reinterpret_cast<T&>(*this);
651  }
652 };
653 
654 
658 template <
659  typename _Key,
660  typename _Value
661 #if defined(_WIN32) && !defined(_WIN64)
664 #endif // #if defined(_WIN32) && !defined(_WIN64)
665  >
667 {
668  typedef _Key Key;
669  typedef _Value Value;
670 
673 
675  __host__ __device__ __forceinline__
677 
679  __host__ __device__ __forceinline__
680  KeyValuePair(Key const& key, Value const& value) : key(key), value(value) {}
681 
683  __host__ __device__ __forceinline__ bool operator !=(const KeyValuePair &b)
684  {
685  return (value != b.value) || (key != b.key);
686  }
687 };
688 
689 #if defined(_WIN32) && !defined(_WIN64)
690 
703 template <typename K, typename V>
705 struct KeyValuePair<K, V, true, false>
706 {
707  typedef K Key;
708  typedef V Value;
709 
711 
712  Value value; // Value has larger would-be alignment and goes first
713  Key key;
714  Pad pad;
715 
717  __host__ __device__ __forceinline__
718  KeyValuePair() {}
719 
721  __host__ __device__ __forceinline__
722  KeyValuePair(Key const& key, Value const& value) : key(key), value(value) {}
723 
725  __host__ __device__ __forceinline__ bool operator !=(const KeyValuePair &b)
726  {
727  return (value != b.value) || (key != b.key);
728  }
729 };
730 
731 
733 template <typename K, typename V>
734 struct KeyValuePair<K, V, false, true>
735 {
736  typedef K Key;
737  typedef V Value;
738 
740 
741  Key key; // Key has larger would-be alignment and goes first
742  Value value;
743  Pad pad;
744 
746  __host__ __device__ __forceinline__
747  KeyValuePair() {}
748 
750  __host__ __device__ __forceinline__
751  KeyValuePair(Key const& key, Value const& value) : key(key), value(value) {}
752 
754  __host__ __device__ __forceinline__ bool operator !=(const KeyValuePair &b)
755  {
756  return (value != b.value) || (key != b.key);
757  }
758 };
759 
760 #endif // #if defined(_WIN32) && !defined(_WIN64)
761 
762 
763 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
764 
765 
769 template <typename T, int COUNT>
771 {
772 
774  T array[COUNT];
775 
777  __host__ __device__ __forceinline__ ArrayWrapper() {}
778 };
779 
780 #endif // DOXYGEN_SHOULD_SKIP_THIS
781 
790 template <typename T>
792 {
794  T *d_buffers[2];
795 
797  int selector;
798 
800  __host__ __device__ __forceinline__ DoubleBuffer()
801  {
802  selector = 0;
803  d_buffers[0] = NULL;
804  d_buffers[1] = NULL;
805  }
806 
808  __host__ __device__ __forceinline__ DoubleBuffer(
809  T *d_current,
810  T *d_alternate)
811  {
812  selector = 0;
813  d_buffers[0] = d_current;
814  d_buffers[1] = d_alternate;
815  }
816 
818  __host__ __device__ __forceinline__ T* Current() { return d_buffers[selector]; }
819 
821  __host__ __device__ __forceinline__ T* Alternate() { return d_buffers[selector ^ 1]; }
822 
823 };
824 
825 
826 
827 /******************************************************************************
828  * Typedef-detection
829  ******************************************************************************/
830 
831 
835 #define CUB_DEFINE_DETECT_NESTED_TYPE(detector_name, nested_type_name) \
836  template <typename T> \
837  struct detector_name \
838  { \
839  template <typename C> \
840  static char& test(typename C::nested_type_name*); \
841  template <typename> \
842  static int& test(...); \
843  enum \
844  { \
845  VALUE = sizeof(test<T>(0)) < sizeof(int) \
846  }; \
847  };
848 
849 
850 
851 /******************************************************************************
852  * Simple enable-if (similar to Boost)
853  ******************************************************************************/
854 
858 template <bool Condition, class T = void>
859 struct EnableIf
860 {
862  typedef T Type;
863 };
864 
865 
866 template <class T>
868 
869 
870 
871 /******************************************************************************
872  * Typedef-detection
873  ******************************************************************************/
874 
878 template <typename T, typename BinaryOp>
880 {
881 private:
882 /*
883  template <typename BinaryOpT, bool (BinaryOpT::*)(const T &a, const T &b, unsigned int idx) const> struct SFINAE1 {};
884  template <typename BinaryOpT, bool (BinaryOpT::*)(const T &a, const T &b, unsigned int idx)> struct SFINAE2 {};
885  template <typename BinaryOpT, bool (BinaryOpT::*)(T a, T b, unsigned int idx) const> struct SFINAE3 {};
886  template <typename BinaryOpT, bool (BinaryOpT::*)(T a, T b, unsigned int idx)> struct SFINAE4 {};
887 */
888  template <typename BinaryOpT, bool (BinaryOpT::*)(const T &a, const T &b, int idx) const> struct SFINAE5 {};
889  template <typename BinaryOpT, bool (BinaryOpT::*)(const T &a, const T &b, int idx)> struct SFINAE6 {};
890  template <typename BinaryOpT, bool (BinaryOpT::*)(T a, T b, int idx) const> struct SFINAE7 {};
891  template <typename BinaryOpT, bool (BinaryOpT::*)(T a, T b, int idx)> struct SFINAE8 {};
892 /*
893  template <typename BinaryOpT> static char Test(SFINAE1<BinaryOpT, &BinaryOpT::operator()> *);
894  template <typename BinaryOpT> static char Test(SFINAE2<BinaryOpT, &BinaryOpT::operator()> *);
895  template <typename BinaryOpT> static char Test(SFINAE3<BinaryOpT, &BinaryOpT::operator()> *);
896  template <typename BinaryOpT> static char Test(SFINAE4<BinaryOpT, &BinaryOpT::operator()> *);
897 */
898  template <typename BinaryOpT> __host__ __device__ static char Test(SFINAE5<BinaryOpT, &BinaryOpT::operator()> *);
899  template <typename BinaryOpT> __host__ __device__ static char Test(SFINAE6<BinaryOpT, &BinaryOpT::operator()> *);
900  template <typename BinaryOpT> __host__ __device__ static char Test(SFINAE7<BinaryOpT, &BinaryOpT::operator()> *);
901  template <typename BinaryOpT> __host__ __device__ static char Test(SFINAE8<BinaryOpT, &BinaryOpT::operator()> *);
902 
903  template <typename BinaryOpT> static int Test(...);
904 
905 public:
906 
908  static const bool HAS_PARAM = sizeof(Test<BinaryOp>(NULL)) == sizeof(char);
909 };
910 
911 
912 
913 
914 /******************************************************************************
915  * Simple type traits utilities.
916  *
917  * For example:
918  * Traits<int>::CATEGORY // SIGNED_INTEGER
919  * Traits<NullType>::NULL_TYPE // true
920  * Traits<uint4>::CATEGORY // NOT_A_NUMBER
921  * Traits<uint4>::PRIMITIVE; // false
922  *
923  ******************************************************************************/
924 
929 {
930  NOT_A_NUMBER,
931  SIGNED_INTEGER,
932  UNSIGNED_INTEGER,
933  FLOATING_POINT
934 };
935 
936 
940 template <Category _CATEGORY, bool _PRIMITIVE, bool _NULL_TYPE, typename _UnsignedBits, typename T>
942 {
944  static const Category CATEGORY = _CATEGORY;
945  enum
946  {
947  PRIMITIVE = _PRIMITIVE,
948  NULL_TYPE = _NULL_TYPE,
949  };
950 };
951 
952 
956 template <typename _UnsignedBits, typename T>
958 {
959  typedef _UnsignedBits UnsignedBits;
960 
961  static const Category CATEGORY = UNSIGNED_INTEGER;
962  static const UnsignedBits LOWEST_KEY = UnsignedBits(0);
963  static const UnsignedBits MAX_KEY = UnsignedBits(-1);
964 
965  enum
966  {
967  PRIMITIVE = true,
968  NULL_TYPE = false,
969  };
970 
971 
972  static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key)
973  {
974  return key;
975  }
976 
977  static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key)
978  {
979  return key;
980  }
981 
982  static __host__ __device__ __forceinline__ T Max()
983  {
984  UnsignedBits retval = MAX_KEY;
985  return reinterpret_cast<T&>(retval);
986  }
987 
988  static __host__ __device__ __forceinline__ T Lowest()
989  {
990  UnsignedBits retval = LOWEST_KEY;
991  return reinterpret_cast<T&>(retval);
992  }
993 };
994 
995 
999 template <typename _UnsignedBits, typename T>
1001 {
1002  typedef _UnsignedBits UnsignedBits;
1003 
1004  static const Category CATEGORY = SIGNED_INTEGER;
1005  static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1);
1006  static const UnsignedBits LOWEST_KEY = HIGH_BIT;
1007  static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT;
1008 
1009  enum
1010  {
1011  PRIMITIVE = true,
1012  NULL_TYPE = false,
1013  };
1014 
1015  static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key)
1016  {
1017  return key ^ HIGH_BIT;
1018  };
1019 
1020  static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key)
1021  {
1022  return key ^ HIGH_BIT;
1023  };
1024 
1025  static __host__ __device__ __forceinline__ T Max()
1026  {
1027  UnsignedBits retval = MAX_KEY;
1028  return reinterpret_cast<T&>(retval);
1029  }
1030 
1031  static __host__ __device__ __forceinline__ T Lowest()
1032  {
1033  UnsignedBits retval = LOWEST_KEY;
1034  return reinterpret_cast<T&>(retval);
1035  }
1036 };
1037 
1038 template <typename _T>
1039 struct FpLimits;
1040 
1041 template <>
1043 {
1044  static __host__ __device__ __forceinline__ float Max() {
1045  return FLT_MAX;
1046  }
1047 
1048  static __host__ __device__ __forceinline__ float Lowest() {
1049  return FLT_MAX * float(-1);
1050  }
1051 };
1052 
1053 template <>
1055 {
1056  static __host__ __device__ __forceinline__ double Max() {
1057  return DBL_MAX;
1058  }
1059 
1060  static __host__ __device__ __forceinline__ double Lowest() {
1061  return DBL_MAX * double(-1);
1062  }
1063 };
1064 
1065 
1066 #if (__CUDACC_VER_MAJOR__ >= 9)
1067 template <>
1068 struct FpLimits<__half>
1069 {
1070  static __host__ __device__ __forceinline__ __half Max() {
1071  unsigned short max_word = 0x7BFF;
1072  return reinterpret_cast<__half&>(max_word);
1073  }
1074 
1075  static __host__ __device__ __forceinline__ __half Lowest() {
1076  unsigned short lowest_word = 0xFBFF;
1077  return reinterpret_cast<__half&>(lowest_word);
1078  }
1079 };
1080 #endif
1081 
1082 
1086 template <typename _UnsignedBits, typename T>
1088 {
1089  typedef _UnsignedBits UnsignedBits;
1090 
1091  static const Category CATEGORY = FLOATING_POINT;
1092  static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1);
1093  static const UnsignedBits LOWEST_KEY = UnsignedBits(-1);
1094  static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT;
1095 
1096  enum
1097  {
1098  PRIMITIVE = true,
1099  NULL_TYPE = false,
1100  };
1101 
1102  static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key)
1103  {
1104  UnsignedBits mask = (key & HIGH_BIT) ? UnsignedBits(-1) : HIGH_BIT;
1105  return key ^ mask;
1106  };
1107 
1108  static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key)
1109  {
1110  UnsignedBits mask = (key & HIGH_BIT) ? HIGH_BIT : UnsignedBits(-1);
1111  return key ^ mask;
1112  };
1113 
1114  static __host__ __device__ __forceinline__ T Max() {
1115  return FpLimits<T>::Max();
1116  }
1117 
1118  static __host__ __device__ __forceinline__ T Lowest() {
1119  return FpLimits<T>::Lowest();
1120  }
1121 };
1122 
1123 
1128 
1130 
1131 template <> struct NumericTraits<char> : BaseTraits<(std::numeric_limits<char>::is_signed) ? SIGNED_INTEGER : UNSIGNED_INTEGER, true, false, unsigned char, char> {};
1137 
1143 
1146 #if (__CUDACC_VER_MAJOR__ >= 9)
1148 #endif
1149 
1151 
1152 
1153 
1157 template <typename T>
1158 struct Traits : NumericTraits<typename RemoveQualifiers<T>::Type> {};
1159 
1160 
1161 #endif // DOXYGEN_SHOULD_SKIP_THIS
1162 
1163  // end group UtilModule
1165 
1166 } // CUB namespace
1167 CUB_NS_POSTFIX // Optional outer namespace(s)
Type equality test.
Definition: util_type.cuh:98
Determine whether or not BinaryOp's functor is of the form bool operator()(const T& a,...
Definition: util_type.cuh:879
__host__ __device__ __forceinline__ bool operator !=(const KeyValuePair &b)
Inequality operator.
Definition: util_type.cuh:683
__host__ __device__ __forceinline__ DoubleBuffer()
Constructor.
Definition: util_type.cuh:800
Key key
Item key.
Definition: util_type.cuh:671
Type traits.
Definition: util_type.cuh:1158
ThenType Type
Conditional type result.
Definition: util_type.cuh:75
The maximum number of elements in CUDA vector types.
Definition: util_type.cuh:461
_Key Key
Key data type.
Definition: util_type.cuh:668
Value value
Item value.
Definition: util_type.cuh:672
Optional outer namespace(s)
Volatile modifier test.
Definition: util_type.cuh:195
Category
Basic type traits categories.
Definition: util_type.cuh:928
If< IsMultiple< long long >::IS_MULTIPLE, unsigned long long, ShuffleWord >::Type VolatileWord
Biggest volatile word that T is a whole multiple of and is not larger than the alignment of T.
Definition: util_type.cuh:375
The "true CUDA" alignment of T in bytes.
Definition: util_type.cuh:301
__host__ __device__ __forceinline__ ArrayWrapper()
Constructor.
Definition: util_type.cuh:777
If< IsMultiple< longlong2 >::IS_MULTIPLE, ulonglong2, VolatileWord >::Type DeviceWord
Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T.
Definition: util_type.cuh:380
If< IsMultiple< int4 >::IS_MULTIPLE, uint4, typename If< IsMultiple< int2 >::IS_MULTIPLE, uint2, ShuffleWord >::Type >::Type TextureWord
Biggest texture reference word that T is a whole multiple of and is not larger than the alignment of ...
Definition: util_type.cuh:387
Unit-words of data movement.
Definition: util_type.cuh:350
Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...
Definition: util_type.cuh:791
#define CUB_DEFINE_VECTOR_TYPE(base_type, short_type)
Definition: util_type.cuh:523
A key identifier paired with a corresponding value.
Definition: util_type.cuh:666
T Type
The "truly aligned" type.
Definition: util_type.cuh:305
If< IsMultiple< int >::IS_MULTIPLE, unsigned int, typename If< IsMultiple< short >::IS_MULTIPLE, unsigned short, unsigned char >::Type >::Type ShuffleWord
Biggest shuffle word that T is a whole multiple of and is not larger than the alignment of T.
Definition: util_type.cuh:370
__host__ __device__ __forceinline__ KeyValuePair(Key const &key, Value const &value)
Constructor.
Definition: util_type.cuh:680
__host__ __device__ __forceinline__ T * Current()
Return pointer to the currently valid buffer.
Definition: util_type.cuh:818
Simple enable-if (similar to Boost)
Definition: util_type.cuh:859
__host__ __device__ __forceinline__ KeyValuePair()
Constructor.
Definition: util_type.cuh:676
Up Type
Type without const and volatile qualifiers.
Definition: util_type.cuh:225
BlockRadixRank provides operations for ranking unsigned integer types within a CUDA thread block.
int selector
Selector into d_buffers (i.e., the active/valid buffer)
Definition: util_type.cuh:797
Default max functor.
Statically determine if N is a power-of-two.
Definition: util_type.cuh:155
Numeric type traits.
Definition: util_type.cuh:1127
UnitWord< T >::DeviceWord DeviceWord
Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T.
Definition: util_type.cuh:637
T Type
Enable-if type for SFINAE dummy variables.
Definition: util_type.cuh:862
Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...
Definition: util_type.cuh:275
Pointer vs. iterator.
Definition: util_type.cuh:170
A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions.
Definition: util_type.cuh:634
Structure alignment.
Definition: util_type.cuh:290
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int
[in] The number of segments that comprise the sorting data
Type selection (IF ? ThenType : ElseType)
Definition: util_type.cuh:72
Statically determine log2(N), rounded up.
Definition: util_type.cuh:132
A simple "NULL" marker type.
Definition: util_type.cuh:256
Exposes a member typedef Type that names the corresponding CUDA vector type if one exists....
Definition: util_type.cuh:454
_Value Value
Value data type.
Definition: util_type.cuh:669
A wrapper for passing simple static arrays as kernel parameters.
Definition: util_type.cuh:770
Basic type traits.
Definition: util_type.cuh:941
DeviceWord storage[WORDS]
Backing storage.
Definition: util_type.cuh:645
__host__ __device__ __forceinline__ DoubleBuffer(T *d_current, T *d_alternate)
Constructor.
Definition: util_type.cuh:808
__host__ __device__ __forceinline__ T * Alternate()
Return pointer to the currently invalid buffer.
Definition: util_type.cuh:821
__host__ __device__ __forceinline__ T & Alias()
Alias.
Definition: util_type.cuh:648
Removes const and volatile qualifiers from type Tp.
Definition: util_type.cuh:222