39 #include "../util_ptx.cuh"
40 #include "../util_arch.cuh"
41 #include "../util_type.cuh"
42 #include "../util_namespace.cuh"
123 int ITEMS_PER_THREAD,
124 typename ValueT = NullType,
126 bool MEMOIZE_OUTER_SCAN = (
CUB_PTX_ARCH >= 350) ?
true :
false,
128 cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte,
143 BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z,
151 typedef typename KeyTraits::UnsignedBits UnsignedBits;
159 INNER_SCAN_ALGORITHM,
172 INNER_SCAN_ALGORITHM,
213 return private_storage;
218 UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD],
219 int (&ranks)[ITEMS_PER_THREAD],
233 UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD],
234 int (&ranks)[ITEMS_PER_THREAD],
248 ValueT (&values)[ITEMS_PER_THREAD],
249 int (&ranks)[ITEMS_PER_THREAD],
261 ValueT (&values)[ITEMS_PER_THREAD],
262 int (&ranks)[ITEMS_PER_THREAD],
273 template <
int IS_BLOCKED>
275 ValueT (&)[ITEMS_PER_THREAD],
276 int (&)[ITEMS_PER_THREAD],
282 template <
int DESCENDING,
int KEYS_ONLY>
284 KeyT (&keys)[ITEMS_PER_THREAD],
285 ValueT (&values)[ITEMS_PER_THREAD],
291 UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD] =
292 reinterpret_cast<UnsignedBits (&)[ITEMS_PER_THREAD]
>(keys);
296 for (
int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++)
298 unsigned_keys[KEY] = KeyTraits::TwiddleIn(unsigned_keys[KEY]);
307 int ranks[ITEMS_PER_THREAD];
308 RankKeys(unsigned_keys, ranks, begin_bit,
pass_bits, is_descending);
309 begin_bit += RADIX_BITS;
320 if (begin_bit >=
end_bit)
break;
327 for (
int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++)
329 unsigned_keys[KEY] = KeyTraits::TwiddleOut(unsigned_keys[KEY]);
335 #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document
338 template <
int DESCENDING,
int KEYS_ONLY>
340 KeyT (&keys)[ITEMS_PER_THREAD],
341 ValueT (&values)[ITEMS_PER_THREAD],
347 UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD] =
348 reinterpret_cast<UnsignedBits (&)[ITEMS_PER_THREAD]
>(keys);
352 for (
int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++)
354 unsigned_keys[KEY] = KeyTraits::TwiddleIn(unsigned_keys[KEY]);
363 int ranks[ITEMS_PER_THREAD];
364 RankKeys(unsigned_keys, ranks, begin_bit,
pass_bits, is_descending);
365 begin_bit += RADIX_BITS;
393 for (
int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++)
395 unsigned_keys[KEY] = KeyTraits::TwiddleOut(unsigned_keys[KEY]);
399 #endif // DOXYGEN_SHOULD_SKIP_THIS
415 temp_storage(PrivateStorage()),
416 linear_tid(
RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
426 temp_storage(temp_storage.Alias()),
427 linear_tid(
RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
474 __device__ __forceinline__
void Sort(
475 KeyT (&keys)[ITEMS_PER_THREAD],
477 int end_bit =
sizeof(KeyT) * 8)
529 __device__ __forceinline__
void Sort(
530 KeyT (&keys)[ITEMS_PER_THREAD],
531 ValueT (&values)[ITEMS_PER_THREAD],
533 int end_bit =
sizeof(KeyT) * 8)
576 KeyT (&keys)[ITEMS_PER_THREAD],
578 int end_bit =
sizeof(KeyT) * 8)
631 KeyT (&keys)[ITEMS_PER_THREAD],
632 ValueT (&values)[ITEMS_PER_THREAD],
634 int end_bit =
sizeof(KeyT) * 8)
686 KeyT (&keys)[ITEMS_PER_THREAD],
688 int end_bit =
sizeof(KeyT) * 8)
741 KeyT (&keys)[ITEMS_PER_THREAD],
742 ValueT (&values)[ITEMS_PER_THREAD],
744 int end_bit =
sizeof(KeyT) * 8)
789 KeyT (&keys)[ITEMS_PER_THREAD],
791 int end_bit =
sizeof(KeyT) * 8)
844 KeyT (&keys)[ITEMS_PER_THREAD],
845 ValueT (&values)[ITEMS_PER_THREAD],
847 int end_bit =
sizeof(KeyT) * 8)