OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
 
Loading...
Searching...
No Matches
device_radix_sort.cuh
Go to the documentation of this file.
1
2/******************************************************************************
3 * Copyright (c) 2011, Duane Merrill. All rights reserved.
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of the NVIDIA CORPORATION nor the
14 * names of its contributors may be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 ******************************************************************************/
29
35#pragma once
36
37#include <stdio.h>
38#include <iterator>
39
41#include "../util_arch.cuh"
42#include "../util_namespace.cuh"
43
45CUB_NS_PREFIX
46
48namespace cub {
49
50
84{
85
86 /******************************************************************/
90
144 template <
145 typename KeyT,
146 typename ValueT>
147 CUB_RUNTIME_FUNCTION
148 static cudaError_t SortPairs(
149 void *d_temp_storage,
150 size_t &temp_storage_bytes,
151 const KeyT *d_keys_in,
152 KeyT *d_keys_out,
153 const ValueT *d_values_in,
154 ValueT *d_values_out,
155 int num_items,
156 int begin_bit = 0,
157 int end_bit = sizeof(KeyT) * 8,
158 cudaStream_t stream = 0,
159 bool debug_synchronous = false)
160 {
161 // Signed integer type for global offsets
162 typedef int OffsetT;
163
164 DoubleBuffer<KeyT> d_keys(const_cast<KeyT*>(d_keys_in), d_keys_out);
165 DoubleBuffer<ValueT> d_values(const_cast<ValueT*>(d_values_in), d_values_out);
166
168 d_temp_storage,
169 temp_storage_bytes,
170 d_keys,
171 d_values,
172 num_items,
173 begin_bit,
174 end_bit,
175 false,
176 stream,
177 debug_synchronous);
178 }
179
180
245 template <
246 typename KeyT,
247 typename ValueT>
248 CUB_RUNTIME_FUNCTION
249 static cudaError_t SortPairs(
250 void *d_temp_storage,
251 size_t &temp_storage_bytes,
252 DoubleBuffer<KeyT> &d_keys,
253 DoubleBuffer<ValueT> &d_values,
254 int num_items,
255 int begin_bit = 0,
256 int end_bit = sizeof(KeyT) * 8,
257 cudaStream_t stream = 0,
258 bool debug_synchronous = false)
259 {
260 // Signed integer type for global offsets
261 typedef int OffsetT;
262
264 d_temp_storage,
265 temp_storage_bytes,
266 d_keys,
267 d_values,
268 num_items,
269 begin_bit,
270 end_bit,
271 true,
272 stream,
273 debug_synchronous);
274 }
275
276
325 template <
326 typename KeyT,
327 typename ValueT>
328 CUB_RUNTIME_FUNCTION
329 static cudaError_t SortPairsDescending(
330 void *d_temp_storage,
331 size_t &temp_storage_bytes,
332 const KeyT *d_keys_in,
333 KeyT *d_keys_out,
334 const ValueT *d_values_in,
335 ValueT *d_values_out,
336 int num_items,
337 int begin_bit = 0,
338 int end_bit = sizeof(KeyT) * 8,
339 cudaStream_t stream = 0,
340 bool debug_synchronous = false)
341 {
342 // Signed integer type for global offsets
343 typedef int OffsetT;
344
345 DoubleBuffer<KeyT> d_keys(const_cast<KeyT*>(d_keys_in), d_keys_out);
346 DoubleBuffer<ValueT> d_values(const_cast<ValueT*>(d_values_in), d_values_out);
347
349 d_temp_storage,
350 temp_storage_bytes,
351 d_keys,
352 d_values,
353 num_items,
354 begin_bit,
355 end_bit,
356 false,
357 stream,
358 debug_synchronous);
359 }
360
361
421 template <
422 typename KeyT,
423 typename ValueT>
424 CUB_RUNTIME_FUNCTION
425 static cudaError_t SortPairsDescending(
426 void *d_temp_storage,
427 size_t &temp_storage_bytes,
428 DoubleBuffer<KeyT> &d_keys,
429 DoubleBuffer<ValueT> &d_values,
430 int num_items,
431 int begin_bit = 0,
432 int end_bit = sizeof(KeyT) * 8,
433 cudaStream_t stream = 0,
434 bool debug_synchronous = false)
435 {
436 // Signed integer type for global offsets
437 typedef int OffsetT;
438
440 d_temp_storage,
441 temp_storage_bytes,
442 d_keys,
443 d_values,
444 num_items,
445 begin_bit,
446 end_bit,
447 true,
448 stream,
449 debug_synchronous);
450 }
451
452
454 /******************************************************************/
458
459
505 template <typename KeyT>
506 CUB_RUNTIME_FUNCTION
507 static cudaError_t SortKeys(
508 void *d_temp_storage,
509 size_t &temp_storage_bytes,
510 const KeyT *d_keys_in,
511 KeyT *d_keys_out,
512 int num_items,
513 int begin_bit = 0,
514 int end_bit = sizeof(KeyT) * 8,
515 cudaStream_t stream = 0,
516 bool debug_synchronous = false)
517 {
518 // Signed integer type for global offsets
519 typedef int OffsetT;
520
521 // Null value type
522 DoubleBuffer<KeyT> d_keys(const_cast<KeyT*>(d_keys_in), d_keys_out);
523 DoubleBuffer<NullType> d_values;
524
526 d_temp_storage,
527 temp_storage_bytes,
528 d_keys,
529 d_values,
530 num_items,
531 begin_bit,
532 end_bit,
533 false,
534 stream,
535 debug_synchronous);
536 }
537
538
594 template <typename KeyT>
595 CUB_RUNTIME_FUNCTION
596 static cudaError_t SortKeys(
597 void *d_temp_storage,
598 size_t &temp_storage_bytes,
599 DoubleBuffer<KeyT> &d_keys,
600 int num_items,
601 int begin_bit = 0,
602 int end_bit = sizeof(KeyT) * 8,
603 cudaStream_t stream = 0,
604 bool debug_synchronous = false)
605 {
606 // Signed integer type for global offsets
607 typedef int OffsetT;
608
609 // Null value type
610 DoubleBuffer<NullType> d_values;
611
613 d_temp_storage,
614 temp_storage_bytes,
615 d_keys,
616 d_values,
617 num_items,
618 begin_bit,
619 end_bit,
620 true,
621 stream,
622 debug_synchronous);
623 }
624
669 template <typename KeyT>
670 CUB_RUNTIME_FUNCTION
671 static cudaError_t SortKeysDescending(
672 void *d_temp_storage,
673 size_t &temp_storage_bytes,
674 const KeyT *d_keys_in,
675 KeyT *d_keys_out,
676 int num_items,
677 int begin_bit = 0,
678 int end_bit = sizeof(KeyT) * 8,
679 cudaStream_t stream = 0,
680 bool debug_synchronous = false)
681 {
682 // Signed integer type for global offsets
683 typedef int OffsetT;
684
685 DoubleBuffer<KeyT> d_keys(const_cast<KeyT*>(d_keys_in), d_keys_out);
686 DoubleBuffer<NullType> d_values;
687
689 d_temp_storage,
690 temp_storage_bytes,
691 d_keys,
692 d_values,
693 num_items,
694 begin_bit,
695 end_bit,
696 false,
697 stream,
698 debug_synchronous);
699 }
700
701
753 template <typename KeyT>
754 CUB_RUNTIME_FUNCTION
755 static cudaError_t SortKeysDescending(
756 void *d_temp_storage,
757 size_t &temp_storage_bytes,
758 DoubleBuffer<KeyT> &d_keys,
759 int num_items,
760 int begin_bit = 0,
761 int end_bit = sizeof(KeyT) * 8,
762 cudaStream_t stream = 0,
763 bool debug_synchronous = false)
764 {
765 // Signed integer type for global offsets
766 typedef int OffsetT;
767
768 // Null value type
769 DoubleBuffer<NullType> d_values;
770
772 d_temp_storage,
773 temp_storage_bytes,
774 d_keys,
775 d_values,
776 num_items,
777 begin_bit,
778 end_bit,
779 true,
780 stream,
781 debug_synchronous);
782 }
783
784
786
787
788};
789
794} // CUB namespace
795CUB_NS_POSTFIX // Optional outer namespace(s)
796
797
Optional outer namespace(s)
KeyT const ValueT ValueT * d_values_out
[in] Output values buffer
KeyT const ValueT ValueT OffsetT int int end_bit
< [in] The past-the-end (most-significant) bit index needed for key comparison
KeyT const ValueT ValueT OffsetT OffsetT num_items
[in] Total number of input data items
KeyT * d_keys_out
< [in] Input keys buffer
KeyT const ValueT * d_values_in
[in] Input values buffer
OffsetT OffsetT
[in] Total number of input data items
DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequenc...
static CUB_RUNTIME_FUNCTION cudaError_t SortKeys(void *d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer< KeyT > &d_keys, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts keys into ascending order. (~N auxiliary storage required).
static CUB_RUNTIME_FUNCTION cudaError_t SortKeysDescending(void *d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer< KeyT > &d_keys, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts keys into descending order. (~N auxiliary storage required).
static CUB_RUNTIME_FUNCTION cudaError_t SortPairsDescending(void *d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer< KeyT > &d_keys, DoubleBuffer< ValueT > &d_values, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts key-value pairs into descending order. (~N auxiliary storage required).
static CUB_RUNTIME_FUNCTION cudaError_t SortKeys(void *d_temp_storage, size_t &temp_storage_bytes, const KeyT *d_keys_in, KeyT *d_keys_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts keys into ascending order. (~2N auxiliary storage required)
static CUB_RUNTIME_FUNCTION cudaError_t SortPairs(void *d_temp_storage, size_t &temp_storage_bytes, const KeyT *d_keys_in, KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts key-value pairs into ascending order. (~2N auxiliary storage required)
static CUB_RUNTIME_FUNCTION cudaError_t SortPairsDescending(void *d_temp_storage, size_t &temp_storage_bytes, const KeyT *d_keys_in, KeyT *d_keys_out, const ValueT *d_values_in, ValueT *d_values_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts key-value pairs into descending order. (~2N auxiliary storage required).
static CUB_RUNTIME_FUNCTION cudaError_t SortKeysDescending(void *d_temp_storage, size_t &temp_storage_bytes, const KeyT *d_keys_in, KeyT *d_keys_out, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts keys into descending order. (~2N auxiliary storage required).
static CUB_RUNTIME_FUNCTION cudaError_t SortPairs(void *d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer< KeyT > &d_keys, DoubleBuffer< ValueT > &d_values, int num_items, int begin_bit=0, int end_bit=sizeof(KeyT) *8, cudaStream_t stream=0, bool debug_synchronous=false)
Sorts key-value pairs into ascending order. (~N auxiliary storage required)
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Dispatch(void *d_temp_storage, size_t &temp_storage_bytes, DoubleBuffer< KeyT > &d_keys, DoubleBuffer< ValueT > &d_values, OffsetT num_items, int begin_bit, int end_bit, bool is_overwrite_okay, cudaStream_t stream, bool debug_synchronous)
Double-buffer storage wrapper for multi-pass stream transformations that require more than one storag...