OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
device_histogram.cuh
Go to the documentation of this file.
1
2/******************************************************************************
3 * Copyright (c) 2011, Duane Merrill. All rights reserved.
4 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of the NVIDIA CORPORATION nor the
14 * names of its contributors may be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 ******************************************************************************/
29
35#pragma once
36
37#include <stdio.h>
38#include <iterator>
39#include <limits>
40
42#include "../util_namespace.cuh"
43
45CUB_NS_PREFIX
46
48namespace cub {
49
50
64{
65 /******************************************************************/
69
/**
 * Computes an intensity histogram from a sequence of data samples using
 * equal-width bins (flat, single-channel overload).
 *
 * This overload does no histogram work itself: it wraps each per-channel
 * argument in a one-element array and forwards to the row-based
 * MultiHistogramEven<1, 1>, passing a row count of 1 and a row stride of
 * sizeof(SampleT) * num_samples bytes.
 *
 * d_temp_storage, temp_storage_bytes, d_samples, stream and
 * debug_synchronous are forwarded unchanged.
 *
 * Returns the cudaError_t produced by the forwarded call.
 */
118 template <
119 typename SampleIteratorT,
120 typename CounterT,
121 typename LevelT,
122 typename OffsetT>
123 CUB_RUNTIME_FUNCTION
124 static cudaError_t HistogramEven(
125 void* d_temp_storage,
126 size_t& temp_storage_bytes,
127 SampleIteratorT d_samples,
128 CounterT* d_histogram,
129 int num_levels,
130 LevelT lower_level,
131 LevelT upper_level,
132 OffsetT num_samples,
133 cudaStream_t stream = 0,
134 bool debug_synchronous = false)
135 {
// Value type yielded by the input iterator; only its size is used below.
137 typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
138
// One-element arrays standing in for the per-channel array parameters
// of the multi-channel entry point.
139 CounterT* d_histogram1[1] = {d_histogram};
140 int num_levels1[1] = {num_levels};
141 LevelT lower_level1[1] = {lower_level};
142 LevelT upper_level1[1] = {upper_level};
143
// The whole input is described as a single row whose stride is the total
// size of the input in bytes.
144 return MultiHistogramEven<1, 1>(
145 d_temp_storage,
146 temp_storage_bytes,
147 d_samples,
148 d_histogram1,
149 num_levels1,
150 lower_level1,
151 upper_level1,
152 num_samples,
153 1,
154 sizeof(SampleT) * num_samples,
155 stream,
156 debug_synchronous);
157 }
158
159
/**
 * Computes an intensity histogram from a sequence of data samples using
 * equal-width bins (row-based, single-channel overload).
 *
 * The region of interest is num_rows rows of num_row_samples samples each,
 * with consecutive rows row_stride_bytes apart. Each per-channel argument is
 * wrapped in a one-element array and the call is forwarded to
 * MultiHistogramEven<1, 1>; all other arguments are passed through unchanged.
 *
 * The "OffsetT num_rows" parameter (listing line 232) had been dropped from
 * this listing even though the body forwards num_rows; it is restored here
 * in the position given by the signature summary at the end of the listing.
 *
 * Returns the cudaError_t produced by the forwarded call.
 */
217 template <
218 typename SampleIteratorT,
219 typename CounterT,
220 typename LevelT,
221 typename OffsetT>
222 CUB_RUNTIME_FUNCTION
223 static cudaError_t HistogramEven(
224 void* d_temp_storage,
225 size_t& temp_storage_bytes,
226 SampleIteratorT d_samples,
227 CounterT* d_histogram,
228 int num_levels,
229 LevelT lower_level,
230 LevelT upper_level,
231 OffsetT num_row_samples,
232 OffsetT num_rows,
233 size_t row_stride_bytes,
234 cudaStream_t stream = 0,
235 bool debug_synchronous = false)
236 {
// One-element arrays standing in for the per-channel array parameters
// of the multi-channel entry point.
237 CounterT* d_histogram1[1] = {d_histogram};
238 int num_levels1[1] = {num_levels};
239 LevelT lower_level1[1] = {lower_level};
240 LevelT upper_level1[1] = {upper_level};
241
242 return MultiHistogramEven<1, 1>(
243 d_temp_storage,
244 temp_storage_bytes,
245 d_samples,
246 d_histogram1,
247 num_levels1,
248 lower_level1,
249 upper_level1,
250 num_row_samples,
251 num_rows,
252 row_stride_bytes,
253 stream,
254 debug_synchronous);
255 }
256
/**
 * Computes per-channel intensity histograms from a flat sequence of
 * multi-channel "pixel" data samples (flat overload).
 *
 * NUM_CHANNELS and NUM_ACTIVE_CHANNELS are forwarded as template arguments;
 * d_histogram, num_levels, lower_level and upper_level each carry
 * NUM_ACTIVE_CHANNELS entries.
 *
 * This overload forwards to the row-based MultiHistogramEven, passing a row
 * count of 1 and a row stride of sizeof(SampleT) * NUM_CHANNELS * num_pixels
 * bytes. Every other argument is passed through unchanged.
 *
 * Returns the cudaError_t produced by the forwarded call.
 */
317 template <
318 int NUM_CHANNELS,
319 int NUM_ACTIVE_CHANNELS,
320 typename SampleIteratorT,
321 typename CounterT,
322 typename LevelT,
323 typename OffsetT>
324 CUB_RUNTIME_FUNCTION
325 static cudaError_t MultiHistogramEven(
326 void* d_temp_storage,
327 size_t& temp_storage_bytes,
328 SampleIteratorT d_samples,
329 CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
330 int num_levels[NUM_ACTIVE_CHANNELS],
331 LevelT lower_level[NUM_ACTIVE_CHANNELS],
332 LevelT upper_level[NUM_ACTIVE_CHANNELS],
333 OffsetT num_pixels,
334 cudaStream_t stream = 0,
335 bool debug_synchronous = false)
336 {
// Value type yielded by the input iterator; only its size is used below.
338 typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
339
// The whole input is described as a single row whose stride is the total
// size in bytes of num_pixels pixels of NUM_CHANNELS samples each.
340 return MultiHistogramEven<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
341 d_temp_storage,
342 temp_storage_bytes,
343 d_samples,
344 d_histogram,
345 num_levels,
346 lower_level,
347 upper_level,
348 num_pixels,
349 1,
350 sizeof(SampleT) * NUM_CHANNELS * num_pixels,
351 stream,
352 debug_synchronous);
353 }
354
355
/**
 * Computes per-channel intensity histograms from multi-channel "pixel" data
 * samples laid out in rows, using the per-channel lower_level / upper_level
 * bounds (row-based overload).
 *
 * NOTE(review): the body uses num_row_pixels and num_rows, but neither is
 * declared in the parameter list as it appears here (the embedded listing
 * numbers jump from 439 to 442). The signature summary at the end of this
 * listing shows "OffsetT num_row_pixels, OffsetT num_rows" ahead of
 * row_stride_bytes -- confirm against the original header.
 *
 * NOTE(review): both argument lists in the body are missing the line that
 * opens the call (listing lines 456 and 462). The thirteen arguments line up
 * with the DispatchEven signature summarized at the end of this listing, so
 * these are presumably "return ...::DispatchEven(" statements -- confirm.
 */
424 template <
425 int NUM_CHANNELS,
426 int NUM_ACTIVE_CHANNELS,
427 typename SampleIteratorT,
428 typename CounterT,
429 typename LevelT,
430 typename OffsetT>
431 CUB_RUNTIME_FUNCTION
432 static cudaError_t MultiHistogramEven(
433 void* d_temp_storage,
434 size_t& temp_storage_bytes,
435 SampleIteratorT d_samples,
436 CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
437 int num_levels[NUM_ACTIVE_CHANNELS],
438 LevelT lower_level[NUM_ACTIVE_CHANNELS],
439 LevelT upper_level[NUM_ACTIVE_CHANNELS],
442 size_t row_stride_bytes,
443 cudaStream_t stream = 0,
444 bool debug_synchronous = false)
445 {
// Value type yielded by the input iterator.
447 typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
// Compile-time tag whose value is true exactly when a sample occupies one
// byte; it is handed over as the final argument of both calls below.
448 Int2Type<sizeof(SampleT) == 1> is_byte_sample;
449
// The narrowing path is taken only when OffsetT is wider than int and the
// total extent in bytes (num_rows * row_stride_bytes) is below INT_MAX.
450 if ((sizeof(OffsetT) > sizeof(int)) &&
451 ((unsigned long long) (num_rows * row_stride_bytes) < (unsigned long long) std::numeric_limits<int>::max()))
452 {
453 // Down-convert OffsetT data type
454
455
// Offsets are cast to int here; the row stride is converted from bytes to
// a count of samples by dividing by sizeof(SampleT).
457 d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level,
458 (int) num_row_pixels, (int) num_rows, (int) (row_stride_bytes / sizeof(SampleT)),
459 stream, debug_synchronous, is_byte_sample);
460 }
461
// General path: same arguments with offsets kept as OffsetT.
463 d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level,
464 num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / sizeof(SampleT)),
465 stream, debug_synchronous, is_byte_sample);
466 }
467
468
470 /******************************************************************/
474
/**
 * Computes an intensity histogram from a sequence of data samples using the
 * bin boundary levels supplied in d_levels (flat, single-channel overload).
 *
 * This overload does no histogram work itself: it wraps each per-channel
 * argument in a one-element array and forwards to the row-based
 * MultiHistogramRange<1, 1>, passing a row count of 1 and a row stride of
 * sizeof(SampleT) * num_samples bytes.
 *
 * d_temp_storage, temp_storage_bytes, d_samples, stream and
 * debug_synchronous are forwarded unchanged.
 *
 * Returns the cudaError_t produced by the forwarded call.
 */
522 template <
523 typename SampleIteratorT,
524 typename CounterT,
525 typename LevelT,
526 typename OffsetT>
527 CUB_RUNTIME_FUNCTION
528 static cudaError_t HistogramRange(
529 void* d_temp_storage,
530 size_t& temp_storage_bytes,
531 SampleIteratorT d_samples,
532 CounterT* d_histogram,
533 int num_levels,
534 LevelT* d_levels,
535 OffsetT num_samples,
536 cudaStream_t stream = 0,
537 bool debug_synchronous = false)
538 {
// Value type yielded by the input iterator; only its size is used below.
540 typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
541
// One-element arrays standing in for the per-channel array parameters
// of the multi-channel entry point.
542 CounterT* d_histogram1[1] = {d_histogram};
543 int num_levels1[1] = {num_levels};
544 LevelT* d_levels1[1] = {d_levels};
545
// The whole input is described as a single row whose stride is the total
// size of the input in bytes.
546 return MultiHistogramRange<1, 1>(
547 d_temp_storage,
548 temp_storage_bytes,
549 d_samples,
550 d_histogram1,
551 num_levels1,
552 d_levels1,
553 num_samples,
554 1,
555 sizeof(SampleT) * num_samples,
556 stream,
557 debug_synchronous);
558 }
559
560
/**
 * Computes an intensity histogram from a sequence of data samples using the
 * bin boundary levels supplied in d_levels (row-based, single-channel
 * overload).
 *
 * The region of interest is num_rows rows of num_row_samples samples each,
 * with consecutive rows row_stride_bytes apart. Each per-channel argument is
 * wrapped in a one-element array and the call is forwarded to
 * MultiHistogramRange<1, 1>; all other arguments are passed through
 * unchanged.
 *
 * The "OffsetT num_rows" parameter (listing line 631) had been dropped from
 * this listing even though the body forwards num_rows; it is restored here
 * in the position given by the signature summary at the end of the listing.
 *
 * Returns the cudaError_t produced by the forwarded call.
 */
617 template <
618 typename SampleIteratorT,
619 typename CounterT,
620 typename LevelT,
621 typename OffsetT>
622 CUB_RUNTIME_FUNCTION
623 static cudaError_t HistogramRange(
624 void* d_temp_storage,
625 size_t& temp_storage_bytes,
626 SampleIteratorT d_samples,
627 CounterT* d_histogram,
628 int num_levels,
629 LevelT* d_levels,
630 OffsetT num_row_samples,
631 OffsetT num_rows,
632 size_t row_stride_bytes,
633 cudaStream_t stream = 0,
634 bool debug_synchronous = false)
635 {
// One-element arrays standing in for the per-channel array parameters
// of the multi-channel entry point.
636 CounterT* d_histogram1[1] = {d_histogram};
637 int num_levels1[1] = {num_levels};
638 LevelT* d_levels1[1] = {d_levels};
639
640 return MultiHistogramRange<1, 1>(
641 d_temp_storage,
642 temp_storage_bytes,
643 d_samples,
644 d_histogram1,
645 num_levels1,
646 d_levels1,
647 num_row_samples,
648 num_rows,
649 row_stride_bytes,
650 stream,
651 debug_synchronous);
652 }
653
/**
 * Computes per-channel intensity histograms from a flat sequence of
 * multi-channel "pixel" data samples, with per-channel level arrays supplied
 * in d_levels (flat overload).
 *
 * NUM_CHANNELS and NUM_ACTIVE_CHANNELS are forwarded as template arguments;
 * d_histogram, num_levels and d_levels each carry NUM_ACTIVE_CHANNELS
 * entries.
 *
 * This overload forwards to the row-based MultiHistogramRange, passing a row
 * count of 1 and a row stride of sizeof(SampleT) * NUM_CHANNELS * num_pixels
 * bytes. Every other argument is passed through unchanged.
 *
 * Returns the cudaError_t produced by the forwarded call.
 */
714 template <
715 int NUM_CHANNELS,
716 int NUM_ACTIVE_CHANNELS,
717 typename SampleIteratorT,
718 typename CounterT,
719 typename LevelT,
720 typename OffsetT>
721 CUB_RUNTIME_FUNCTION
722 static cudaError_t MultiHistogramRange(
723 void* d_temp_storage,
724 size_t& temp_storage_bytes,
725 SampleIteratorT d_samples,
726 CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
727 int num_levels[NUM_ACTIVE_CHANNELS],
728 LevelT* d_levels[NUM_ACTIVE_CHANNELS],
729 OffsetT num_pixels,
730 cudaStream_t stream = 0,
731 bool debug_synchronous = false)
732 {
// Value type yielded by the input iterator; only its size is used below.
734 typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
735
// The whole input is described as a single row whose stride is the total
// size in bytes of num_pixels pixels of NUM_CHANNELS samples each.
736 return MultiHistogramRange<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
737 d_temp_storage,
738 temp_storage_bytes,
739 d_samples,
740 d_histogram,
741 num_levels,
742 d_levels,
743 num_pixels,
744 1,
745 sizeof(SampleT) * NUM_CHANNELS * num_pixels,
746 stream,
747 debug_synchronous);
748 }
749
750
/**
 * Computes per-channel intensity histograms from multi-channel "pixel" data
 * samples laid out in rows, with per-channel level arrays supplied in
 * d_levels (row-based overload).
 *
 * NOTE(review): the body uses num_row_pixels and num_rows, but neither is
 * declared in the parameter list as it appears here (the embedded listing
 * numbers jump from 831 to 834). The signature summary at the end of this
 * listing shows "OffsetT num_row_pixels, OffsetT num_rows" ahead of
 * row_stride_bytes -- confirm against the original header.
 *
 * NOTE(review): both argument lists in the body are missing the line that
 * opens the call (listing lines 846 and 852). The twelve arguments line up
 * with the DispatchRange signature summarized at the end of this listing, so
 * these are presumably "return ...::DispatchRange(" statements -- confirm.
 */
817 template <
818 int NUM_CHANNELS,
819 int NUM_ACTIVE_CHANNELS,
820 typename SampleIteratorT,
821 typename CounterT,
822 typename LevelT,
823 typename OffsetT>
824 CUB_RUNTIME_FUNCTION
825 static cudaError_t MultiHistogramRange(
826 void* d_temp_storage,
827 size_t& temp_storage_bytes,
828 SampleIteratorT d_samples,
829 CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
830 int num_levels[NUM_ACTIVE_CHANNELS],
831 LevelT* d_levels[NUM_ACTIVE_CHANNELS],
834 size_t row_stride_bytes,
835 cudaStream_t stream = 0,
836 bool debug_synchronous = false)
837 {
// Value type yielded by the input iterator.
839 typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
// Compile-time tag whose value is true exactly when a sample occupies one
// byte; it is handed over as the final argument of both calls below.
840 Int2Type<sizeof(SampleT) == 1> is_byte_sample;
841
// The narrowing path is taken only when OffsetT is wider than int and the
// total extent in bytes (num_rows * row_stride_bytes) is below INT_MAX.
842 if ((sizeof(OffsetT) > sizeof(int)) &&
843 ((unsigned long long) (num_rows * row_stride_bytes) < (unsigned long long) std::numeric_limits<int>::max()))
844 {
845 // Down-convert OffsetT data type
// Offsets are cast to int here; the row stride is converted from bytes to
// a count of samples by dividing by sizeof(SampleT).
847 d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels,
848 (int) num_row_pixels, (int) num_rows, (int) (row_stride_bytes / sizeof(SampleT)),
849 stream, debug_synchronous, is_byte_sample);
850 }
851
// General path: same arguments with offsets kept as OffsetT.
853 d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels,
854 num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / sizeof(SampleT)),
855 stream, debug_synchronous, is_byte_sample);
856 }
857
858
859
861};
862
863} // CUB namespace
864CUB_NS_POSTFIX // Optional outer namespace(s)
865
866
Optional outer namespace(s)
ArrayWrapper< int, NUM_ACTIVE_CHANNELS > ArrayWrapper< int, NUM_ACTIVE_CHANNELS > ArrayWrapper< CounterT *, NUM_ACTIVE_CHANNELS > ArrayWrapper< CounterT *, NUM_ACTIVE_CHANNELS > ArrayWrapper< OutputDecodeOpT, NUM_ACTIVE_CHANNELS > ArrayWrapper< PrivatizedDecodeOpT, NUM_ACTIVE_CHANNELS > OffsetT OffsetT num_rows
The number of rows in the region of interest.
OffsetT OffsetT
[in] Total number of input data items
ArrayWrapper< int, NUM_ACTIVE_CHANNELS > ArrayWrapper< int, NUM_ACTIVE_CHANNELS > ArrayWrapper< CounterT *, NUM_ACTIVE_CHANNELS > ArrayWrapper< CounterT *, NUM_ACTIVE_CHANNELS > ArrayWrapper< OutputDecodeOpT, NUM_ACTIVE_CHANNELS > ArrayWrapper< PrivatizedDecodeOpT, NUM_ACTIVE_CHANNELS > OffsetT num_row_pixels
The number of multi-channel pixels per row in the region of interest.
DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequenc...
static CUB_RUNTIME_FUNCTION cudaError_t HistogramRange(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram, int num_levels, LevelT *d_levels, OffsetT num_samples, cudaStream_t stream=0, bool debug_synchronous=false)
Computes an intensity histogram from a sequence of data samples using the specified bin boundary leve...
static CUB_RUNTIME_FUNCTION cudaError_t HistogramRange(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram, int num_levels, LevelT *d_levels, OffsetT num_row_samples, OffsetT num_rows, size_t row_stride_bytes, cudaStream_t stream=0, bool debug_synchronous=false)
Computes an intensity histogram from a sequence of data samples using the specified bin boundary leve...
static CUB_RUNTIME_FUNCTION cudaError_t MultiHistogramEven(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT lower_level[NUM_ACTIVE_CHANNELS], LevelT upper_level[NUM_ACTIVE_CHANNELS], OffsetT num_row_pixels, OffsetT num_rows, size_t row_stride_bytes, cudaStream_t stream=0, bool debug_synchronous=false)
Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using...
static CUB_RUNTIME_FUNCTION cudaError_t MultiHistogramRange(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT *d_levels[NUM_ACTIVE_CHANNELS], OffsetT num_row_pixels, OffsetT num_rows, size_t row_stride_bytes, cudaStream_t stream=0, bool debug_synchronous=false)
Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using...
static CUB_RUNTIME_FUNCTION cudaError_t MultiHistogramRange(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT *d_levels[NUM_ACTIVE_CHANNELS], OffsetT num_pixels, cudaStream_t stream=0, bool debug_synchronous=false)
Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using...
static CUB_RUNTIME_FUNCTION cudaError_t MultiHistogramEven(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT lower_level[NUM_ACTIVE_CHANNELS], LevelT upper_level[NUM_ACTIVE_CHANNELS], OffsetT num_pixels, cudaStream_t stream=0, bool debug_synchronous=false)
Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using...
static CUB_RUNTIME_FUNCTION cudaError_t HistogramEven(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram, int num_levels, LevelT lower_level, LevelT upper_level, OffsetT num_samples, cudaStream_t stream=0, bool debug_synchronous=false)
Computes an intensity histogram from a sequence of data samples using equal-width bins.
static CUB_RUNTIME_FUNCTION cudaError_t HistogramEven(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram, int num_levels, LevelT lower_level, LevelT upper_level, OffsetT num_row_samples, OffsetT num_rows, size_t row_stride_bytes, cudaStream_t stream=0, bool debug_synchronous=false)
Computes an intensity histogram from a sequence of data samples using equal-width bins.
static CUB_RUNTIME_FUNCTION cudaError_t DispatchRange(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_output_histograms[NUM_ACTIVE_CHANNELS], int num_output_levels[NUM_ACTIVE_CHANNELS], LevelT *d_levels[NUM_ACTIVE_CHANNELS], OffsetT num_row_pixels, OffsetT num_rows, OffsetT row_stride_samples, cudaStream_t stream, bool debug_synchronous, Int2Type< false > is_byte_sample)
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t DispatchEven(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_output_histograms[NUM_ACTIVE_CHANNELS], int num_output_levels[NUM_ACTIVE_CHANNELS], LevelT lower_level[NUM_ACTIVE_CHANNELS], LevelT upper_level[NUM_ACTIVE_CHANNELS], OffsetT num_row_pixels, OffsetT num_rows, OffsetT row_stride_samples, cudaStream_t stream, bool debug_synchronous, Int2Type< false > is_byte_sample)
Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...