OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
device_histogram.cuh
Go to the documentation of this file.
1 
2 /******************************************************************************
3  * Copyright (c) 2011, Duane Merrill. All rights reserved.
4  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
35 #pragma once
36 
37 #include <stdio.h>
38 #include <iterator>
39 #include <limits>
40 
42 #include "../util_namespace.cuh"
43 
45 CUB_NS_PREFIX
46 
48 namespace cub {
49 
50 
64 {
65  /******************************************************************/
69 
/**
 * \brief Computes an intensity histogram from a sequence of data samples using equal-width bins.
 *
 * Single-channel, one-dimensional convenience overload. Every scalar argument is
 * wrapped in a one-element array and the request is forwarded to
 * MultiHistogramEven<1, 1> as a single row of \p num_samples samples whose row
 * stride is <tt>sizeof(SampleT) * num_samples</tt> bytes.
 *
 * \param d_temp_storage     Forwarded unchanged to MultiHistogramEven.
 * \param temp_storage_bytes Forwarded unchanged (by reference) to MultiHistogramEven.
 * \param d_samples          Sample iterator; its value_type determines the row stride.
 * \param d_histogram        Forwarded as the only entry of the per-channel histogram array.
 * \param num_levels         Forwarded as the only entry of the per-channel level-count array.
 * \param lower_level        Forwarded as the only entry of the per-channel lower-level array.
 * \param upper_level        Forwarded as the only entry of the per-channel upper-level array.
 * \param num_samples        Number of samples; used as the length of the single row.
 * \param stream             Forwarded unchanged (defaults to 0).
 * \param debug_synchronous  Forwarded unchanged (defaults to false).
 *
 * \return The cudaError_t returned by MultiHistogramEven.
 */
template <
    typename SampleIteratorT,
    typename CounterT,
    typename LevelT,
    typename OffsetT>
CUB_RUNTIME_FUNCTION
static cudaError_t HistogramEven(
    void*           d_temp_storage,
    size_t&         temp_storage_bytes,
    SampleIteratorT d_samples,
    CounterT*       d_histogram,
    int             num_levels,
    LevelT          lower_level,
    LevelT          upper_level,
    OffsetT         num_samples,
    cudaStream_t    stream            = 0,
    bool            debug_synchronous = false)
{
    // Type of the items produced by the sample iterator
    typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;

    // One-element arrays so the multi-channel entry point can be reused
    CounterT* d_histogram1[1] = {d_histogram};
    int       num_levels1[1]  = {num_levels};
    LevelT    lower_level1[1] = {lower_level};
    LevelT    upper_level1[1] = {upper_level};

    return MultiHistogramEven<1, 1>(
        d_temp_storage,
        temp_storage_bytes,
        d_samples,
        d_histogram1,
        num_levels1,
        lower_level1,
        upper_level1,
        num_samples,
        // The row count must have type OffsetT: the callee deduces OffsetT from
        // both the row length and the row count, so a bare int literal makes
        // deduction fail whenever OffsetT is not int.
        static_cast<OffsetT>(1),
        sizeof(SampleT) * num_samples,
        stream,
        debug_synchronous);
}
158 
159 
/**
 * \brief Computes an intensity histogram from a sequence of data samples using equal-width bins.
 *
 * Single-channel overload for a two-dimensional region of interest. The scalar
 * histogram arguments are packed into one-element arrays and the row geometry
 * (\p num_row_samples, \p num_rows, \p row_stride_bytes) is handed on untouched
 * to MultiHistogramEven<1, 1>.
 *
 * \return The cudaError_t returned by MultiHistogramEven.
 */
template <typename SampleIteratorT,
          typename CounterT,
          typename LevelT,
          typename OffsetT>
CUB_RUNTIME_FUNCTION
static cudaError_t HistogramEven(
    void*           d_temp_storage,
    size_t&         temp_storage_bytes,
    SampleIteratorT d_samples,
    CounterT*       d_histogram,
    int             num_levels,
    LevelT          lower_level,
    LevelT          upper_level,
    OffsetT         num_row_samples,
    OffsetT         num_rows,
    size_t          row_stride_bytes,
    cudaStream_t    stream            = 0,
    bool            debug_synchronous = false)
{
    // Single-channel views of the per-channel argument arrays
    CounterT* histograms[1]   = { d_histogram };
    int       level_counts[1] = { num_levels };
    LevelT    lower_bounds[1] = { lower_level };
    LevelT    upper_bounds[1] = { upper_level };

    // Delegate to the multi-channel implementation with one (active) channel
    return MultiHistogramEven<1, 1>(
        d_temp_storage, temp_storage_bytes,
        d_samples,
        histograms, level_counts, lower_bounds, upper_bounds,
        num_row_samples, num_rows, row_stride_bytes,
        stream, debug_synchronous);
}
256 
/**
 * \brief Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using equal-width bins.
 *
 * One-dimensional convenience overload. The input is forwarded to the
 * region-of-interest overload of MultiHistogramEven as a single row of
 * \p num_pixels pixels whose row stride is
 * <tt>sizeof(SampleT) * NUM_CHANNELS * num_pixels</tt> bytes.
 *
 * \tparam NUM_CHANNELS        Multiplier applied to the per-pixel sample size when computing the row stride.
 * \tparam NUM_ACTIVE_CHANNELS Extent of the per-channel argument arrays.
 *
 * \param d_temp_storage     Forwarded unchanged.
 * \param temp_storage_bytes Forwarded unchanged (by reference).
 * \param d_samples          Sample iterator; its value_type determines the row stride.
 * \param d_histogram        Per-channel histogram pointers, forwarded unchanged.
 * \param num_levels         Per-channel level counts, forwarded unchanged.
 * \param lower_level        Per-channel lower levels, forwarded unchanged.
 * \param upper_level        Per-channel upper levels, forwarded unchanged.
 * \param num_pixels         Number of pixels; used as the length of the single row.
 * \param stream             Forwarded unchanged (defaults to 0).
 * \param debug_synchronous  Forwarded unchanged (defaults to false).
 *
 * \return The cudaError_t returned by the region-of-interest overload.
 */
template <
    int NUM_CHANNELS,
    int NUM_ACTIVE_CHANNELS,
    typename SampleIteratorT,
    typename CounterT,
    typename LevelT,
    typename OffsetT>
CUB_RUNTIME_FUNCTION
static cudaError_t MultiHistogramEven(
    void*           d_temp_storage,
    size_t&         temp_storage_bytes,
    SampleIteratorT d_samples,
    CounterT*       d_histogram[NUM_ACTIVE_CHANNELS],
    int             num_levels[NUM_ACTIVE_CHANNELS],
    LevelT          lower_level[NUM_ACTIVE_CHANNELS],
    LevelT          upper_level[NUM_ACTIVE_CHANNELS],
    OffsetT         num_pixels,
    cudaStream_t    stream            = 0,
    bool            debug_synchronous = false)
{
    // Type of the items produced by the sample iterator
    typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;

    return MultiHistogramEven<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
        d_temp_storage,
        temp_storage_bytes,
        d_samples,
        d_histogram,
        num_levels,
        lower_level,
        upper_level,
        num_pixels,
        // Row count typed as OffsetT so that the callee's OffsetT deduction
        // agrees with num_pixels; a bare int literal breaks deduction for any
        // OffsetT other than int.
        static_cast<OffsetT>(1),
        sizeof(SampleT) * NUM_CHANNELS * num_pixels,
        stream,
        debug_synchronous);
}
354 
355 
// Computes per-channel intensity histograms from a sequence of multi-channel
// "pixel" data samples using equal-width bins (region-of-interest overload
// taking a row count and a row stride in bytes).
//
// The body selects the offset type used for dispatch: when OffsetT is wider
// than int and num_rows * row_stride_bytes is below the maximum int value, the
// extents are cast to int; otherwise they are passed on as OffsetT. In both
// paths the byte stride is divided by sizeof(SampleT) before being passed on.
//
// NOTE(review): this listing skips source lines 440, 456 and 462. The body
// uses `num_row_pixels`, which is not declared in the visible parameter list,
// and both argument lists below lack the head of their call expression.
// Restore these lines from the original header before compiling.
424  template <
425  int NUM_CHANNELS,
426  int NUM_ACTIVE_CHANNELS,
427  typename SampleIteratorT,
428  typename CounterT,
429  typename LevelT,
430  typename OffsetT>
431  CUB_RUNTIME_FUNCTION
432  static cudaError_t MultiHistogramEven(
433  void* d_temp_storage,
434  size_t& temp_storage_bytes,
435  SampleIteratorT d_samples,
436  CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
437  int num_levels[NUM_ACTIVE_CHANNELS],
438  LevelT lower_level[NUM_ACTIVE_CHANNELS],
439  LevelT upper_level[NUM_ACTIVE_CHANNELS],
// NOTE(review): presumably `OffsetT num_row_pixels,` belongs here (source line 440) - confirm.
441  OffsetT num_rows,
442  size_t row_stride_bytes,
443  cudaStream_t stream = 0,
444  bool debug_synchronous = false)
445  {
447  typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
// Compile-time tag: true when a sample occupies exactly one byte
448  Int2Type<sizeof(SampleT) == 1> is_byte_sample;
449 
// Narrow the offset type only if OffsetT is wider than int and the total
// byte extent (rows * stride) is below the largest int value
450  if ((sizeof(OffsetT) > sizeof(int)) &&
451  ((unsigned long long) (num_rows * row_stride_bytes) < (unsigned long long) std::numeric_limits<int>::max()))
452  {
453  // Down-convert OffsetT data type
454 
455 
// NOTE(review): head of the call expression (source line 456) is missing here;
// presumably a `return` of the int-offset dispatch call - confirm.
457  d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level,
458  (int) num_row_pixels, (int) num_rows, (int) (row_stride_bytes / sizeof(SampleT)),
459  stream, debug_synchronous, is_byte_sample);
460  }
461 
// NOTE(review): head of the call expression (source line 462) is missing here;
// presumably a `return` of the OffsetT dispatch call - confirm.
463  d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level,
464  num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / sizeof(SampleT)),
465  stream, debug_synchronous, is_byte_sample);
466  }
467 
468 
470  /******************************************************************/
474 
/**
 * \brief Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels.
 *
 * Single-channel, one-dimensional convenience overload. Every scalar argument is
 * wrapped in a one-element array and the request is forwarded to
 * MultiHistogramRange<1, 1> as a single row of \p num_samples samples whose row
 * stride is <tt>sizeof(SampleT) * num_samples</tt> bytes.
 *
 * \param d_temp_storage     Forwarded unchanged to MultiHistogramRange.
 * \param temp_storage_bytes Forwarded unchanged (by reference) to MultiHistogramRange.
 * \param d_samples          Sample iterator; its value_type determines the row stride.
 * \param d_histogram        Forwarded as the only entry of the per-channel histogram array.
 * \param num_levels         Forwarded as the only entry of the per-channel level-count array.
 * \param d_levels           Forwarded as the only entry of the per-channel level-pointer array.
 * \param num_samples        Number of samples; used as the length of the single row.
 * \param stream             Forwarded unchanged (defaults to 0).
 * \param debug_synchronous  Forwarded unchanged (defaults to false).
 *
 * \return The cudaError_t returned by MultiHistogramRange.
 */
template <
    typename SampleIteratorT,
    typename CounterT,
    typename LevelT,
    typename OffsetT>
CUB_RUNTIME_FUNCTION
static cudaError_t HistogramRange(
    void*           d_temp_storage,
    size_t&         temp_storage_bytes,
    SampleIteratorT d_samples,
    CounterT*       d_histogram,
    int             num_levels,
    LevelT*         d_levels,
    OffsetT         num_samples,
    cudaStream_t    stream            = 0,
    bool            debug_synchronous = false)
{
    // Type of the items produced by the sample iterator
    typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;

    // One-element arrays so the multi-channel entry point can be reused
    CounterT* d_histogram1[1] = {d_histogram};
    int       num_levels1[1]  = {num_levels};
    LevelT*   d_levels1[1]    = {d_levels};

    return MultiHistogramRange<1, 1>(
        d_temp_storage,
        temp_storage_bytes,
        d_samples,
        d_histogram1,
        num_levels1,
        d_levels1,
        num_samples,
        // The row count must have type OffsetT: the callee deduces OffsetT from
        // both the row length and the row count, so a bare int literal makes
        // deduction fail whenever OffsetT is not int.
        static_cast<OffsetT>(1),
        sizeof(SampleT) * num_samples,
        stream,
        debug_synchronous);
}
559 
560 
/**
 * \brief Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels.
 *
 * Single-channel overload for a two-dimensional region of interest. The scalar
 * histogram arguments are packed into one-element arrays and the row geometry
 * (\p num_row_samples, \p num_rows, \p row_stride_bytes) is handed on untouched
 * to MultiHistogramRange<1, 1>.
 *
 * \return The cudaError_t returned by MultiHistogramRange.
 */
template <typename SampleIteratorT,
          typename CounterT,
          typename LevelT,
          typename OffsetT>
CUB_RUNTIME_FUNCTION
static cudaError_t HistogramRange(
    void*           d_temp_storage,
    size_t&         temp_storage_bytes,
    SampleIteratorT d_samples,
    CounterT*       d_histogram,
    int             num_levels,
    LevelT*         d_levels,
    OffsetT         num_row_samples,
    OffsetT         num_rows,
    size_t          row_stride_bytes,
    cudaStream_t    stream            = 0,
    bool            debug_synchronous = false)
{
    // Single-channel views of the per-channel argument arrays
    CounterT* histograms[1]   = { d_histogram };
    int       level_counts[1] = { num_levels };
    LevelT*   level_arrays[1] = { d_levels };

    // Delegate to the multi-channel implementation with one (active) channel
    return MultiHistogramRange<1, 1>(
        d_temp_storage, temp_storage_bytes,
        d_samples,
        histograms, level_counts, level_arrays,
        num_row_samples, num_rows, row_stride_bytes,
        stream, debug_synchronous);
}
653 
/**
 * \brief Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using the specified bin boundary levels.
 *
 * One-dimensional convenience overload. The input is forwarded to the
 * region-of-interest overload of MultiHistogramRange as a single row of
 * \p num_pixels pixels whose row stride is
 * <tt>sizeof(SampleT) * NUM_CHANNELS * num_pixels</tt> bytes.
 *
 * \tparam NUM_CHANNELS        Multiplier applied to the per-pixel sample size when computing the row stride.
 * \tparam NUM_ACTIVE_CHANNELS Extent of the per-channel argument arrays.
 *
 * \param d_temp_storage     Forwarded unchanged.
 * \param temp_storage_bytes Forwarded unchanged (by reference).
 * \param d_samples          Sample iterator; its value_type determines the row stride.
 * \param d_histogram        Per-channel histogram pointers, forwarded unchanged.
 * \param num_levels         Per-channel level counts, forwarded unchanged.
 * \param d_levels           Per-channel level pointers, forwarded unchanged.
 * \param num_pixels         Number of pixels; used as the length of the single row.
 * \param stream             Forwarded unchanged (defaults to 0).
 * \param debug_synchronous  Forwarded unchanged (defaults to false).
 *
 * \return The cudaError_t returned by the region-of-interest overload.
 */
template <
    int NUM_CHANNELS,
    int NUM_ACTIVE_CHANNELS,
    typename SampleIteratorT,
    typename CounterT,
    typename LevelT,
    typename OffsetT>
CUB_RUNTIME_FUNCTION
static cudaError_t MultiHistogramRange(
    void*           d_temp_storage,
    size_t&         temp_storage_bytes,
    SampleIteratorT d_samples,
    CounterT*       d_histogram[NUM_ACTIVE_CHANNELS],
    int             num_levels[NUM_ACTIVE_CHANNELS],
    LevelT*         d_levels[NUM_ACTIVE_CHANNELS],
    OffsetT         num_pixels,
    cudaStream_t    stream            = 0,
    bool            debug_synchronous = false)
{
    // Type of the items produced by the sample iterator
    typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;

    return MultiHistogramRange<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
        d_temp_storage,
        temp_storage_bytes,
        d_samples,
        d_histogram,
        num_levels,
        d_levels,
        num_pixels,
        // Row count typed as OffsetT so that the callee's OffsetT deduction
        // agrees with num_pixels; a bare int literal breaks deduction for any
        // OffsetT other than int.
        static_cast<OffsetT>(1),
        sizeof(SampleT) * NUM_CHANNELS * num_pixels,
        stream,
        debug_synchronous);
}
749 
750 
// Computes per-channel intensity histograms from a sequence of multi-channel
// "pixel" data samples using the specified bin boundary levels
// (region-of-interest overload taking a row count and a row stride in bytes).
//
// The body selects the offset type used for dispatch: when OffsetT is wider
// than int and num_rows * row_stride_bytes is below the maximum int value, the
// extents are cast to int; otherwise they are passed on as OffsetT. In both
// paths the byte stride is divided by sizeof(SampleT) before being passed on.
//
// NOTE(review): this listing skips source lines 832, 846 and 852. The body
// uses `num_row_pixels`, which is not declared in the visible parameter list,
// and both argument lists below lack the head of their call expression.
// Restore these lines from the original header before compiling.
817  template <
818  int NUM_CHANNELS,
819  int NUM_ACTIVE_CHANNELS,
820  typename SampleIteratorT,
821  typename CounterT,
822  typename LevelT,
823  typename OffsetT>
824  CUB_RUNTIME_FUNCTION
825  static cudaError_t MultiHistogramRange(
826  void* d_temp_storage,
827  size_t& temp_storage_bytes,
828  SampleIteratorT d_samples,
829  CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
830  int num_levels[NUM_ACTIVE_CHANNELS],
831  LevelT* d_levels[NUM_ACTIVE_CHANNELS],
// NOTE(review): presumably `OffsetT num_row_pixels,` belongs here (source line 832) - confirm.
833  OffsetT num_rows,
834  size_t row_stride_bytes,
835  cudaStream_t stream = 0,
836  bool debug_synchronous = false)
837  {
839  typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
// Compile-time tag: true when a sample occupies exactly one byte
840  Int2Type<sizeof(SampleT) == 1> is_byte_sample;
841 
// Narrow the offset type only if OffsetT is wider than int and the total
// byte extent (rows * stride) is below the largest int value
842  if ((sizeof(OffsetT) > sizeof(int)) &&
843  ((unsigned long long) (num_rows * row_stride_bytes) < (unsigned long long) std::numeric_limits<int>::max()))
844  {
845  // Down-convert OffsetT data type
// NOTE(review): head of the call expression (source line 846) is missing here;
// presumably a `return` of the int-offset dispatch call - confirm.
847  d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels,
848  (int) num_row_pixels, (int) num_rows, (int) (row_stride_bytes / sizeof(SampleT)),
849  stream, debug_synchronous, is_byte_sample);
850  }
851 
// NOTE(review): head of the call expression (source line 852) is missing here;
// presumably a `return` of the OffsetT dispatch call - confirm.
853  d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels,
854  num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / sizeof(SampleT)),
855  stream, debug_synchronous, is_byte_sample);
856  }
857 
858 
859 
861 };
862 
863 } // CUB namespace
864 CUB_NS_POSTFIX // Optional outer namespace(s)
865 
866 
Optional outer namespace(s)
static CUB_RUNTIME_FUNCTION cudaError_t HistogramRange(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram, int num_levels, LevelT *d_levels, OffsetT num_row_samples, OffsetT num_rows, size_t row_stride_bytes, cudaStream_t stream=0, bool debug_synchronous=false)
Computes an intensity histogram from a sequence of data samples using the specified bin boundary leve...
ArrayWrapper< int, NUM_ACTIVE_CHANNELS > ArrayWrapper< int, NUM_ACTIVE_CHANNELS > ArrayWrapper< CounterT *, NUM_ACTIVE_CHANNELS > ArrayWrapper< CounterT *, NUM_ACTIVE_CHANNELS > ArrayWrapper< OutputDecodeOpT, NUM_ACTIVE_CHANNELS > ArrayWrapper< PrivatizedDecodeOpT, NUM_ACTIVE_CHANNELS > OffsetT OffsetT num_rows
The number of rows in the region of interest.
OffsetT OffsetT
[in] Total number of input data items
DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequenc...
static CUB_RUNTIME_FUNCTION cudaError_t MultiHistogramRange(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT *d_levels[NUM_ACTIVE_CHANNELS], OffsetT num_pixels, cudaStream_t stream=0, bool debug_synchronous=false)
Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using...
Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static c...
Definition: util_type.cuh:275
static CUB_RUNTIME_FUNCTION cudaError_t HistogramEven(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram, int num_levels, LevelT lower_level, LevelT upper_level, OffsetT num_samples, cudaStream_t stream=0, bool debug_synchronous=false)
Computes an intensity histogram from a sequence of data samples using equal-width bins.
static CUB_RUNTIME_FUNCTION cudaError_t HistogramEven(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram, int num_levels, LevelT lower_level, LevelT upper_level, OffsetT num_row_samples, OffsetT num_rows, size_t row_stride_bytes, cudaStream_t stream=0, bool debug_synchronous=false)
Computes an intensity histogram from a sequence of data samples using equal-width bins.
static CUB_RUNTIME_FUNCTION cudaError_t MultiHistogramEven(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT lower_level[NUM_ACTIVE_CHANNELS], LevelT upper_level[NUM_ACTIVE_CHANNELS], OffsetT num_row_pixels, OffsetT num_rows, size_t row_stride_bytes, cudaStream_t stream=0, bool debug_synchronous=false)
Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using...
static CUB_RUNTIME_FUNCTION cudaError_t MultiHistogramRange(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT *d_levels[NUM_ACTIVE_CHANNELS], OffsetT num_row_pixels, OffsetT num_rows, size_t row_stride_bytes, cudaStream_t stream=0, bool debug_synchronous=false)
Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using...
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t DispatchEven(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_output_histograms[NUM_ACTIVE_CHANNELS], int num_output_levels[NUM_ACTIVE_CHANNELS], LevelT lower_level[NUM_ACTIVE_CHANNELS], LevelT upper_level[NUM_ACTIVE_CHANNELS], OffsetT num_row_pixels, OffsetT num_rows, OffsetT row_stride_samples, cudaStream_t stream, bool debug_synchronous, Int2Type< false > is_byte_sample)
ArrayWrapper< int, NUM_ACTIVE_CHANNELS > ArrayWrapper< int, NUM_ACTIVE_CHANNELS > ArrayWrapper< CounterT *, NUM_ACTIVE_CHANNELS > ArrayWrapper< CounterT *, NUM_ACTIVE_CHANNELS > ArrayWrapper< OutputDecodeOpT, NUM_ACTIVE_CHANNELS > ArrayWrapper< PrivatizedDecodeOpT, NUM_ACTIVE_CHANNELS > OffsetT num_row_pixels
The number of multi-channel pixels per row in the region of interest.
static CUB_RUNTIME_FUNCTION cudaError_t DispatchRange(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_output_histograms[NUM_ACTIVE_CHANNELS], int num_output_levels[NUM_ACTIVE_CHANNELS], LevelT *d_levels[NUM_ACTIVE_CHANNELS], OffsetT num_row_pixels, OffsetT num_rows, OffsetT row_stride_samples, cudaStream_t stream, bool debug_synchronous, Int2Type< false > is_byte_sample)
static CUB_RUNTIME_FUNCTION cudaError_t MultiHistogramEven(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram[NUM_ACTIVE_CHANNELS], int num_levels[NUM_ACTIVE_CHANNELS], LevelT lower_level[NUM_ACTIVE_CHANNELS], LevelT upper_level[NUM_ACTIVE_CHANNELS], OffsetT num_pixels, cudaStream_t stream=0, bool debug_synchronous=false)
Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using...
static CUB_RUNTIME_FUNCTION cudaError_t HistogramRange(void *d_temp_storage, size_t &temp_storage_bytes, SampleIteratorT d_samples, CounterT *d_histogram, int num_levels, LevelT *d_levels, OffsetT num_samples, cudaStream_t stream=0, bool debug_synchronous=false)
Computes an intensity histogram from a sequence of data samples using the specified bin boundary leve...