OpenFPM_pdata 4.1.0
Project that contains the implementation of distributed structures
 
block_discontinuity.cuh
/******************************************************************************
 * Copyright (c) 2011, Duane Merrill. All rights reserved.
 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of the NVIDIA CORPORATION nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 ******************************************************************************/

#pragma once

#include "../util_type.cuh"
#include "../util_ptx.cuh"
#include "../util_namespace.cuh"

/// Optional outer namespace(s)
CUB_NS_PREFIX

/// CUB namespace
namespace cub {

/// The BlockDiscontinuity class provides collective methods for flagging discontinuities within an
/// ordered set of items partitioned across a CUDA thread block.
template <
    typename T,
    int BLOCK_DIM_X,
    int BLOCK_DIM_Y = 1,
    int BLOCK_DIM_Z = 1,
    int PTX_ARCH = CUB_PTX_ARCH>
class BlockDiscontinuity
{
private:

    /******************************************************************************
     * Constants and type definitions
     ******************************************************************************/

    /// Constants
    enum
    {
        /// The thread block size in threads
        BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z,
    };


    /// Shared memory storage layout type (last element from each thread's input)
    struct _TempStorage
    {
        T first_items[BLOCK_THREADS];
        T last_items[BLOCK_THREADS];
    };


    /******************************************************************************
     * Utility methods
     ******************************************************************************/

    /// Internal storage allocator
    __device__ __forceinline__ _TempStorage& PrivateStorage()
    {
        __shared__ _TempStorage private_storage;
        return private_storage;
    }


    /// Specialization for when FlagOp has third index param
    template <typename FlagOp, bool HAS_PARAM = BinaryOpHasIdxParam<T, FlagOp>::HAS_PARAM>
    struct ApplyOp
    {
        // Apply flag operator
        static __device__ __forceinline__ bool FlagT(FlagOp flag_op, const T &a, const T &b, int idx)
        {
            return flag_op(a, b, idx);
        }
    };

    /// Specialization for when FlagOp does not have a third index param
    template <typename FlagOp>
    struct ApplyOp<FlagOp, false>
    {
        // Apply flag operator
        static __device__ __forceinline__ bool FlagT(FlagOp flag_op, const T &a, const T &b, int /*idx*/)
        {
            return flag_op(a, b);
        }
    };
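
    // Illustrative sketch: a user-supplied FlagOp may take either of two shapes, and
    // ApplyOp/BinaryOpHasIdxParam select between them at compile time. Both functors below are
    // hypothetical examples, not part of this header:
    //
    //     struct CustomInequality              // bool operator()(a, b)
    //     {
    //         template <typename U>
    //         __device__ bool operator()(const U &a, const U &b) { return a != b; }
    //     };
    //
    //     struct CustomInequalityWithIdx       // bool operator()(a, b, b_index)
    //     {
    //         template <typename U>
    //         __device__ bool operator()(const U &a, const U &b, int b_index)
    //         {
    //             return (a != b) || (b_index % 10 == 0);   // e.g. also break segments every 10 items
    //         }
    //     };
    //
    // In both forms a is the predecessor of b, and b_index is b's rank within the tile.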

    /// Templated unrolling of item comparison (inductive case)
    template <int ITERATION, int MAX_ITERATIONS>
    struct Iterate
    {
        // Head flags
        template <
            int ITEMS_PER_THREAD,
            typename FlagT,
            typename FlagOp>
        static __device__ __forceinline__ void FlagHeads(
            int linear_tid,
            FlagT (&flags)[ITEMS_PER_THREAD],
            T (&input)[ITEMS_PER_THREAD],
            T (&preds)[ITEMS_PER_THREAD],
            FlagOp flag_op)
        {
            preds[ITERATION] = input[ITERATION - 1];

            flags[ITERATION] = ApplyOp<FlagOp>::FlagT(
                flag_op,
                preds[ITERATION],
                input[ITERATION],
                (linear_tid * ITEMS_PER_THREAD) + ITERATION);

            // Continue with the next item in this thread's segment
            Iterate<ITERATION + 1, MAX_ITERATIONS>::FlagHeads(linear_tid, flags, input, preds, flag_op);
        }

        // Tail flags
        template <
            int ITEMS_PER_THREAD,
            typename FlagT,
            typename FlagOp>
        static __device__ __forceinline__ void FlagTails(
            int linear_tid,
            FlagT (&flags)[ITEMS_PER_THREAD],
            T (&input)[ITEMS_PER_THREAD],
            FlagOp flag_op)
        {
            flags[ITERATION] = ApplyOp<FlagOp>::FlagT(
                flag_op,
                input[ITERATION],
                input[ITERATION + 1],
                (linear_tid * ITEMS_PER_THREAD) + ITERATION + 1);

            // Continue with the next item in this thread's segment
            Iterate<ITERATION + 1, MAX_ITERATIONS>::FlagTails(linear_tid, flags, input, flag_op);
        }

    };

    /// Templated unrolling of item comparison (termination case)
    template <int MAX_ITERATIONS>
    struct Iterate<MAX_ITERATIONS, MAX_ITERATIONS>
    {
        // Head flags
        template <
            int ITEMS_PER_THREAD,
            typename FlagT,
            typename FlagOp>
        static __device__ __forceinline__ void FlagHeads(
            int /*linear_tid*/,
            FlagT (&/*flags*/)[ITEMS_PER_THREAD],
            T (&/*input*/)[ITEMS_PER_THREAD],
            T (&/*preds*/)[ITEMS_PER_THREAD],
            FlagOp /*flag_op*/)
        {}

        // Tail flags
        template <
            int ITEMS_PER_THREAD,
            typename FlagT,
            typename FlagOp>
        static __device__ __forceinline__ void FlagTails(
            int /*linear_tid*/,
            FlagT (&/*flags*/)[ITEMS_PER_THREAD],
            T (&/*input*/)[ITEMS_PER_THREAD],
            FlagOp /*flag_op*/)
        {}
    };


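    // Iterate<ITERATION, MAX_ITERATIONS> is a compile-time loop over each thread's local items.
    // A sketch of how it unrolls, assuming ITEMS_PER_THREAD == 4:
    //
    //     Iterate<1, 4>::FlagHeads(...)        // flags item 1, then recurses
    //       Iterate<2, 4>::FlagHeads(...)      // flags item 2
    //         Iterate<3, 4>::FlagHeads(...)    // flags item 3
    //           Iterate<4, 4>::FlagHeads(...)  // termination specialization above: no-op
    //
    // Item 0 is handled separately by the public methods because its predecessor lives in the
    // previous thread's registers (or comes from the previous tile).
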
    /******************************************************************************
     * Thread fields
     ******************************************************************************/

    /// Shared storage reference
    _TempStorage &temp_storage;

    /// Linear thread-id
    unsigned int linear_tid;


public:

    /// \smemstorage{BlockDiscontinuity}
    struct TempStorage : Uninitialized<_TempStorage> {};


    /******************************************************************/
    // Collective constructors

    /// Collective constructor using a private static allocation of shared memory as temporary storage.
    __device__ __forceinline__ BlockDiscontinuity()
    :
        temp_storage(PrivateStorage()),
        linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
    {}


    /// Collective constructor using the specified memory allocation as temporary storage.
    __device__ __forceinline__ BlockDiscontinuity(
        TempStorage &temp_storage)      ///< [in] Reference to memory allocation having layout type TempStorage
    :
        temp_storage(temp_storage.Alias()),
        linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z))
    {}

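
    // Usage sketch (hypothetical instantiation): the two construction modes.
    //
    //     // (a) private static shared-memory allocation
    //     cub::BlockDiscontinuity<int, 128> disc_a;
    //
    //     // (b) caller-provided allocation, e.g. so the storage can be reused by other collectives
    //     __shared__ typename cub::BlockDiscontinuity<int, 128>::TempStorage temp_storage;
    //     cub::BlockDiscontinuity<int, 128> disc_b(temp_storage);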

    /******************************************************************/
    // Head flag operations


#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document

    template <
        int ITEMS_PER_THREAD,
        typename FlagT,
        typename FlagOp>
    __device__ __forceinline__ void FlagHeads(
        FlagT (&head_flags)[ITEMS_PER_THREAD],
        T (&input)[ITEMS_PER_THREAD],
        T (&preds)[ITEMS_PER_THREAD],
        FlagOp flag_op)
    {
        // Share last item
        temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1];

        CTA_SYNC();

        if (linear_tid == 0)
        {
            // Set flag for first thread-item (preds[0] is undefined)
            head_flags[0] = 1;
        }
        else
        {
            preds[0] = temp_storage.last_items[linear_tid - 1];
            head_flags[0] = ApplyOp<FlagOp>::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD);
        }

        // Set head_flags for remaining items
        Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op);
    }

    template <
        int ITEMS_PER_THREAD,
        typename FlagT,
        typename FlagOp>
    __device__ __forceinline__ void FlagHeads(
        FlagT (&head_flags)[ITEMS_PER_THREAD],
        T (&input)[ITEMS_PER_THREAD],
        T (&preds)[ITEMS_PER_THREAD],
        FlagOp flag_op,
        T tile_predecessor_item)
    {
        // Share last item
        temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1];

        CTA_SYNC();

        // Set flag for first thread-item
        preds[0] = (linear_tid == 0) ?
            tile_predecessor_item :             // First thread
            temp_storage.last_items[linear_tid - 1];

        head_flags[0] = ApplyOp<FlagOp>::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD);

        // Set head_flags for remaining items
        Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op);
    }

#endif // DOXYGEN_SHOULD_SKIP_THIS


    /// Sets head flags indicating discontinuities between items partitioned across the thread block.
    template <
        int ITEMS_PER_THREAD,
        typename FlagT,
        typename FlagOp>
    __device__ __forceinline__ void FlagHeads(
        FlagT (&head_flags)[ITEMS_PER_THREAD],
        T (&input)[ITEMS_PER_THREAD],
        FlagOp flag_op)
    {
        T preds[ITEMS_PER_THREAD];
        FlagHeads(head_flags, input, preds, flag_op);
    }

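    // Usage sketch (hypothetical block/type parameters): flag the first item of each run of
    // consecutive equal keys held by a 1D block of 128 threads owning 4 keys apiece.
    //
    //     __shared__ typename cub::BlockDiscontinuity<int, 128>::TempStorage temp_storage;
    //
    //     int thread_data[4];     // assumed already loaded in a blocked arrangement
    //     int head_flags[4];
    //     cub::BlockDiscontinuity<int, 128>(temp_storage).FlagHeads(
    //         head_flags, thread_data, cub::Inequality());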

    /// Sets head flags indicating discontinuities between items partitioned across the thread block.
    template <
        int ITEMS_PER_THREAD,
        typename FlagT,
        typename FlagOp>
    __device__ __forceinline__ void FlagHeads(
        FlagT (&head_flags)[ITEMS_PER_THREAD],
        T (&input)[ITEMS_PER_THREAD],
        FlagOp flag_op,
        T tile_predecessor_item)
    {
        T preds[ITEMS_PER_THREAD];
        FlagHeads(head_flags, input, preds, flag_op, tile_predecessor_item);
    }



    /******************************************************************/
    // Tail flag operations

    /// Sets tail flags indicating discontinuities between items partitioned across the thread block.
    template <
        int ITEMS_PER_THREAD,
        typename FlagT,
        typename FlagOp>
    __device__ __forceinline__ void FlagTails(
        FlagT (&tail_flags)[ITEMS_PER_THREAD],
        T (&input)[ITEMS_PER_THREAD],
        FlagOp flag_op)
    {
        // Share first item
        temp_storage.first_items[linear_tid] = input[0];

        CTA_SYNC();

        // Set flag for last thread-item
        tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ?
            1 :                             // Last thread
            ApplyOp<FlagOp>::FlagT(
                flag_op,
                input[ITEMS_PER_THREAD - 1],
                temp_storage.first_items[linear_tid + 1],
                (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);

        // Set tail_flags for remaining items
        Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op);
    }


    /// Sets tail flags indicating discontinuities between items partitioned across the thread block.
    template <
        int ITEMS_PER_THREAD,
        typename FlagT,
        typename FlagOp>
    __device__ __forceinline__ void FlagTails(
        FlagT (&tail_flags)[ITEMS_PER_THREAD],
        T (&input)[ITEMS_PER_THREAD],
        FlagOp flag_op,
        T tile_successor_item)
    {
        // Share first item
        temp_storage.first_items[linear_tid] = input[0];

        CTA_SYNC();

        // Set flag for last thread-item
        T successor_item = (linear_tid == BLOCK_THREADS - 1) ?
            tile_successor_item :           // Last thread
            temp_storage.first_items[linear_tid + 1];

        tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp<FlagOp>::FlagT(
            flag_op,
            input[ITEMS_PER_THREAD - 1],
            successor_item,
            (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);

        // Set tail_flags for remaining items
        Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op);
    }

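
    // Usage sketch (hypothetical, continuing the FlagHeads example above): flag the last item of
    // each run, comparing across the tile boundary via the first key of the next tile.
    //
    //     int tail_flags[4];
    //     int tile_successor_item = ...;   // first key of the next tile, when one exists
    //     cub::BlockDiscontinuity<int, 128>(temp_storage).FlagTails(
    //         tail_flags, thread_data, cub::Inequality(), tile_successor_item);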

    /******************************************************************/
    // Head & tail flag operations

    /// Sets both head and tail flags indicating discontinuities between items partitioned across the thread block.
    template <
        int ITEMS_PER_THREAD,
        typename FlagT,
        typename FlagOp>
    __device__ __forceinline__ void FlagHeadsAndTails(
        FlagT (&head_flags)[ITEMS_PER_THREAD],
        FlagT (&tail_flags)[ITEMS_PER_THREAD],
        T (&input)[ITEMS_PER_THREAD],
        FlagOp flag_op)
    {
        // Share first and last items
        temp_storage.first_items[linear_tid] = input[0];
        temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1];

        CTA_SYNC();

        T preds[ITEMS_PER_THREAD];

        // Set flag for first thread-item (thread0 has no predecessor; avoid reading out of bounds)
        if (linear_tid == 0)
        {
            head_flags[0] = 1;
        }
        else
        {
            preds[0] = temp_storage.last_items[linear_tid - 1];
            head_flags[0] = ApplyOp<FlagOp>::FlagT(
                flag_op,
                preds[0],
                input[0],
                linear_tid * ITEMS_PER_THREAD);
        }

        // Set flag for last thread-item
        tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ?
            1 :                             // Last thread
            ApplyOp<FlagOp>::FlagT(
                flag_op,
                input[ITEMS_PER_THREAD - 1],
                temp_storage.first_items[linear_tid + 1],
                (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);

        // Set head_flags for remaining items
        Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op);

        // Set tail_flags for remaining items
        Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op);
    }

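
    // Usage sketch (hypothetical, same parameters as above): obtain both flag arrays in one call,
    // e.g. to delimit segments for a subsequent block-wide segmented reduction or scan.
    //
    //     int head_flags[4];
    //     int tail_flags[4];
    //     cub::BlockDiscontinuity<int, 128>(temp_storage).FlagHeadsAndTails(
    //         head_flags, tail_flags, thread_data, cub::Inequality());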

    /// Sets both head and tail flags indicating discontinuities between items partitioned across the thread block.
    template <
        int ITEMS_PER_THREAD,
        typename FlagT,
        typename FlagOp>
    __device__ __forceinline__ void FlagHeadsAndTails(
        FlagT (&head_flags)[ITEMS_PER_THREAD],
        FlagT (&tail_flags)[ITEMS_PER_THREAD],
        T tile_successor_item,
        T (&input)[ITEMS_PER_THREAD],
        FlagOp flag_op)
    {
        // Share first and last items
        temp_storage.first_items[linear_tid] = input[0];
        temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1];

        CTA_SYNC();

        T preds[ITEMS_PER_THREAD];

        // Set flag for first thread-item
        if (linear_tid == 0)
        {
            head_flags[0] = 1;
        }
        else
        {
            preds[0] = temp_storage.last_items[linear_tid - 1];
            head_flags[0] = ApplyOp<FlagOp>::FlagT(
                flag_op,
                preds[0],
                input[0],
                linear_tid * ITEMS_PER_THREAD);
        }

        // Set flag for last thread-item
        T successor_item = (linear_tid == BLOCK_THREADS - 1) ?
            tile_successor_item :           // Last thread
            temp_storage.first_items[linear_tid + 1];

        tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp<FlagOp>::FlagT(
            flag_op,
            input[ITEMS_PER_THREAD - 1],
            successor_item,
            (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);

        // Set head_flags for remaining items
        Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op);

        // Set tail_flags for remaining items
        Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op);
    }


    /// Sets both head and tail flags indicating discontinuities between items partitioned across the thread block.
    template <
        int ITEMS_PER_THREAD,
        typename FlagT,
        typename FlagOp>
    __device__ __forceinline__ void FlagHeadsAndTails(
        FlagT (&head_flags)[ITEMS_PER_THREAD],
        T tile_predecessor_item,
        FlagT (&tail_flags)[ITEMS_PER_THREAD],
        T (&input)[ITEMS_PER_THREAD],
        FlagOp flag_op)
    {
        // Share first and last items
        temp_storage.first_items[linear_tid] = input[0];
        temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1];

        CTA_SYNC();

        T preds[ITEMS_PER_THREAD];

        // Set flag for first thread-item
        preds[0] = (linear_tid == 0) ?
            tile_predecessor_item :         // First thread
            temp_storage.last_items[linear_tid - 1];

        head_flags[0] = ApplyOp<FlagOp>::FlagT(
            flag_op,
            preds[0],
            input[0],
            linear_tid * ITEMS_PER_THREAD);

        // Set flag for last thread-item
        tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ?
            1 :                             // Last thread
            ApplyOp<FlagOp>::FlagT(
                flag_op,
                input[ITEMS_PER_THREAD - 1],
                temp_storage.first_items[linear_tid + 1],
                (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);

        // Set head_flags for remaining items
        Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op);

        // Set tail_flags for remaining items
        Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op);
    }


    /// Sets both head and tail flags indicating discontinuities between items partitioned across the thread block.
    template <
        int ITEMS_PER_THREAD,
        typename FlagT,
        typename FlagOp>
    __device__ __forceinline__ void FlagHeadsAndTails(
        FlagT (&head_flags)[ITEMS_PER_THREAD],
        T tile_predecessor_item,
        FlagT (&tail_flags)[ITEMS_PER_THREAD],
        T tile_successor_item,
        T (&input)[ITEMS_PER_THREAD],
        FlagOp flag_op)
    {
        // Share first and last items
        temp_storage.first_items[linear_tid] = input[0];
        temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1];

        CTA_SYNC();

        T preds[ITEMS_PER_THREAD];

        // Set flag for first thread-item
        preds[0] = (linear_tid == 0) ?
            tile_predecessor_item :         // First thread
            temp_storage.last_items[linear_tid - 1];

        head_flags[0] = ApplyOp<FlagOp>::FlagT(
            flag_op,
            preds[0],
            input[0],
            linear_tid * ITEMS_PER_THREAD);

        // Set flag for last thread-item
        T successor_item = (linear_tid == BLOCK_THREADS - 1) ?
            tile_successor_item :           // Last thread
            temp_storage.first_items[linear_tid + 1];

        tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp<FlagOp>::FlagT(
            flag_op,
            input[ITEMS_PER_THREAD - 1],
            successor_item,
            (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);

        // Set head_flags for remaining items
        Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op);

        // Set tail_flags for remaining items
        Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op);
    }



};


}               // CUB namespace
CUB_NS_POSTFIX  // Optional outer namespace(s)
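
A minimal end-to-end sketch of how this header is typically used (the kernel name, block size,
items per thread, and the use of cub::Inequality are illustrative assumptions, not taken from the
file above):

    #include <cub/cub.cuh>

    // Flags the head of every run of equal keys held by one 128-thread block (4 keys per thread).
    __global__ void FlagRunHeads(const int *d_keys, int *d_head_flags)
    {
        const int THREADS          = 128;
        const int ITEMS_PER_THREAD = 4;

        typedef cub::BlockDiscontinuity<int, THREADS> BlockDiscontinuityT;
        __shared__ typename BlockDiscontinuityT::TempStorage temp_storage;

        // Load a blocked arrangement: thread t owns keys [t*4, t*4 + 4) of this block's tile
        int thread_keys[ITEMS_PER_THREAD];
        int block_offset = blockIdx.x * THREADS * ITEMS_PER_THREAD;
        for (int i = 0; i < ITEMS_PER_THREAD; ++i)
            thread_keys[i] = d_keys[block_offset + threadIdx.x * ITEMS_PER_THREAD + i];

        // Collectively flag the first item of each run of consecutive equal keys
        int head_flags[ITEMS_PER_THREAD];
        BlockDiscontinuityT(temp_storage).FlagHeads(head_flags, thread_keys, cub::Inequality());

        // Store the flags in the same blocked arrangement
        for (int i = 0; i < ITEMS_PER_THREAD; ++i)
            d_head_flags[block_offset + threadIdx.x * ITEMS_PER_THREAD + i] = head_flags[i];
    }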

Hover documentation for the symbols referenced in the listing:

cub::BlockDiscontinuity
    The BlockDiscontinuity class provides collective methods for flagging discontinuities within an order...

__device__ __forceinline__ void FlagHeadsAndTails(FlagT (&head_flags)[ITEMS_PER_THREAD], T tile_predecessor_item, FlagT (&tail_flags)[ITEMS_PER_THREAD], T (&input)[ITEMS_PER_THREAD], FlagOp flag_op)
    Sets both head and tail flags indicating discontinuities between items partitioned across the thread ...

__device__ __forceinline__ void FlagTails(FlagT (&tail_flags)[ITEMS_PER_THREAD], T (&input)[ITEMS_PER_THREAD], FlagOp flag_op)
    Sets tail flags indicating discontinuities between items partitioned across the thread block,...

unsigned int linear_tid
    Linear thread-id.

__device__ __forceinline__ void FlagHeads(FlagT (&head_flags)[ITEMS_PER_THREAD], T (&input)[ITEMS_PER_THREAD], T (&preds)[ITEMS_PER_THREAD], FlagOp flag_op, T tile_predecessor_item)

__device__ __forceinline__ void FlagHeads(FlagT (&head_flags)[ITEMS_PER_THREAD], T (&input)[ITEMS_PER_THREAD], T (&preds)[ITEMS_PER_THREAD], FlagOp flag_op)

__device__ __forceinline__ void FlagTails(FlagT (&tail_flags)[ITEMS_PER_THREAD], T (&input)[ITEMS_PER_THREAD], FlagOp flag_op, T tile_successor_item)
    Sets tail flags indicating discontinuities between items partitioned across the thread block.

__device__ __forceinline__ void FlagHeadsAndTails(FlagT (&head_flags)[ITEMS_PER_THREAD], FlagT (&tail_flags)[ITEMS_PER_THREAD], T (&input)[ITEMS_PER_THREAD], FlagOp flag_op)
    Sets both head and tail flags indicating discontinuities between items partitioned across the thread ...

_TempStorage &temp_storage
    Shared storage reference.

__device__ __forceinline__ BlockDiscontinuity(TempStorage &temp_storage)
    Collective constructor using the specified memory allocation as temporary storage.

__device__ __forceinline__ void FlagHeads(FlagT (&head_flags)[ITEMS_PER_THREAD], T (&input)[ITEMS_PER_THREAD], FlagOp flag_op)
    Sets head flags indicating discontinuities between items partitioned across the thread block,...

__device__ __forceinline__ BlockDiscontinuity()
    Collective constructor using a private static allocation of shared memory as temporary storage.

__device__ __forceinline__ void FlagHeadsAndTails(FlagT (&head_flags)[ITEMS_PER_THREAD], FlagT (&tail_flags)[ITEMS_PER_THREAD], T tile_successor_item, T (&input)[ITEMS_PER_THREAD], FlagOp flag_op)
    Sets both head and tail flags indicating discontinuities between items partitioned across the thread ...

__device__ __forceinline__ void FlagHeads(FlagT (&head_flags)[ITEMS_PER_THREAD], T (&input)[ITEMS_PER_THREAD], FlagOp flag_op, T tile_predecessor_item)
    Sets head flags indicating discontinuities between items partitioned across the thread block.

__device__ __forceinline__ void FlagHeadsAndTails(FlagT (&head_flags)[ITEMS_PER_THREAD], T tile_predecessor_item, FlagT (&tail_flags)[ITEMS_PER_THREAD], T tile_successor_item, T (&input)[ITEMS_PER_THREAD], FlagOp flag_op)
    Sets both head and tail flags indicating discontinuities between items partitioned across the thread ...

BLOCK_THREADS
    The thread block size in threads.

__device__ __forceinline__ _TempStorage &PrivateStorage()
    Internal storage allocator.

__device__ __forceinline__ int RowMajorTid(int block_dim_x, int block_dim_y, int block_dim_z)
    Returns the row-major linear thread identifier for a multidimensional thread block.
    Definition: util_ptx.cuh:409

CTA_SYNC()
    Definition: util_ptx.cuh:255

CUB_NS_PREFIX / CUB_NS_POSTFIX
    Optional outer namespace(s).

ApplyOp
    Specialization for when FlagOp has third index param.

Iterate
    Templated unrolling of item comparison (inductive case).
    static __device__ __forceinline__ void FlagHeads(int linear_tid, FlagT (&flags)[ITEMS_PER_THREAD], T (&input)[ITEMS_PER_THREAD], T (&preds)[ITEMS_PER_THREAD], FlagOp flag_op)
    static __device__ __forceinline__ void FlagTails(int linear_tid, FlagT (&flags)[ITEMS_PER_THREAD], T (&input)[ITEMS_PER_THREAD], FlagOp flag_op)

TempStorage
    \smemstorage{BlockDiscontinuity}

_TempStorage
    Shared memory storage layout type (last element from each thread's input).

Uninitialized
    A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions.

#define CUB_PTX_ARCH
    CUB_PTX_ARCH reflects the PTX version targeted by the active compiler pass (or zero during the host p...
    Definition: util_arch.cuh:53
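
When a single thread block walks over several consecutive tiles, the last key of the previous tile
can be carried forward and passed as tile_predecessor_item, so the first item of each new tile is
genuinely compared instead of being unconditionally flagged. A sketch under the same hypothetical
parameters as the kernel above (the tile loop, num_tiles, and the broadcast variable are
illustrative):

    int tile_predecessor = 0;    // value is never read for the very first tile
    for (int tile = 0; tile < num_tiles; ++tile)
    {
        // ... load this tile's keys into thread_keys (blocked arrangement) ...

        int head_flags[ITEMS_PER_THREAD];
        if (tile == 0)
            BlockDiscontinuityT(temp_storage).FlagHeads(
                head_flags, thread_keys, cub::Inequality());
        else
            BlockDiscontinuityT(temp_storage).FlagHeads(
                head_flags, thread_keys, cub::Inequality(), tile_predecessor);

        // Carry the block-wide last key into the next iteration: the last thread publishes it,
        // and the barrier makes it visible (and keeps reuse of temp_storage race-free).
        __shared__ int last_key;
        if (threadIdx.x == THREADS - 1)
            last_key = thread_keys[ITEMS_PER_THREAD - 1];
        __syncthreads();
        tile_predecessor = last_key;

        // ... consume head_flags ...
    }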