36#include "../util_type.cuh"
37#include "../util_ptx.cuh"
38#include "../util_namespace.cuh"
140 return private_storage;
145 template <typename FlagOp, bool HAS_PARAM = BinaryOpHasIdxParam<T, FlagOp>::HAS_PARAM>
149 static __device__ __forceinline__
bool FlagT(FlagOp flag_op,
const T &a,
const T &b,
int idx)
151 return flag_op(a, b, idx);
156 template <
typename FlagOp>
160 static __device__ __forceinline__
bool FlagT(FlagOp flag_op,
const T &a,
const T &b,
int )
162 return flag_op(a, b);
167 template <
int ITERATION,
int MAX_ITERATIONS>
172 int ITEMS_PER_THREAD,
177 FlagT (&flags)[ITEMS_PER_THREAD],
178 T (&input)[ITEMS_PER_THREAD],
179 T (&preds)[ITEMS_PER_THREAD],
182 preds[ITERATION] = input[ITERATION - 1];
195 int ITEMS_PER_THREAD,
200 FlagT (&flags)[ITEMS_PER_THREAD],
201 T (&input)[ITEMS_PER_THREAD],
207 input[ITERATION + 1],
208 (
linear_tid * ITEMS_PER_THREAD) + ITERATION + 1);
216 template <
int MAX_ITERATIONS>
217 struct Iterate<MAX_ITERATIONS, MAX_ITERATIONS>
221 int ITEMS_PER_THREAD,
224 static __device__ __forceinline__
void FlagHeads(
226 FlagT (&)[ITEMS_PER_THREAD],
227 T (&)[ITEMS_PER_THREAD],
228 T (&)[ITEMS_PER_THREAD],
234 int ITEMS_PER_THREAD,
237 static __device__ __forceinline__
void FlagTails(
239 FlagT (&)[ITEMS_PER_THREAD],
240 T (&)[ITEMS_PER_THREAD],
296#ifndef DOXYGEN_SHOULD_SKIP_THIS
299 int ITEMS_PER_THREAD,
303 FlagT (&head_flags)[ITEMS_PER_THREAD],
304 T (&input)[ITEMS_PER_THREAD],
305 T (&preds)[ITEMS_PER_THREAD],
329 int ITEMS_PER_THREAD,
333 FlagT (&head_flags)[ITEMS_PER_THREAD],
334 T (&input)[ITEMS_PER_THREAD],
335 T (&preds)[ITEMS_PER_THREAD],
337 T tile_predecessor_item)
346 tile_predecessor_item :
408 int ITEMS_PER_THREAD,
412 FlagT (&head_flags)[ITEMS_PER_THREAD],
413 T (&input)[ITEMS_PER_THREAD],
416 T preds[ITEMS_PER_THREAD];
417 FlagHeads(head_flags, input, preds, flag_op);
477 int ITEMS_PER_THREAD,
481 FlagT (&head_flags)[ITEMS_PER_THREAD],
482 T (&input)[ITEMS_PER_THREAD],
484 T tile_predecessor_item)
486 T preds[ITEMS_PER_THREAD];
487 FlagHeads(head_flags, input, preds, flag_op, tile_predecessor_item);
550 int ITEMS_PER_THREAD,
554 FlagT (&tail_flags)[ITEMS_PER_THREAD],
555 T (&input)[ITEMS_PER_THREAD],
568 input[ITEMS_PER_THREAD - 1],
570 (
linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);
634 int ITEMS_PER_THREAD,
638 FlagT (&tail_flags)[ITEMS_PER_THREAD],
639 T (&input)[ITEMS_PER_THREAD],
641 T tile_successor_item)
650 tile_successor_item :
655 input[ITEMS_PER_THREAD - 1],
657 (
linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);
732 int ITEMS_PER_THREAD,
736 FlagT (&head_flags)[ITEMS_PER_THREAD],
737 FlagT (&tail_flags)[ITEMS_PER_THREAD],
738 T (&input)[ITEMS_PER_THREAD],
747 T preds[ITEMS_PER_THREAD];
770 input[ITEMS_PER_THREAD - 1],
772 (
linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);
848 int ITEMS_PER_THREAD,
852 FlagT (&head_flags)[ITEMS_PER_THREAD],
853 FlagT (&tail_flags)[ITEMS_PER_THREAD],
854 T tile_successor_item,
855 T (&input)[ITEMS_PER_THREAD],
864 T preds[ITEMS_PER_THREAD];
883 tile_successor_item :
888 input[ITEMS_PER_THREAD - 1],
890 (
linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);
972 int ITEMS_PER_THREAD,
976 FlagT (&head_flags)[ITEMS_PER_THREAD],
977 T tile_predecessor_item,
978 FlagT (&tail_flags)[ITEMS_PER_THREAD],
979 T (&input)[ITEMS_PER_THREAD],
988 T preds[ITEMS_PER_THREAD];
992 tile_predecessor_item :
1006 input[ITEMS_PER_THREAD - 1],
1008 (
linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);
1091 int ITEMS_PER_THREAD,
1095 FlagT (&head_flags)[ITEMS_PER_THREAD],
1096 T tile_predecessor_item,
1097 FlagT (&tail_flags)[ITEMS_PER_THREAD],
1098 T tile_successor_item,
1099 T (&input)[ITEMS_PER_THREAD],
1108 T preds[ITEMS_PER_THREAD];
1112 tile_predecessor_item :
1123 tile_successor_item :
1128 input[ITEMS_PER_THREAD - 1],
1130 (
linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD);
The BlockDiscontinuity class provides collective methods for flagging discontinuities within an order...
__device__ __forceinline__ void FlagHeadsAndTails(FlagT(&head_flags)[ITEMS_PER_THREAD], T tile_predecessor_item, FlagT(&tail_flags)[ITEMS_PER_THREAD], T(&input)[ITEMS_PER_THREAD], FlagOp flag_op)
Sets both head and tail flags indicating discontinuities between items partitioned across the thread ...
__device__ __forceinline__ void FlagTails(FlagT(&tail_flags)[ITEMS_PER_THREAD], T(&input)[ITEMS_PER_THREAD], FlagOp flag_op)
Sets tail flags indicating discontinuities between items partitioned across the thread block,...
unsigned int linear_tid
Linear thread-id.
__device__ __forceinline__ void FlagHeads(FlagT(&head_flags)[ITEMS_PER_THREAD], T(&input)[ITEMS_PER_THREAD], T(&preds)[ITEMS_PER_THREAD], FlagOp flag_op, T tile_predecessor_item)
__device__ __forceinline__ void FlagHeads(FlagT(&head_flags)[ITEMS_PER_THREAD], T(&input)[ITEMS_PER_THREAD], T(&preds)[ITEMS_PER_THREAD], FlagOp flag_op)
__device__ __forceinline__ void FlagTails(FlagT(&tail_flags)[ITEMS_PER_THREAD], T(&input)[ITEMS_PER_THREAD], FlagOp flag_op, T tile_successor_item)
Sets tail flags indicating discontinuities between items partitioned across the thread block.
__device__ __forceinline__ void FlagHeadsAndTails(FlagT(&head_flags)[ITEMS_PER_THREAD], FlagT(&tail_flags)[ITEMS_PER_THREAD], T(&input)[ITEMS_PER_THREAD], FlagOp flag_op)
Sets both head and tail flags indicating discontinuities between items partitioned across the thread ...
_TempStorage & temp_storage
Shared storage reference.
__device__ __forceinline__ BlockDiscontinuity(TempStorage &temp_storage)
Collective constructor using the specified memory allocation as temporary storage.
__device__ __forceinline__ void FlagHeads(FlagT(&head_flags)[ITEMS_PER_THREAD], T(&input)[ITEMS_PER_THREAD], FlagOp flag_op)
Sets head flags indicating discontinuities between items partitioned across the thread block,...
__device__ __forceinline__ BlockDiscontinuity()
Collective constructor using a private static allocation of shared memory as temporary storage.
__device__ __forceinline__ void FlagHeadsAndTails(FlagT(&head_flags)[ITEMS_PER_THREAD], FlagT(&tail_flags)[ITEMS_PER_THREAD], T tile_successor_item, T(&input)[ITEMS_PER_THREAD], FlagOp flag_op)
Sets both head and tail flags indicating discontinuities between items partitioned across the thread ...
__device__ __forceinline__ void FlagHeads(FlagT(&head_flags)[ITEMS_PER_THREAD], T(&input)[ITEMS_PER_THREAD], FlagOp flag_op, T tile_predecessor_item)
Sets head flags indicating discontinuities between items partitioned across the thread block.
__device__ __forceinline__ void FlagHeadsAndTails(FlagT(&head_flags)[ITEMS_PER_THREAD], T tile_predecessor_item, FlagT(&tail_flags)[ITEMS_PER_THREAD], T tile_successor_item, T(&input)[ITEMS_PER_THREAD], FlagOp flag_op)
Sets both head and tail flags indicating discontinuities between items partitioned across the thread ...
@ BLOCK_THREADS
The thread block size in threads.
__device__ __forceinline__ _TempStorage & PrivateStorage()
Internal storage allocator.
__device__ __forceinline__ int RowMajorTid(int block_dim_x, int block_dim_y, int block_dim_z)
Returns the row-major linear thread identifier for a multidimensional thread block.
Optional outer namespace(s)
Specialization for when FlagOp has third index param.
Templated unrolling of item comparison (inductive case)
static __device__ __forceinline__ void FlagHeads(int linear_tid, FlagT(&flags)[ITEMS_PER_THREAD], T(&input)[ITEMS_PER_THREAD], T(&preds)[ITEMS_PER_THREAD], FlagOp flag_op)
static __device__ __forceinline__ void FlagTails(int linear_tid, FlagT(&flags)[ITEMS_PER_THREAD], T(&input)[ITEMS_PER_THREAD], FlagOp flag_op)
\smemstorage{BlockDiscontinuity}
Shared memory storage layout type (last element from each thread's input)
A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions.
#define CUB_PTX_ARCH
CUB_PTX_ARCH reflects the PTX version targeted by the active compiler pass (or zero during the host p...