#include <stdio.h>
#include <iterator>
#include "dispatch_scan.cuh"
#include "../../agent/agent_reduce_by_key.cuh"
#include "../../thread/thread_operators.cuh"
#include "../../grid/grid_queue.cuh"
#include "../../util_device.cuh"
#include "../../util_namespace.cuh"
Go to the source code of this file.
Namespaces | |
namespace | cub |
Optional outer namespace(s) | |
Functions | |
template<typename AgentReduceByKeyPolicyT , typename KeysInputIteratorT , typename UniqueOutputIteratorT , typename ValuesInputIteratorT , typename AggregatesOutputIteratorT , typename NumRunsOutputIteratorT , typename ScanTileStateT , typename EqualityOpT , typename ReductionOpT , typename OffsetT > | |
cub::__launch_bounds__ (int(AgentReduceByKeyPolicyT::BLOCK_THREADS)) __global__ void DeviceReduceByKeyKernel(KeysInputIteratorT d_keys_in | |
Signed integer type for global offsets (describes the OffsetT template parameter) | |
cub::AgentReduceByKeyT (temp_storage, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, equality_op, reduction_op).ConsumeRange(num_items | |
Variables | |
UniqueOutputIteratorT | cub::d_unique_out |
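Pointer to the input sequence of keys (note: this text appears to describe the preceding kernel parameter d_keys_in rather than d_unique_out) | |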
UniqueOutputIteratorT ValuesInputIteratorT AggregatesOutputIteratorT | cub::d_aggregates_out |
Pointer to the output sequence of value aggregates (one aggregate per run) | |
UniqueOutputIteratorT ValuesInputIteratorT AggregatesOutputIteratorT NumRunsOutputIteratorT | cub::d_num_runs_out |
Pointer to total number of runs encountered (i.e., the length of d_unique_out) | |
UniqueOutputIteratorT ValuesInputIteratorT AggregatesOutputIteratorT NumRunsOutputIteratorT ScanTileStateT | cub::tile_state |
Tile status interface. | |
UniqueOutputIteratorT ValuesInputIteratorT AggregatesOutputIteratorT NumRunsOutputIteratorT ScanTileStateT int | cub::start_tile |
The starting tile for the current grid. | |
UniqueOutputIteratorT ValuesInputIteratorT AggregatesOutputIteratorT NumRunsOutputIteratorT ScanTileStateT int EqualityOpT | cub::equality_op |
KeyT equality operator. | |
cub::DeviceReduceByKey provides device-wide, parallel operations for reducing segments of values residing within device-accessible memory.
Definition in file dispatch_reduce_by_key.cuh.