OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
device_spmv.cuh
Go to the documentation of this file.
1 
2 /******************************************************************************
3  * Copyright (c) 2011, Duane Merrill. All rights reserved.
4  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
35 #pragma once
36 
37 #include <stdio.h>
38 #include <iterator>
39 #include <limits>
40 
42 #include "../util_namespace.cuh"
43 
45 CUB_NS_PREFIX
46 
48 namespace cub {
49 
50 
// DeviceSpmv groups device-wide sparse-matrix * dense-vector entry points; the only
// member visible in this listing is the static template CsrMV.
// NOTE(review): every code line here is prefixed with the line number of the original
// header, and that numbering skips (72 -> 76 -> 129, 145 -> 147, 157 -> 159,
// 165 -> 167), so some original lines are not shown in this listing.
70 struct DeviceSpmv
71 {
72  /******************************************************************/
76 
// CsrMV: performs the matrix-vector operation y = A*x (per the page's own summary).
//
// What the visible body does:
//   * stores each pointer/size argument in the same-named field of `spmv_params`,
//     except d_row_offsets, which is stored as `d_row_offsets + 1` in
//     `d_row_end_offsets`;
//   * sets `alpha` to 1.0 and `beta` to 0.0;
//   * passes d_temp_storage, temp_storage_bytes, spmv_params, stream and
//     debug_synchronous as one argument list to a call whose head is not visible.
//
// Parameters (meanings inferred from names only -- TODO confirm against the CUB docs):
//   d_temp_storage / temp_storage_bytes : scratch buffer and its size; note that
//                                         temp_storage_bytes is taken by reference.
//   d_values, d_column_indices          : presumably the CSR nonzero values and their
//                                         column indices.
//   d_row_offsets                       : presumably the CSR row-offsets array.
//   d_vector_x, d_vector_y              : input vector x and output vector y.
//   num_rows, num_cols, num_nonzeros    : matrix dimensions and nonzero count.
//   stream                              : CUDA stream, defaults to 0.
//   debug_synchronous                   : defaults to false; only forwarded here.
//
// Returns cudaError_t. NOTE(review): no `return` statement is visible in this
// listing; presumably it sits on the missing original line 158.
129  template <
130  typename ValueT>
131  CUB_RUNTIME_FUNCTION
132  static cudaError_t CsrMV(
133  void* d_temp_storage,
134  size_t& temp_storage_bytes,
135  ValueT* d_values,
136  int* d_row_offsets,
137  int* d_column_indices,
138  ValueT* d_vector_x,
139  ValueT* d_vector_y,
140  int num_rows,
141  int num_cols,
142  int num_nonzeros,
143  cudaStream_t stream = 0,
144  bool debug_synchronous = false)
145  {
// NOTE(review): original line 146 is absent from this listing; `spmv_params` is
// presumably declared there (its type is not visible here) -- confirm upstream.
147  spmv_params.d_values = d_values;
// Row-end offsets begin one element past the start of the row-offsets array.
148  spmv_params.d_row_end_offsets = d_row_offsets + 1;
149  spmv_params.d_column_indices = d_column_indices;
150  spmv_params.d_vector_x = d_vector_x;
151  spmv_params.d_vector_y = d_vector_y;
152  spmv_params.num_rows = num_rows;
153  spmv_params.num_cols = num_cols;
154  spmv_params.num_nonzeros = num_nonzeros;
// alpha = 1 and beta = 0 are consistent with plain y = A*x. Presumably the
// dispatcher evaluates y = alpha*A*x + beta*y -- TODO confirm.
// The literals are doubles; they are converted to whatever type the fields have.
155  spmv_params.alpha = 1.0;
156  spmv_params.beta = 0.0;
157 
// NOTE(review): original line 158 is absent from this listing; the lines below are
// the argument list of a call whose head (presumably `return <dispatcher>::Dispatch(`)
// is not shown -- confirm upstream.
159  d_temp_storage,
160  temp_storage_bytes,
161  spmv_params,
162  stream,
163  debug_synchronous);
164  }
165 
167 };
168 
169 
170 
171 } // CUB namespace
172 CUB_NS_POSTFIX // Optional outer namespace(s)
173 
174 
Optional outer namespace(s)
ArrayWrapper< int, NUM_ACTIVE_CHANNELS > ArrayWrapper< int, NUM_ACTIVE_CHANNELS > ArrayWrapper< CounterT *, NUM_ACTIVE_CHANNELS > ArrayWrapper< CounterT *, NUM_ACTIVE_CHANNELS > ArrayWrapper< OutputDecodeOpT, NUM_ACTIVE_CHANNELS > ArrayWrapper< PrivatizedDecodeOpT, NUM_ACTIVE_CHANNELS > OffsetT OffsetT num_rows
The number of rows in the region of interest.
DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * dense-vector multiplication (SpMV).
Definition: device_spmv.cuh:70
CUB_RUNTIME_FUNCTION static __forceinline__ cudaError_t Dispatch(void *d_temp_storage, size_t &temp_storage_bytes, SpmvParamsT &spmv_params, cudaStream_t stream, bool debug_synchronous, Spmv1ColKernelT spmv_1col_kernel, SpmvSearchKernelT spmv_search_kernel, SpmvKernelT spmv_kernel, SegmentFixupKernelT segment_fixup_kernel, KernelConfig spmv_config, KernelConfig segment_fixup_config)
Function type of cub::DeviceSegmentFixupKernelT
OffsetT spmv_params
[in] SpMV input parameter bundle
Signed integer type for sequence offsets
static CUB_RUNTIME_FUNCTION cudaError_t CsrMV(void *d_temp_storage, size_t &temp_storage_bytes, ValueT *d_values, int *d_row_offsets, int *d_column_indices, ValueT *d_vector_x, ValueT *d_vector_y, int num_rows, int num_cols, int num_nonzeros, cudaStream_t stream=0, bool debug_synchronous=false)
This function performs the matrix-vector operation y = A*x.