#ifndef MAP_VECTOR_SPARSE_HPP_
#define MAP_VECTOR_SPARSE_HPP_

#include "Vector/map_vector.hpp"
#include "Vector/cuda/map_vector_sparse_cuda_ker.cuh"
#include "Vector/cuda/map_vector_sparse_cuda_kernels.cuh"
#include "util/ofp_context.hpp"
#include "util/cuda/kernels.cuh"
#include "util/cuda/scan_ofp.cuh"
#include "util/cuda/sort_ofp.cuh"
#include "util/cuda/segreduce_ofp.cuh"
#include "util/cuda/merge_ofp.cuh"
template<typename OfpmVectorT>
using ValueTypeOf = typename std::remove_reference<OfpmVectorT>::type::value_type;

namespace openfpm
{
	template<typename sg_type>
	struct htoD
	{
		//! encapsulated source object
		sg_type & sg;

		//! element to transfer
		unsigned int lele;

		htoD(sg_type & sg, unsigned int lele)
		:sg(sg),lele(lele)
		{};

		//! It calls the hostToDevice function for each property
		template<typename T>
		inline void operator()(T& t) const
		{
			sg.template hostToDevice<T::value>(lele,lele);
		}
	};
	constexpr int VECTOR_SPARSE_STANDARD = 1;
	constexpr int VECTOR_SPARSE_BLOCK = 2;
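	// Note: VECTOR_SPARSE_STANDARD keeps one scalar value per sparse index, while
	// VECTOR_SPARSE_BLOCK keeps a fixed-size block of values per index; the
	// scalar_block_implementation_switch further below dispatches on this selector.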
	template<typename reduction_type, unsigned int impl>
	struct cpu_block_process
	{
		template<typename encap_src, typename encap_dst>
		static inline void process(encap_src & src, encap_dst & dst)
		{
			dst = reduction_type::red(dst,src);
		}
	};
	template<typename reduction_type>
	struct cpu_block_process<reduction_type,VECTOR_SPARSE_BLOCK>
	{
		template<typename encap_src, typename encap_dst>
		static inline void process(encap_src & src, encap_dst & dst)
		{
			for (size_t i = 0 ; i < encap_src::size ; i++)
			{
				dst[i] = reduction_type::red(dst[i],src[i]);
			}
		}
	};
	template<typename reduction_type>
	struct cpu_block_process<reduction_type,3>
	{
		template<typename encap_src, typename encap_dst, unsigned int N1>
		static inline void process(encap_src & src, encap_dst (& dst)[N1])
		{
			for (unsigned int j = 0 ; j < N1 ; j++)
			{
				for (size_t i = 0 ; i < encap_dst::size ; i++)
				{
					dst[i][j] = reduction_type::red(dst[i][j],src[j][i]);
				}
			}
		}

		template<unsigned int N1, unsigned int blockSize, typename encap_src, typename encap_dst>
		static inline void process_e(encap_src & src, encap_dst & dst)
		{
			for (unsigned int j = 0 ; j < N1 ; j++)
			{
				for (size_t i = 0 ; i < blockSize ; i++)
				{
					dst[i][j] = reduction_type::red(dst[i][j],src[i][j]);
				}
			}
		}
	};
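	/* A reduction_type in this file is any functor exposing a static red(a,b)
	 * that combines two values (the sadd_/smax_ family used elsewhere in
	 * OpenFPM). A minimal sketch of the shape cpu_block_process expects;
	 * ssum_sketch is a hypothetical example, not part of this header:
	 *
	 * \code
	 * template<unsigned int prp>
	 * struct ssum_sketch
	 * {
	 * 	typedef boost::mpl::int_<prp> prop;
	 *
	 * 	template<typename red_t>
	 * 	static red_t red(red_t & r1, red_t & r2)
	 * 	{
	 * 		return r1 + r2;
	 * 	}
	 * };
	 * \endcode
	 */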
	/*! \brief Functor switch to select the vector sparse for the standard scalar
	 *         and the blocked implementation
	 */
	template<unsigned int impl, typename block_functor>
	struct scalar_block_implementation_switch
	{
		template<unsigned int p, typename vector_index_type>
		static void extendSegments(vector_index_type & segments, size_t dataSize)
		{
#ifdef __NVCC__
			// pad the segment list with the total data size
			segments.resize(segments.size()+1);
			segments.template get<p>(segments.size() - 1) = dataSize;
			segments.template hostToDevice<p>(segments.size() - 1, segments.size() - 1);
#else // __NVCC__
			std::cout << __FILE__ << ":" << __LINE__ << " error: this file is supposed to be compiled with nvcc" << std::endl;
#endif // __NVCC__
		}
		template <unsigned int pSegment, typename vector_reduction, typename T, typename vector_data_type, typename vector_index_type, typename vector_index_type2>
		static void segreduce(vector_data_type & vector_data,
				vector_data_type & vector_data_unsorted,
				vector_index_type & vector_data_map,
				vector_index_type2 & segment_offset,
				vector_data_type & vector_data_red,
				block_functor & blf,
				gpu::ofp_context_t & gpuContext)
		{
#ifdef __NVCC__
			typedef typename boost::mpl::at<vector_reduction, T>::type reduction_type;
			typedef typename boost::mpl::at<typename vector_data_type::value_type::type,typename reduction_type::prop>::type red_type;
			typedef typename reduction_type::template op_red<red_type> red_op;
			typedef typename boost::mpl::at<typename vector_index_type::value_type::type,boost::mpl::int_<0>>::type seg_type;
			typename reduction_type::template op_initial_value<red_type> initial_value_functor;

			assert((std::is_same<seg_type,int>::value == true));

			openfpm::segreduce(
				(red_type *)vector_data.template getDeviceBuffer<reduction_type::prop::value>(), vector_data.size(),
				(int *)segment_offset.template getDeviceBuffer<1>(), segment_offset.size()-1,
				(red_type *)vector_data_red.template getDeviceBuffer<reduction_type::prop::value>(),
				red_op(), initial_value_functor(), gpuContext);
#else // __NVCC__
			std::cout << __FILE__ << ":" << __LINE__ << " error: this file is supposed to be compiled with nvcc" << std::endl;
#endif // __NVCC__
		}
		template <typename vector_data_type,
				  typename vector_index_type,
				  typename vector_index_type2,
				  typename vector_index_dtmp_type,
				  typename Ti,
				  typename ... v_reduce>
		static void solveConflicts(vector_index_type & vct_index,
				vector_index_type & vct_index_tmp,
				vector_index_type & vct_index_tmp2,
				vector_index_type & vct_index_tmp3,
				vector_index_dtmp_type & vct_index_dtmp,
				vector_index_type & vct_add_index_cont_1,
				vector_index_type2 & vct_add_index_unique,
				vector_data_type & vct_data,
				vector_data_type & vct_add_data,
				vector_data_type & vct_add_data_unique,
				vector_data_type & vct_add_data_cont,
				ite_gpu<1> & itew,
				block_functor & blf,
				gpu::ofp_context_t & gpuContext)
		{
#ifdef __NVCC__
			CUDA_LAUNCH((solve_conflicts<
							decltype(vct_index_tmp.toKernel()),
							decltype(vct_data.toKernel()),
							decltype(vct_index_dtmp.toKernel()),
							128,
							v_reduce ...
							>),
							itew,
							vct_index_tmp.toKernel(),vct_data.toKernel(),
							vct_index_tmp2.toKernel(),vct_add_data_unique.toKernel(),
							vct_index_tmp3.toKernel(),vct_add_data_cont.toKernel(),
							vct_index_dtmp.toKernel(),
							(int)vct_index.size());

			// scan the per-block counters to get the output offsets
			openfpm::scan(
				(Ti*)vct_index_dtmp.template getDeviceBuffer<0>(),
				vct_index_dtmp.size(),
				(Ti *)vct_index_dtmp.template getDeviceBuffer<1>(),
				gpuContext);

			// get the size of the merged index/data vectors
			vct_index_dtmp.template deviceToHost<0,1>(vct_index_dtmp.size()-1,vct_index_dtmp.size()-1);
			int size = vct_index_dtmp.template get<1>(vct_index_dtmp.size()-1) + vct_index_dtmp.template get<0>(vct_index_dtmp.size()-1);

			vct_index.resize(size);
			vct_data.resize(size);

			CUDA_LAUNCH(realign,itew,vct_index_tmp3.toKernel(),vct_add_data_cont.toKernel(),
						vct_index.toKernel(), vct_data.toKernel(),
						vct_index_dtmp.toKernel());
#else // __NVCC__
			std::cout << __FILE__ << ":" << __LINE__ << " error: this file is supposed to be compiled with nvcc" << std::endl;
#endif // __NVCC__
		}
	};
	template<typename block_functor>
	struct scalar_block_implementation_switch<VECTOR_SPARSE_BLOCK, block_functor>
	{
		template<unsigned int p, typename vector_index_type>
		static void extendSegments(vector_index_type & segments, size_t dataSize)
		{
#ifdef __NVCC__
			// pad the segment list with the total data size
			segments.resize(segments.size()+1);
			segments.template get<p>(segments.size() - 1) = dataSize;
			segments.template hostToDevice<p>(segments.size() - 1, segments.size() - 1);
#else // __NVCC__
			std::cout << __FILE__ << ":" << __LINE__ << " error: this file is supposed to be compiled with nvcc" << std::endl;
#endif // __NVCC__
		}
		template <unsigned int pSegment, typename vector_reduction, typename T, typename vector_data_type, typename vector_index_type, typename vector_index_type2>
		static void segreduce(vector_data_type & vector_data,
				vector_data_type & vector_data_unsorted,
				vector_index_type & vector_data_map,
				vector_index_type2 & segment_offset,
				vector_data_type & vector_data_red,
				block_functor & blf,
				gpu::ofp_context_t & gpuContext)
		{
			// delegate the segmented reduction to the block functor
			blf.template seg_reduce<pSegment,vector_reduction,T>(segment_offset,vector_data,vector_data_red);
		}
		template <typename vector_data_type,
				  typename vector_index_type,
				  typename vector_index_type2,
				  typename vector_index_dtmp_type,
				  typename Ti,
				  typename ... v_reduce>
		static void solveConflicts(vector_index_type & vct_index,
				vector_index_type & vct_index_tmp,
				vector_index_type & vct_index_tmp2,
				vector_index_type & vct_index_tmp3,
				vector_index_dtmp_type & vct_index_dtmp,
				vector_index_type & vct_add_index_cont_1,
				vector_index_type2 & vct_add_index_unique,
				vector_data_type & vct_data,
				vector_data_type & vct_add_data,
				vector_data_type & vct_add_data_unique,
				vector_data_type & vct_add_data_cont,
				ite_gpu<1> & itew,
				block_functor & blf,
				gpu::ofp_context_t & gpuContext)
		{
#ifdef __NVCC__
			// delegate conflict resolution to the block functor
			blf.template solve_conflicts<1,
					decltype(vct_index_tmp),
					decltype(vct_add_index_unique),
					v_reduce ...>
					(vct_index_tmp, vct_index_tmp2, vct_add_index_unique, vct_add_index_cont_1,
					 vct_data, vct_add_data,
					 vct_index, vct_add_data_cont,
					 gpuContext);

			vct_add_data_cont.swap(vct_data);
#else // __NVCC__
			std::cout << __FILE__ << ":" << __LINE__ << " error: this file is supposed to be compiled with nvcc" << std::endl;
#endif // __NVCC__
		}
	};
	//! helper to sort inserted keys while remembering their original position
	template<typename Ti>
	struct reorder
	{
		//! inserted key
		Ti id;
		//! original insertion position
		Ti id2;

		bool operator<(const reorder & t) const
		{
			return id < t.id;
		}
	};
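	/* flush_on_cpu fills one reorder entry per insertion (id = key, id2 =
	 * original slot in the insert buffer) and sorts by id, so equal keys become
	 * adjacent while id2 still locates the matching payload in vct_add_data.
	 */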
	template<typename reduction_type, typename vector_reduction, typename T, unsigned int impl, typename red_type>
	struct sparse_vector_reduction_cpu_impl
	{
		template<typename vector_data_type, typename vector_index_type, typename vector_index_type_reo>
		static inline void red(size_t & i, vector_data_type & vector_data_red,
							   vector_data_type & vector_data,
							   vector_index_type & vector_index,
							   vector_index_type_reo & reorder_add_index_cpu)
		{
			size_t start = reorder_add_index_cpu.get(i).id;
			red_type red = vector_data.template get<reduction_type::prop::value>(i);

			size_t j = 1;
			for ( ; i+j < reorder_add_index_cpu.size() && reorder_add_index_cpu.get(i+j).id == start ; j++)
			{
				red = reduction_type::red(red,vector_data.template get<reduction_type::prop::value>(i+j));
			}

			vector_data_red.add();
			vector_data_red.template get<reduction_type::prop::value>(vector_data_red.size()-1) = red;

			vector_index.add();
			vector_index.template get<0>(vector_index.size() - 1) = reorder_add_index_cpu.get(i).id;

			i += j;
		}
	};
	template<typename reduction_type, typename vector_reduction, typename T, unsigned int impl, typename red_type, unsigned int N1>
	struct sparse_vector_reduction_cpu_impl<reduction_type,vector_reduction,T,impl,red_type[N1]>
	{
		template<typename vector_data_type, typename vector_index_type, typename vector_index_type_reo>
		static inline void red(size_t & i, vector_data_type & vector_data_red,
							   vector_data_type & vector_data,
							   vector_index_type & vector_index,
							   vector_index_type_reo & reorder_add_index_cpu)
		{
			size_t start = reorder_add_index_cpu.get(i).id;
			red_type red[N1];

			for (size_t k = 0 ; k < N1 ; k++)
			{
				red[k] = vector_data.template get<reduction_type::prop::value>(i)[k];
			}

			size_t j = 1;
			for ( ; i+j < reorder_add_index_cpu.size() && reorder_add_index_cpu.get(i+j).id == start ; j++)
			{
				auto ev = vector_data.template get<reduction_type::prop::value>(i+j);

				for (size_t k = 0 ; k < N1 ; k++)
				{
					red[k] = reduction_type::red(red[k],ev[k]);
				}
			}

			vector_data_red.add();

			for (size_t k = 0 ; k < N1 ; k++)
			{
				vector_data_red.template get<reduction_type::prop::value>(vector_data_red.size()-1)[k] = red[k];
			}

			vector_index.add();
			vector_index.template get<0>(vector_index.size() - 1) = reorder_add_index_cpu.get(i).id;

			i += j;
		}
	};
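	/* Both overloads scan the run of entries sharing the same key (start), fold
	 * them with reduction_type::red, and append a single index/value pair to the
	 * output vectors; the caller resumes from i, which is advanced past the run.
	 */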
	/*! \brief this class is a functor for "for_each" algorithm
	 */
	template<typename vector_data_type,
			 typename vector_index_type,
			 typename vector_index_type_reo,
			 typename vector_reduction,
			 unsigned int impl>
	struct sparse_vector_reduction_cpu
	{
		//! Vector in which to do the reduction
		vector_data_type & vector_data_red;

		//! Vector in which to do the reduction
		vector_data_type & vector_data;

		//! Index type vector
		vector_index_type & vector_index;

		//! reorder vector index
		vector_index_type_reo & reorder_add_index_cpu;

		//! constructor
		sparse_vector_reduction_cpu(vector_data_type & vector_data_red,
									vector_data_type & vector_data,
									vector_index_type & vector_index,
									vector_index_type_reo & reorder_add_index_cpu)
		:vector_data_red(vector_data_red),vector_data(vector_data),
		 vector_index(vector_index),reorder_add_index_cpu(reorder_add_index_cpu)
		{};

		//! It calls the reduction function for each property
		template<typename T>
		inline void operator()(T& t) const
		{
			typedef typename boost::mpl::at<vector_reduction, T>::type reduction_type;
			typedef typename boost::mpl::at<typename ValueTypeOf<vector_data_type>::type,
											typename reduction_type::prop>::type red_type;

			if (reduction_type::is_special() == false)
			{
				for (size_t i = 0 ; i < reorder_add_index_cpu.size() ; )
				{
					sparse_vector_reduction_cpu_impl<reduction_type,vector_reduction,T,impl,red_type>::red(i,vector_data_red,vector_data,vector_index,reorder_add_index_cpu);
				}
			}
		}
	};
	/*! \brief this class is a functor for "for_each" algorithm
	 */
	template<typename encap_src,
			 typename encap_dst,
			 typename vector_reduction>
	struct sparse_vector_reduction_solve_conflict_assign_cpu
	{
		//! source
		encap_src & src;

		//! destination
		encap_dst & dst;

		//! constructor
		sparse_vector_reduction_solve_conflict_assign_cpu(encap_src & src, encap_dst & dst)
		:src(src),dst(dst)
		{};

		//! It calls the copy function for each property
		template<typename T>
		inline void operator()(T& t) const
		{
			typedef typename boost::mpl::at<vector_reduction, T>::type reduction_type;

			dst.template get<reduction_type::prop::value>() = src.template get<reduction_type::prop::value>();
		}
	};
	template<unsigned int impl, typename vector_reduction, typename T, typename red_type>
	struct sparse_vector_reduction_solve_conflict_reduce_cpu_impl
	{
		template<typename encap_src, typename encap_dst>
		static inline void red(encap_src & src, encap_dst & dst)
		{
			typedef typename boost::mpl::at<vector_reduction, T>::type reduction_type;

			cpu_block_process<reduction_type,impl>::process(src.template get<reduction_type::prop::value>(),dst.template get<reduction_type::prop::value>());
		}
	};
	template<unsigned int impl, typename vector_reduction, typename T, typename red_type, unsigned int N1>
	struct sparse_vector_reduction_solve_conflict_reduce_cpu_impl<impl,vector_reduction,T,red_type[N1]>
	{
		template<typename encap_src, typename encap_dst>
		static inline void red(encap_src & src, encap_dst & dst)
		{
			typedef typename boost::mpl::at<vector_reduction, T>::type reduction_type;

			auto src_e = src.template get<reduction_type::prop::value>();
			auto dst_e = dst.template get<reduction_type::prop::value>();

			// apply the blocked reduction element by element
			cpu_block_process<reduction_type,impl+1>::template process_e<N1,red_type::size>(src_e,dst_e);
		}
	};
	/*! \brief this class is a functor for "for_each" algorithm
	 */
	template<typename encap_src,
			 typename encap_dst,
			 typename vector_reduction,
			 unsigned int impl>
	struct sparse_vector_reduction_solve_conflict_reduce_cpu
	{
		//! source
		encap_src & src;

		//! destination
		encap_dst & dst;

		//! constructor
		sparse_vector_reduction_solve_conflict_reduce_cpu(encap_src & src, encap_dst & dst)
		:src(src),dst(dst)
		{};

		//! It calls the reduction function for each property
		template<typename T>
		inline void operator()(T& t) const
		{
			typedef typename boost::mpl::at<vector_reduction, T>::type reduction_type;
			typedef typename boost::mpl::at<typename encap_src::T_type::type, typename reduction_type::prop>::type red_type;

			sparse_vector_reduction_solve_conflict_reduce_cpu_impl<impl,vector_reduction,T,red_type>::red(src,dst);
		}
	};
	/*! \brief this class is a functor for "for_each" algorithm
	 */
	template<typename vector_data_type,
			 typename vector_index_type,
			 typename vector_index_type2,
			 typename vector_reduction,
			 typename block_functor,
			 unsigned int impl2,
			 unsigned int pSegment=1>
	struct sparse_vector_reduction
	{
		//! Vector in which to do the reduction
		vector_data_type & vector_data_red;

		//! new datas
		vector_data_type & vector_data;

		//! new data in an unsorted way
		vector_data_type & vector_data_unsorted;

		//! map of the data
		vector_index_type & vector_data_map;

		//! segment of offsets
		vector_index_type2 & segment_offset;

		//! block functor
		block_functor & blf;

		//! gpu context
		gpu::ofp_context_t & gpuContext;

		//! constructor
		sparse_vector_reduction(vector_data_type & vector_data_red,
								vector_data_type & vector_data,
								vector_data_type & vector_data_unsorted,
								vector_index_type & vector_data_map,
								vector_index_type2 & segment_offset,
								block_functor & blf,
								gpu::ofp_context_t & gpuContext)
		:vector_data_red(vector_data_red),vector_data(vector_data),vector_data_unsorted(vector_data_unsorted),
		 vector_data_map(vector_data_map),segment_offset(segment_offset),blf(blf),gpuContext(gpuContext)
		{};

		//! It calls the reduction function for each property
		template<typename T>
		inline void operator()(T& t) const
		{
#ifdef __NVCC__
			typedef typename boost::mpl::at<vector_reduction, T>::type reduction_type;
			typedef typename boost::mpl::at<typename ValueTypeOf<vector_data_type>::type,
											typename reduction_type::prop>::type red_type;

			if (reduction_type::is_special() == false)
			{
				scalar_block_implementation_switch<impl2, block_functor>::template segreduce<pSegment, vector_reduction, T>(
						vector_data, vector_data_unsorted, vector_data_map, segment_offset, vector_data_red, blf, gpuContext);
			}
#else // __NVCC__
			std::cout << __FILE__ << ":" << __LINE__ << " error: this file is supposed to be compiled with nvcc" << std::endl;
#endif // __NVCC__
		}
	};
	struct stub_block_functor
	{
		template<unsigned int pSegment, typename vector_reduction, typename T, typename vector_index_type, typename vector_data_type>
		static bool seg_reduce(vector_index_type & segments, vector_data_type & src, vector_data_type & dst)
		{
			return true;
		}
		template<typename vector_index_type, typename vector_data_type, typename ... v_reduce>
		static bool solve_conflicts(vector_index_type & keys, vector_index_type & merge_indices,
				vector_data_type & data1, vector_data_type & data2,
				vector_index_type & indices_tmp, vector_data_type & data_tmp,
				vector_index_type & keysOut, vector_data_type & dataOut,
				gpu::ofp_context_t & gpuContext)
		{
			return true;
		}
	};
	/*! \brief this class is a functor for "for_each" algorithm
	 */
	template<typename vector_data_type,
			 typename vector_index_type,
			 typename vector_reduction>
	struct sparse_vector_special
	{
		//! Vector in which to do the reduction
		vector_data_type & vector_data_red;

		//! Vector in which to do the reduction
		vector_data_type & vector_data;

		//! segment of offsets
		vector_index_type & segment_offset;

		//! gpu context
		gpu::ofp_context_t & gpuContext;

		//! constructor
		sparse_vector_special(vector_data_type & vector_data_red,
							  vector_data_type & vector_data,
							  vector_index_type & segment_offset,
							  gpu::ofp_context_t & gpuContext)
		:vector_data_red(vector_data_red),vector_data(vector_data),segment_offset(segment_offset),gpuContext(gpuContext)
		{};

		//! It calls the reduction function for each property
		template<typename T>
		inline void operator()(T& t) const
		{
#ifdef __NVCC__
			typedef typename boost::mpl::at<vector_reduction,T>::type reduction_type;

			// reduction type
			typedef typename boost::mpl::at<typename vector_data_type::value_type::type,typename reduction_type::prop>::type red_type;

			if (reduction_type::is_special() == true)
			{
				// the special-reduction kernels are launched here
			}
#else // __NVCC__
			std::cout << __FILE__ << ":" << __LINE__ << " error: this file is supposed to be compiled with nvcc" << std::endl;
#endif // __NVCC__
		}
	};
	template<typename T,
			 typename Ti = long int,
			 typename Memory = HeapMemory,
			 typename layout_base = memory_traits_lin,
			 typename grow_p = grow_policy_double,
			 unsigned int impl = vect_isel<T>::value,
			 unsigned int impl2 = VECTOR_SPARSE_STANDARD,
			 typename block_functor = stub_block_functor>
	class vector_sparse
	{
		int n_gpu_add_block_slot = 0;
		int n_gpu_rem_block_slot = 0;
		/*! \brief get the element i
		 *
		 * Branch-free lower-bound search on the sorted index vector; it does not
		 * fall back to the background element on a miss.
		 */
		template<bool prefetch>
		inline Ti _branchfree_search_nobck(Ti x, Ti & id) const
		{
			if (vct_index.size() == 0)	{id = 0; return -1;}
			const Ti *base = &vct_index.template get<0>(0);
			const Ti *end = (const Ti *)vct_index.template getPointer<0>() + vct_index.size();
			Ti n = vct_data.size()-1;
			while (n > 1)
			{
				Ti half = n / 2;
				if (prefetch)
				{
					__builtin_prefetch(base + half/2, 0, 0);
					__builtin_prefetch(base + half + half/2, 0, 0);
				}
				base = (base[half] < x) ? base+half : base;
				n -= half;
			}

			int off = (*base < x);
			id = base - &vct_index.template get<0>(0) + off;
			return (base + off != end)?*(base + off):-1;
		}
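		/* Example (sketch): how the search maps a key to a slot; vs stands for a
		 * hypothetical vector_sparse instance.
		 *
		 * \code
		 * Ti pos;
		 * Ti found = vs.template _branchfree_search_nobck<true>(57, pos);
		 * // pos is the lower-bound position of 57 in vct_index; found == 57
		 * // only if the key exists
		 * \endcode
		 *
		 * The wrapper below redirects misses to the background slot kept at the
		 * end of vct_data.
		 */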
		/*! \brief get the element i
		 *
		 * Like _branchfree_search_nobck, but a miss returns the background slot.
		 */
		template<bool prefetch>
		inline void _branchfree_search(Ti x, Ti & id) const
		{
			Ti v = _branchfree_search_nobck<prefetch>(x,id);
			id = (x == v)?id:vct_data.size()-1;
		}
		/*! \brief Compact the per-block insert buffers into a continuous list of keys
		 */
		size_t make_continuos(vector<aggregate<Ti>,Memory,layout_base,grow_p> & vct_nadd_index,
							  vector<aggregate<Ti>,Memory,layout_base,grow_p> & vct_add_index,
							  vector<aggregate<Ti>,Memory,layout_base,grow_p> & vct_add_index_cont_0,
							  vector<aggregate<Ti>,Memory,layout_base,grow_p> & vct_add_index_cont_1,
							  vector<T,Memory,layout_base,grow_p> & vct_add_data_cont,
							  gpu::ofp_context_t & gpuContext)
		{
			// add 0 as the last element of vct_nadd_index
			vct_nadd_index.resize(vct_nadd_index.size()+1);
			vct_nadd_index.template get<0>(vct_nadd_index.size()-1) = 0;
			vct_nadd_index.template hostToDevice<0>(vct_nadd_index.size()-1,vct_nadd_index.size()-1);

			// an exclusive scan of the per-block insert counters gives, for each
			// block, where its keys start in the compacted output
			vct_index_tmp4.resize(vct_nadd_index.size());

			openfpm::scan((Ti *)vct_nadd_index.template getDeviceBuffer<0>(),
						  vct_nadd_index.size(),
						  (Ti *)vct_index_tmp4.template getDeviceBuffer<0>(),
						  gpuContext);

			vct_index_tmp4.template deviceToHost<0>(vct_index_tmp4.size()-1,vct_index_tmp4.size()-1);
			size_t n_ele = vct_index_tmp4.template get<0>(vct_index_tmp4.size()-1);

			vct_add_index_cont_0.resize(n_ele);
			vct_add_index_cont_1.resize(n_ele);

			if (impl2 == VECTOR_SPARSE_STANDARD)
			{
				vct_add_data_cont.resize(n_ele);
			}

			if (n_gpu_add_block_slot >= 128)
			{
				ite_gpu<1> itew;
				itew.wthr.x = vct_nadd_index.size()-1;
				itew.thr.x = 128;

				CUDA_LAUNCH(construct_insert_list_key_only,itew,vct_add_index.toKernel(),
							vct_nadd_index.toKernel(),
							vct_index_tmp4.toKernel(),
							vct_add_index_cont_0.toKernel(),
							vct_add_index_cont_1.toKernel(),
							n_gpu_add_block_slot);
			}
			else
			{
				auto itew = vct_add_index.getGPUIterator();

				CUDA_LAUNCH(construct_insert_list_key_only_small_pool,itew,vct_add_index.toKernel(),
							vct_nadd_index.toKernel(),
							vct_index_tmp4.toKernel(),
							vct_add_index_cont_0.toKernel(),
							vct_add_index_cont_1.toKernel(),
							n_gpu_add_block_slot);
			}

			return n_ele;
		}
		/*! \brief sort the continuous array of inserted keys
		 */
		void reorder_indexes(vector<aggregate<Ti>,Memory,layout_base,grow_p> & vct_add_index_cont_0,
							 vector<aggregate<Ti>,Memory,layout_base,grow_p> & vct_add_index_cont_1,
							 vector<T,Memory,layout_base,grow_p> & vct_add_data_reord,
							 vector<T,Memory,layout_base,grow_p> & vct_add_data,
							 gpu::ofp_context_t & gpuContext)
		{
#ifdef __NVCC__
			size_t n_ele = vct_add_index_cont_0.size();

			n_gpu_add_block_slot = 0;

			// sort the keys; vct_add_index_cont_1 (the original positions) is
			// carried along as the value of the key/value sort
			openfpm::sort((Ti *)vct_add_index_cont_0.template getDeviceBuffer<0>(),
						  (Ti *)vct_add_index_cont_1.template getDeviceBuffer<0>(),
						  vct_add_index_cont_0.size(),
						  gpu::template less_t<Ti>(),
						  gpuContext);

			auto ite = vct_add_index_cont_0.getGPUIterator();

			if (impl2 == VECTOR_SPARSE_STANDARD)
			{
				vct_add_data_reord.resize(n_ele);
				CUDA_LAUNCH(reorder_vector_data,ite,vct_add_index_cont_1.toKernel(),vct_add_data.toKernel(),vct_add_data_reord.toKernel());
			}
#endif // __NVCC__
		}
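		/* After this step the inserted keys are sorted and vct_add_data_reord
		 * holds the payloads in the same order, so duplicated keys sit in
		 * contiguous runs that the following merge/reduction steps can collapse
		 * segment by segment.
		 */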
		/*! \brief Merge indexes
		 */
		template<typename ... v_reduce>
		void merge_indexes(vector<aggregate<Ti>,Memory,layout_base,grow_p> & vct_add_index_cont_0,
						   vector<aggregate<Ti,Ti>,Memory,layout_base,grow_p> & vct_add_index_unique,
						   vector<aggregate<Ti>,Memory,layout_base,grow_p> & vct_index_tmp,
						   vector<aggregate<Ti>,Memory,layout_base,grow_p> & vct_index_tmp2,
						   gpu::ofp_context_t & gpuContext)
		{
#ifdef __NVCC__
			typedef boost::mpl::vector<v_reduce...> vv_reduce;

			auto ite = vct_add_index_cont_0.getGPUIterator();

			vct_add_index_unique.resize(vct_add_index_cont_0.size()+1);

			ite = vct_add_index_cont_0.getGPUIterator();

			vct_index_tmp4.resize(vct_add_index_cont_0.size()+1);

			// flag the positions where a run of equal keys starts ...
			CUDA_LAUNCH((find_buffer_offsets_for_scan<0,
							decltype(vct_add_index_cont_0.toKernel()),
							decltype(vct_index_tmp4.toKernel())>),
						ite,
						vct_add_index_cont_0.toKernel(),
						vct_index_tmp4.toKernel());

			// ... and scan the flags to number the unique keys
			openfpm::scan((Ti *)vct_index_tmp4.template getDeviceBuffer<0>(),vct_index_tmp4.size(),(Ti *)vct_index_tmp4.template getDeviceBuffer<0>(),gpuContext);

			vct_index_tmp4.template deviceToHost<0>(vct_index_tmp4.size()-1,vct_index_tmp4.size()-1);
			int n_ele_unique = vct_index_tmp4.template get<0>(vct_index_tmp4.size()-1);

			vct_add_index_unique.resize(n_ele_unique);

			if (impl2 == VECTOR_SPARSE_STANDARD)
			{
				vct_add_data_unique.resize(n_ele_unique);
			}

			CUDA_LAUNCH((construct_index_unique<0>),
						ite,
						vct_add_index_cont_0.toKernel(),
						vct_index_tmp4.toKernel(),
						vct_add_index_unique.toKernel());
			// prepare the merge: existing indexes are tagged with their position,
			// new unique indexes are tagged starting from vct_index.size()
			vct_m_index.resize(vct_index.size());

			if (vct_m_index.size() != 0)
			{
				ite = vct_m_index.getGPUIterator();
				CUDA_LAUNCH((set_indexes<0>),ite,vct_m_index.toKernel(),0);
			}

			vct_index_tmp.resize(vct_index.size() + vct_add_index_unique.size());
			vct_index_tmp2.resize(vct_index.size() + vct_add_index_unique.size());
			vct_index_tmp3.resize(vct_index.size() + vct_add_index_unique.size());

			if (impl2 == VECTOR_SPARSE_STANDARD)
			{
				vct_add_data_cont.reserve(vct_index.size() + vct_add_index_unique.size()+1);
				vct_add_data_cont.resize(vct_index.size() + vct_add_index_unique.size());
			}

			ite = vct_add_index_unique.getGPUIterator();
			vct_index_tmp4.resize(vct_add_index_unique.size());
			CUDA_LAUNCH((set_indexes<0>),ite,vct_index_tmp4.toKernel(),(int)vct_index.size());

			ite_gpu<1> itew;
			itew.wthr.x = vct_index_tmp.size() / 128 + (vct_index_tmp.size() % 128 != 0);
			itew.thr.x = 128;

			vct_index_dtmp.resize(itew.wthr.x);

			// merge the old sorted indexes with the new unique ones; vct_index_tmp2
			// receives the provenance tags (the merge map)
			openfpm::merge((Ti *)vct_index.template getDeviceBuffer<0>(),(Ti *)vct_m_index.template getDeviceBuffer<0>(),vct_index.size(),
						   (Ti *)vct_add_index_unique.template getDeviceBuffer<0>(),(Ti *)vct_index_tmp4.template getDeviceBuffer<0>(),vct_add_index_unique.size(),
						   (Ti *)vct_index_tmp.template getDeviceBuffer<0>(),(Ti *)vct_index_tmp2.template getDeviceBuffer<0>(),gpu::less_t<Ti>(),gpuContext);
#endif // __NVCC__
		}
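		/* vct_index_tmp now holds the merged, sorted key list and vct_index_tmp2
		 * the origin of every entry: values below vct_index.size() point back into
		 * the old vct_index, larger values into the freshly inserted unique keys.
		 * Equal neighbours in this list are exactly the conflicts that
		 * solveConflicts resolves.
		 */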
		/*! \brief Merge the data
		 */
		template<typename ... v_reduce>
		void merge_datas(vector<T,Memory,layout_base,grow_p> & vct_add_data_reord,
						 vector<aggregate<Ti,Ti>,Memory,layout_base,grow_p> & vct_add_index_unique,
						 vector<T,Memory,layout_base,grow_p> & vct_add_data,
						 vector<aggregate<Ti>,Memory,layout_base,grow_p> & vct_add_index_cont_1,
						 gpu::ofp_context_t & gpuContext)
		{
#ifdef __NVCC__
			ite_gpu<1> itew;
			itew.wthr.x = vct_index_tmp.size() / 128 + (vct_index_tmp.size() % 128 != 0);
			itew.thr.x = 128;

			typedef boost::mpl::vector<v_reduce...> vv_reduce;

			scalar_block_implementation_switch<impl2, block_functor>::template extendSegments<1>(vct_add_index_unique, vct_add_index_cont_1.size());

			if (impl2 == VECTOR_SPARSE_STANDARD)
			{
				sparse_vector_reduction<typename std::remove_reference<decltype(vct_add_data)>::type,
										decltype(vct_add_index_cont_1),
										decltype(vct_add_index_unique),vv_reduce,block_functor,impl2>
						svr(vct_add_data_unique,
							vct_add_data_reord,
							vct_add_data,
							vct_add_index_cont_1,
							vct_add_index_unique,
							blf,
							gpuContext);

				boost::mpl::for_each_ref<boost::mpl::range_c<int,0,sizeof...(v_reduce)>>(svr);
			}

			vct_add_index_unique.remove(vct_add_index_unique.size()-1);

			// special reductions (if any) are applied in a second pass
			sparse_vector_special<typename std::remove_reference<decltype(vct_add_data)>::type,
								  decltype(vct_add_index_unique),
								  vv_reduce> svr2(vct_add_data_unique,vct_add_data_reord,vct_add_index_unique,gpuContext);
			boost::mpl::for_each_ref<boost::mpl::range_c<int,0,sizeof...(v_reduce)>>(svr2);

			scalar_block_implementation_switch<impl2, block_functor>::template solveConflicts<
					decltype(vct_data),
					decltype(vct_index),
					decltype(vct_add_index_unique),
					decltype(vct_index_dtmp),
					Ti,
					v_reduce ...>
					(vct_index,
					 vct_index_tmp,
					 vct_index_tmp2,
					 vct_index_tmp3,
					 vct_index_dtmp,
					 vct_add_index_cont_1,
					 vct_add_index_unique,
					 vct_data,
					 vct_add_data,
					 vct_add_data_unique,
					 vct_add_data_cont,
					 itew,
					 blf,
					 gpuContext);
#else // __NVCC__
			std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
		}
		template<typename ... v_reduce>
		void flush_on_gpu_insert(vector<aggregate<Ti>,Memory,layout_base,grow_p> & vct_add_index_cont_0,
								 vector<aggregate<Ti>,Memory,layout_base,grow_p> & vct_add_index_cont_1,
								 vector<T,Memory,layout_base,grow_p> & vct_add_data_reord,
								 gpu::ofp_context_t & gpuContext)
		{
#ifdef __NVCC__
			// if the insert buffer is empty, there is nothing to do
			if (n_gpu_add_block_slot == 0 || vct_add_index.size() == 0)
			{return;}

			size_t n_ele = make_continuos(vct_nadd_index,vct_add_index,vct_add_index_cont_0,
										  vct_add_index_cont_1,vct_add_data_cont,gpuContext);

			if (vct_add_index_cont_0.size() == 0)
			{return;}

			reorder_indexes(vct_add_index_cont_0,vct_add_index_cont_1,vct_add_data_reord,vct_add_data,gpuContext);

			merge_indexes<v_reduce ... >(vct_add_index_cont_0,vct_add_index_unique,
										 vct_index_tmp,vct_index_tmp2,
										 gpuContext);

			merge_datas<v_reduce ... >(vct_add_data_reord,vct_add_index_unique,vct_add_data,vct_add_index_cont_1,gpuContext);
#else // __NVCC__
			std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
		}
		void flush_on_gpu_remove(gpu::ofp_context_t & gpuContext)
		{
#ifdef __NVCC__
			// add 0 as the last element of vct_nrem_index
			vct_nrem_index.resize(vct_nrem_index.size()+1);
			vct_nrem_index.template get<0>(vct_nrem_index.size()-1) = 0;
			vct_nrem_index.template hostToDevice<0>(vct_nrem_index.size()-1,vct_nrem_index.size()-1);

			// compact the remove list with a scan, as for the insert buffers
			vct_index_tmp4.resize(vct_nrem_index.size());

			openfpm::scan((Ti *)vct_nrem_index.template getDeviceBuffer<0>(), vct_nrem_index.size(), (Ti *)vct_index_tmp4.template getDeviceBuffer<0>(), gpuContext);

			vct_index_tmp4.template deviceToHost<0>(vct_index_tmp4.size()-1,vct_index_tmp4.size()-1);
			size_t n_ele = vct_index_tmp4.template get<0>(vct_index_tmp4.size()-1);

			vct_add_index_cont_0.resize(n_ele);
			vct_add_index_cont_1.resize(n_ele);

			ite_gpu<1> itew;
			itew.wthr.x = vct_nrem_index.size()-1;
			itew.thr.x = 128;

			CUDA_LAUNCH(construct_remove_list,itew,vct_rem_index.toKernel(),
						vct_nrem_index.toKernel(),
						vct_index_tmp4.toKernel(),
						vct_add_index_cont_0.toKernel(),
						vct_add_index_cont_1.toKernel(),
						n_gpu_rem_block_slot);

			openfpm::sort((Ti *)vct_add_index_cont_0.template getDeviceBuffer<0>(),(Ti *)vct_add_index_cont_1.template getDeviceBuffer<0>(),
						  vct_add_index_cont_0.size(), gpu::template less_t<Ti>(), gpuContext);

			auto ite = vct_add_index_cont_0.getGPUIterator();

			vct_add_index_unique.resize(vct_add_index_cont_0.size()+1);

			ite = vct_add_index_cont_0.getGPUIterator();

			mem.allocate(sizeof(int));
			mem.fill(0);

			CUDA_LAUNCH((find_buffer_offsets_zero<0,decltype(vct_add_index_cont_0.toKernel()),decltype(vct_add_index_unique.toKernel())>),
						ite,
						vct_add_index_cont_0.toKernel(),(int *)mem.getDevicePointer(),vct_add_index_unique.toKernel());

			mem.deviceToHost();
			int n_ele_unique = *(int *)mem.getPointer();

			vct_add_index_unique.resize(n_ele_unique);

			openfpm::sort((Ti *)vct_add_index_unique.template getDeviceBuffer<1>(),(Ti *)vct_add_index_unique.template getDeviceBuffer<0>(),
						  vct_add_index_unique.size(),gpu::template less_t<Ti>(),gpuContext);
			vct_m_index.resize(vct_index.size() + vct_add_index_unique.size());

			ite = vct_m_index.getGPUIterator();
			CUDA_LAUNCH((set_indexes<0>),ite,vct_m_index.toKernel(),0);

			ite = vct_add_index_unique.getGPUIterator();
			CUDA_LAUNCH((set_indexes<1>),ite,vct_add_index_unique.toKernel(),(int)vct_index.size());

			vct_index_tmp.resize(vct_index.size() + vct_add_index_unique.size());
			vct_index_tmp2.resize(vct_index.size() + vct_add_index_unique.size());

			itew.wthr.x = vct_index_tmp.size() / 128 + (vct_index_tmp.size() % 128 != 0);
			itew.thr.x = 128;

			vct_index_dtmp.resize(itew.wthr.x);

			openfpm::merge((Ti *)vct_index.template getDeviceBuffer<0>(),(Ti *)vct_m_index.template getDeviceBuffer<0>(),vct_index.size(),
						   (Ti *)vct_add_index_unique.template getDeviceBuffer<0>(),(Ti *)vct_add_index_unique.template getDeviceBuffer<1>(),vct_add_index_unique.size(),
						   (Ti *)vct_index_tmp.template getDeviceBuffer<0>(),(Ti *)vct_index_tmp2.template getDeviceBuffer<0>(),gpu::less_t<Ti>(),gpuContext);

			vct_index_tmp3.resize(128*itew.wthr.x);

			CUDA_LAUNCH((solve_conflicts_remove<decltype(vct_index_tmp.toKernel()),decltype(vct_index_dtmp.toKernel()),128>),
						itew,
						vct_index_tmp.toKernel(),
						vct_index_tmp2.toKernel(),
						vct_index_tmp3.toKernel(),
						vct_m_index.toKernel(),
						vct_index_dtmp.toKernel(),
						(int)vct_index.size());

			// scan the per-block counters to get the output offsets
			openfpm::scan((Ti*)vct_index_dtmp.template getDeviceBuffer<0>(),vct_index_dtmp.size(),(Ti *)vct_index_dtmp.template getDeviceBuffer<1>(),gpuContext);

			// get the size of the final indexes and data
			vct_index_dtmp.template deviceToHost<0,1>(vct_index_dtmp.size()-1,vct_index_dtmp.size()-1);
			int size = vct_index_dtmp.template get<1>(vct_index_dtmp.size()-1) + vct_index_dtmp.template get<0>(vct_index_dtmp.size()-1);

			vct_add_data_cont.resize(size);
			vct_index.resize(size);

			CUDA_LAUNCH(realign_remove,itew,vct_index_tmp3.toKernel(),vct_m_index.toKernel(),vct_data.toKernel(),
						vct_index.toKernel(),vct_add_data_cont.toKernel(),
						vct_index_dtmp.toKernel());

			vct_data.swap(vct_add_data_cont);
#else // __NVCC__
			std::cout << __FILE__ << ":" << __LINE__ << " error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
#endif // __NVCC__
		}
		void resetBck()
		{
			// re-add the background element at the end of the data vector
			vct_data.resize(vct_data.size()+1);
			vct_data.get(vct_data.size()-1) = bck;

			htoD<decltype(vct_data)> trf(vct_data,vct_data.size()-1);
			boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(trf);
		}
		template<typename ... v_reduce>
		void flush_on_gpu(vector<aggregate<Ti>,Memory,layout_base,grow_p> & vct_add_index_cont_0,
						  vector<aggregate<Ti>,Memory,layout_base,grow_p> & vct_add_index_cont_1,
						  vector<T,Memory,layout_base,grow_p> & vct_add_data_reord,
						  gpu::ofp_context_t & gpuContext,
						  int i = 0)
		{
			flush_on_gpu_insert<v_reduce ... >(vct_add_index_cont_0,vct_add_index_cont_1,vct_add_data_reord,gpuContext);
		}
		template<typename ... v_reduce>
		void flush_on_cpu()
		{
			if (vct_add_index.size() == 0)
			{return;}

			// copy the inserted keys together with their original position
			reorder_add_index_cpu.resize(vct_add_index.size());
			vct_add_data_cont.resize(vct_add_index.size());

			for (size_t i = 0 ; i < reorder_add_index_cpu.size() ; i++)
			{
				reorder_add_index_cpu.get(i).id = vct_add_index.template get<0>(i);
				reorder_add_index_cpu.get(i).id2 = i;
			}

			reorder_add_index_cpu.sort();

			// move the data according to the sorted key order
			for (size_t i = 0 ; i < reorder_add_index_cpu.size() ; i++)
			{
				vct_add_data_cont.get(i) = vct_add_data.get(reorder_add_index_cpu.get(i).id2);
			}

			typedef boost::mpl::vector<v_reduce...> vv_reduce;

			// reduce the runs of equal keys
			sparse_vector_reduction_cpu<decltype(vct_add_data),
										decltype(vct_add_index_unique),
										decltype(reorder_add_index_cpu),
										vv_reduce,
										impl2>
					svr(vct_add_data_unique,
						vct_add_data_cont,
						vct_add_index_unique,
						reorder_add_index_cpu);

			boost::mpl::for_each_ref<boost::mpl::range_c<int,0,sizeof...(v_reduce)>>(svr);
			// merge the reduced new elements with the existing index/data vectors
			vector<T,Memory,layout_base,grow_p,impl> vct_data_tmp;
			vector<aggregate<Ti>,Memory,layout_base,grow_p> vct_index_tmp;

			vct_data_tmp.resize(vct_data.size() + vct_add_data_unique.size());
			vct_index_tmp.resize(vct_index.size() + vct_add_index_unique.size());

			Ti di = 0;
			Ti ai = 0;
			size_t i = 0;
			for ( ; i < vct_data_tmp.size() ; i++)
			{
				Ti id_a = (ai < vct_add_index_unique.size())?vct_add_index_unique.template get<0>(ai):std::numeric_limits<Ti>::max();
				Ti id_d = (di < vct_index.size())?vct_index.template get<0>(di):std::numeric_limits<Ti>::max();

				if (id_a <= id_d)
				{
					vct_index_tmp.template get<0>(i) = id_a;

					if (id_a == id_d)
					{
						// the key exists on both sides: assign the new value,
						// then reduce it with the old one
						auto dst = vct_data_tmp.get(i);
						auto src = vct_add_data_unique.get(ai);

						sparse_vector_reduction_solve_conflict_assign_cpu<decltype(vct_data_tmp.get(i)),
																		  decltype(vct_add_data.get(ai)),
																		  vv_reduce>
								sva(src,dst);
						boost::mpl::for_each_ref<boost::mpl::range_c<int,0,sizeof...(v_reduce)>>(sva);
						ai++;

						dst = vct_data_tmp.get(i);
						src = vct_data.get(di);

						sparse_vector_reduction_solve_conflict_reduce_cpu<decltype(vct_data_tmp.get(i)),
																		  decltype(vct_data.get(di)),
																		  vv_reduce,
																		  impl2>
								svr(src,dst);
						boost::mpl::for_each_ref<boost::mpl::range_c<int,0,sizeof...(v_reduce)>>(svr);
						di++;

						// the conflict consumed one reserved output slot
						vct_data_tmp.resize(vct_data_tmp.size()-1);
						vct_index_tmp.resize(vct_index_tmp.size()-1);
					}
					else
					{
						vct_index_tmp.template get<0>(i) = vct_add_index_unique.template get<0>(ai);
						vct_data_tmp.get(i) = vct_add_data_unique.get(ai);
						ai++;
					}
				}
				else
				{
					vct_index_tmp.template get<0>(i) = vct_index.template get<0>(di);
					vct_data_tmp.get(i) = vct_data.get(di);
					di++;
				}
			}

			vct_index.swap(vct_index_tmp);
			vct_data.swap(vct_data_tmp);

			vct_add_data.clear();
			vct_add_index.clear();
			vct_add_index_unique.clear();
			vct_add_data_unique.clear();
		}
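		/* Example: with existing keys {1,4} and insertions {3,1,3} under an
		 * additive reduction, the sort groups {1,3,3}, the reduction collapses
		 * the two 3s, and the merge above produces keys {1,3,4}, where key 1
		 * combines the old value with the newly inserted one.
		 */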
		/*! \brief Get the sparse index of the element id
		 */
		openfpm::sparse_index<Ti> get_sparse(Ti id) const
		{
			Ti di;
			this->_branchfree_search<false>(id,di);
			openfpm::sparse_index<Ti> sid;
			sid.id = di;

			return sid;
		}
		/*! \brief Get an element of the vector
		 */
		template <unsigned int p>
		inline auto get(Ti id) const -> decltype(vct_data.template get<p>(id))
		{
			Ti di;
			this->_branchfree_search<false>(id,di);
			return vct_data.template get<p>(di);
		}
		/*! \brief Get an element of the vector
		 */
		inline auto get(Ti id) const -> decltype(vct_data.get(id))
		{
			Ti di;
			this->_branchfree_search<false>(id,di);
			return vct_data.get(di);
		}
		/*! \brief Return the background value (the value get returns when the key
		 *         is not found)
		 */
		template <unsigned int p>
		auto getBackground() const -> decltype(vct_data.template get<p>(vct_data.size()-1))
		{
			return vct_data.template get<p>(vct_data.size()-1);
		}

		auto getBackground() const -> decltype(vct_data.get(vct_data.size()-1))
		{
			return vct_data.get(vct_data.size()-1);
		}
		/*! \brief Set the background value for property p
		 */
		template<unsigned int p>
		void setBackground(const typename boost::mpl::at<typename T::type, boost::mpl::int_<p>>::type & bck_)
		{
			meta_copy_d<typename boost::mpl::at<typename T::type, boost::mpl::int_<p>>::type,
						typename std::remove_reference<decltype(vct_data.template get<p>(vct_data.size()-1))>::type>
					::meta_copy_d_(bck_,vct_data.template get<p>(vct_data.size()-1));

			vct_data.template hostToDevice<p>(vct_data.size()-1,vct_data.size()-1);

			meta_copy<typename boost::mpl::at<typename T::type, boost::mpl::int_<p>>::type>
					::meta_copy_(bck_,bck.template get<p>());
		}
		/*! \brief It inserts an element in the sparse vector
		 */
		template <unsigned int p>
		auto insert(Ti ele) -> decltype(vct_data.template get<p>(0))
		{
			vct_add_index.add();
			vct_add_index.template get<0>(vct_add_index.size()-1) = ele;
			vct_add_data.add();
			return vct_add_data.template get<p>(vct_add_data.size()-1);
		}
		/*! \brief It inserts an element directly in the flushed data
		 */
		template <unsigned int p>
		auto insertFlush(Ti ele, bool & is_new) -> decltype(vct_data.template get<p>(0))
		{
			Ti di;

			// search the element
			Ti v = _branchfree_search_nobck<true>(ele,di);

			is_new = (v != ele);

			if (is_new == false)
			{return vct_data.template get<p>(di);}

			// the element does not exist, insert it at its sorted position
			vct_index.insert(di);
			vct_data.insert(di);

			vct_index.template get<0>(di) = ele;

			return vct_data.template get<p>(di);
		}
		/*! \brief It inserts an element directly in the flushed data
		 */
		auto insertFlush(Ti ele, bool & is_new) -> decltype(vct_data.get(0))
		{
			Ti di;

			// search the element
			Ti v = _branchfree_search_nobck<true>(ele,di);

			is_new = (v != ele);

			if (is_new == false)
			{return vct_data.get(di);}

			// the element does not exist, insert it at its sorted position
			vct_index.insert(di);
			vct_data.insert(di);

			vct_index.template get<0>(di) = ele;

			return vct_data.get(di);
		}
		/*! \brief It inserts an element in the sparse vector
		 */
		auto insert(Ti ele) -> decltype(vct_data.get(0))
		{
			vct_add_index.add();
			vct_add_index.template get<0>(vct_add_index.size()-1) = ele;
			vct_add_data.add();
			return vct_add_data.get(vct_add_data.size()-1);
		}
		/*! \brief merge the added elements into the main data array, saving the
		 *         insert buffer in v
		 */
		template<typename ... v_reduce>
		void flush_v(vector<aggregate<Ti>,Memory,layout_base,grow_p> & vct_add_index_cont_0,
					 gpu::ofp_context_t & gpuContext,
					 flush_type opt = FLUSH_ON_HOST,
					 int i = 0)
		{
			// eliminate the background
			vct_data.resize(vct_index.size());

			if (opt & flush_type::FLUSH_ON_DEVICE)
			{this->flush_on_gpu<v_reduce ... >(vct_add_index_cont_0,vct_add_index_cont_1,vct_add_data_reord,gpuContext,i);}
			else
			{this->flush_on_cpu<v_reduce ... >();}

			resetBck();
		}
		/*! \brief merge the added elements into the main data array, saving the
		 *         insert buffer in vct_add_data_reord
		 */
		template<typename ... v_reduce>
		void flush_vd(vector<T,Memory,layout_base,grow_p> & vct_add_data_reord,
					  gpu::ofp_context_t & gpuContext,
					  flush_type opt = FLUSH_ON_HOST)
		{
			// eliminate the background
			vct_data.resize(vct_index.size());

			if (opt & flush_type::FLUSH_ON_DEVICE)
			{this->flush_on_gpu<v_reduce ... >(vct_add_index_cont_0,vct_add_index_cont_1,vct_add_data_reord,gpuContext);}
			else
			{this->flush_on_cpu<v_reduce ... >();}

			resetBck();
		}
		/*! \brief merge the added elements into the main data array
		 */
		template<typename ... v_reduce>
		void flush(gpu::ofp_context_t & gpuContext, flush_type opt = FLUSH_ON_HOST)
		{
			// eliminate the background
			vct_data.resize(vct_index.size());

			if (opt & flush_type::FLUSH_ON_DEVICE)
			{this->flush_on_gpu<v_reduce ... >(vct_add_index_cont_0,vct_add_index_cont_1,vct_add_data_reord,gpuContext);}
			else
			{this->flush_on_cpu<v_reduce ... >();}

			resetBck();
		}
		/*! \brief merge the removed elements into the main data array
		 */
		void flush_remove(gpu::ofp_context_t & gpuContext, flush_type opt = FLUSH_ON_HOST)
		{
			vct_data.resize(vct_data.size()-1);

			if (opt & flush_type::FLUSH_ON_DEVICE)
			{this->flush_on_gpu_remove(gpuContext);}
			else
			{
				std::cerr << __FILE__ << ":" << __LINE__ << " error, flush_remove on CPU has not been implemented yet" << std::endl;
			}

			resetBck();
		}
		/*! \brief Return how many elements the map contains
		 */
		size_t size()
		{
			return vct_index.size();
		}
		/*! \brief Transfer from device to host
		 */
		template<unsigned int ... prp>
		void deviceToHost()
		{
			vct_index.template deviceToHost<0>();
			vct_data.template deviceToHost<prp...>();
		}

		/*! \brief Transfer from host to device
		 */
		template<unsigned int ... prp>
		void hostToDevice()
		{
			vct_index.template hostToDevice<0>();
			vct_data.template hostToDevice<prp...>();
		}
		/*! \brief transform this structure into one usable on GPU
		 */
		vector_sparse_gpu_ker<T,Ti,layout_base> toKernel()
		{
			vector_sparse_gpu_ker<T,Ti,layout_base> mvsck(vct_index.toKernel(),vct_data.toKernel(),
					vct_add_index.toKernel(),
					vct_rem_index.toKernel(),vct_add_data.toKernel(),
					vct_nadd_index.toKernel(),
					vct_nrem_index.toKernel(),
					n_gpu_add_block_slot,
					n_gpu_rem_block_slot);

			return mvsck;
		}
		/*! \brief set the gpu insert buffer for every block
		 *
		 * \param nblock number of blocks
		 * \param nslot number of slots per block
		 */
		void setGPUInsertBuffer(int nblock, int nslot)
		{
			vct_add_index.resize(nblock*nslot);
			vct_nadd_index.resize(nblock);
			vct_add_data.resize(nblock*nslot);
			n_gpu_add_block_slot = nslot;
			vct_nadd_index.template fill<0>(0);
		}
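		/* The insert buffer is an nblock x nslot matrix: each GPU block owns
		 * nslot contiguous slots and vct_nadd_index counts how many of them it
		 * actually used, which is what make_continuos later scans to compact
		 * the keys.
		 */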
		/*! \brief In case the added index buffer and the add data buffer were set
		 *         manually, this function has to be called before flush
		 */
		void preFlush()
		{
#ifdef __NVCC__
			vct_nadd_index.resize(vct_add_index.size());

			if (vct_nadd_index.size() != 0)
			{
				auto ite = vct_nadd_index.getGPUIterator();
				CUDA_LAUNCH((set_one_insert_buffer),ite,vct_nadd_index.toKernel());
			}
			n_gpu_add_block_slot = 1;
#endif // __NVCC__
		}
		/*! \brief Get the GPU insert buffer
		 */
		auto getGPUInsertBuffer() -> decltype(vct_add_data)&
		{
			return vct_add_data;
		}
		/*! \brief set the gpu remove buffer for every block
		 */
		void setGPURemoveBuffer(int nblock, int nslot)
		{
			vct_rem_index.resize(nblock*nslot);
			vct_nrem_index.resize(nblock);
			n_gpu_rem_block_slot = nslot;
			vct_nrem_index.template fill<0>(0);
		}
		auto getGPUIterator() -> decltype(vct_index.getGPUIterator())
		{
			return vct_index.getGPUIterator();
		}
		/*! \brief Clear all the elements
		 */
		void clear()
		{
			vct_data.clear();
			vct_index.clear();
			vct_add_index.clear();
			vct_add_data.clear();
			vct_add_index_cont_0.clear();

			// re-add the background element
			vct_data.resize(vct_data.size()+1);
			vct_data.get(vct_data.size()-1) = bck;

			htoD<decltype(vct_data)> trf(vct_data,vct_data.size()-1);
			boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(trf);

			n_gpu_add_block_slot = 0;
			n_gpu_rem_block_slot = 0;
		}
		void swap(vector_sparse<T,Ti,Memory,layout_base,grow_p,impl,impl2,block_functor> & sp)
		{
			vct_data.swap(sp.vct_data);
			vct_index.swap(sp.vct_index);
			vct_add_index.swap(sp.vct_add_index);
			vct_add_data.swap(sp.vct_add_data);

			size_t max_ele_ = sp.max_ele;
			sp.max_ele = max_ele;
			this->max_ele = max_ele_;
		}
		vector<T,Memory,layout_base,grow_p> & private_get_vct_add_data()
		{
			return vct_add_data;
		}

		vector<aggregate<Ti>,Memory,layout_base,grow_p> & private_get_vct_add_index()
		{
			return vct_add_index;
		}

		const vector<aggregate<Ti>,Memory,layout_base,grow_p> & private_get_vct_add_index() const
		{
			return vct_add_index;
		}

		vector<aggregate<Ti>,Memory,layout_base,grow_p> & private_get_vct_nadd_index()
		{
			return vct_nadd_index;
		}

		const vector<aggregate<Ti>,Memory,layout_base,grow_p> & private_get_vct_nadd_index() const
		{
			return vct_nadd_index;
		}

		auto getSegmentToOutMap() -> decltype(blf.get_outputMap())
		{
			return blf.get_outputMap();
		}

		auto getSegmentToOutMap() const -> decltype(blf.get_outputMap())
		{
			return blf.get_outputMap();
		}
		/*! \brief Eliminate many internal temporary buffers; you can use this
		 *         between flushes if you run out of memory
		 */
		void removeUnusedBuffers()
		{
			vct_add_data.resize(0);
			vct_add_data.shrink_to_fit();

			vct_add_data_reord.resize(0);
			vct_add_data_reord.shrink_to_fit();

			vct_add_data_cont.resize(0);
			vct_add_data_cont.shrink_to_fit();

			vct_add_data_unique.resize(0);
			vct_add_data_unique.shrink_to_fit();
		}
		/*! \brief Return the segment-to-merge-index map
		 */
		vector<aggregate<Ti,Ti>,Memory,layout_base,grow_p> & getSegmentToMergeIndexMap()
		{
			return vct_add_index_unique;
		}

		const vector<aggregate<Ti,Ti>,Memory,layout_base,grow_p> & getSegmentToMergeIndexMap() const
		{
			return vct_add_index_unique;
		}

		/*! \brief Return the mapping vector
		 */
		vector<aggregate<Ti>,Memory,layout_base,grow_p> & getMappingVector()
		{
			return vct_add_index_cont_1;
		}

		/*! \brief Return the merge mapping vector
		 */
		vector<aggregate<Ti>,Memory,layout_base,grow_p> & getMergeIndexMapVector()
		{
			return vct_index_tmp2;
		}
	};

	template<typename T,
			 unsigned int blockSwitch = VECTOR_SPARSE_STANDARD,
			 typename block_functor = stub_block_functor,
			 typename indexT = int>
	using vector_sparse_gpu = openfpm::vector_sparse<
			T,
			indexT,
			CudaMemory,
			memory_traits_inte,
			grow_policy_double,
			vect_isel<T>::value,
			blockSwitch,
			block_functor
	>;

	template<typename T,
			 typename block_functor = stub_block_functor,
			 typename indexT = long int>
	using vector_sparse_gpu_block = openfpm::vector_sparse<
			T,
			indexT,
			CudaMemory,
			memory_traits_inte,
			grow_policy_double,
			vect_isel<T>::value,
			VECTOR_SPARSE_BLOCK,
			block_functor
	>;
}

#endif /* MAP_VECTOR_SPARSE_HPP_ */