8 #ifndef MAP_VECTOR_SPARSE_HPP_ 9 #define MAP_VECTOR_SPARSE_HPP_ 11 #include "util/cuda_launch.hpp" 12 #include "Vector/map_vector.hpp" 13 #include "Vector/cuda/map_vector_sparse_cuda_ker.cuh" 14 #include "Vector/cuda/map_vector_sparse_cuda_kernels.cuh" 15 #include "util/cuda/ofp_context.hxx" 20 #if !defined(CUDA_ON_CPU) && !defined(__HIP__) 21 #include "util/cuda/moderngpu/kernel_segreduce.hxx" 22 #include "util/cuda/moderngpu/kernel_merge.hxx" 24 #include "util/cuda/kernels.cuh" 27 #include "util/cuda/scan_ofp.cuh" 28 #include "util/cuda/sort_ofp.cuh" 29 #include "util/cuda/segreduce_ofp.cuh" 30 #include "util/cuda/merge_ofp.cuh" 39 template<
typename OfpmVectorT>
40 using ValueTypeOf =
typename std::remove_reference<OfpmVectorT>::type::value_type;
45 template<
typename sg_type>
53 htoD(sg_type &
sg,
unsigned int lele)
62 sg.template hostToDevice<T::value>(lele,lele);
66 constexpr
int VECTOR_SPARSE_STANDARD = 1;
67 constexpr
int VECTOR_SPARSE_BLOCK = 2;
69 template<
typename reduction_type,
unsigned int impl>
72 template<
typename encap_src,
typename encap_dst>
73 static inline void process(encap_src & src, encap_dst & dst)
75 dst = reduction_type::red(dst,src);
79 template<
typename reduction_type>
82 template<
typename encap_src,
typename encap_dst>
83 static inline void process(encap_src & src, encap_dst & dst)
85 for (
size_t i = 0 ; i < encap_src::size ; i++)
87 dst[i] = reduction_type::red(dst[i],src[i]);
92 template<
typename reduction_type>
95 template<
typename encap_src,
typename encap_dst,
unsigned int N1>
96 static inline void process(encap_src & src, encap_dst (& dst)[N1])
98 for (
unsigned int j = 0 ; j < N1 ; j++)
100 for (
size_t i = 0 ; i < encap_dst::size ; i++)
102 dst[i] = reduction_type::red(dst[i][j],src[j][i]);
107 template<
unsigned int N1,
unsigned int blockSize,
typename encap_src,
typename encap_dst>
108 static inline void process_e(encap_src & src, encap_dst & dst)
110 for (
unsigned int j = 0 ; j < N1 ; j++)
112 for (
size_t i = 0 ; i < blockSize ; i++)
114 dst[i] = reduction_type::red(dst[i][j],src[i][j]);
124 template<
unsigned int impl,
typename block_functor>
127 template <
unsigned int p,
typename vector_index_type>
128 static void extendSegments(vector_index_type & segments,
size_t dataSize)
133 std::cout << __FILE__ <<
":" << __LINE__ <<
" error: this file is supposed to be compiled with nvcc" << std::endl;
137 template <
unsigned int pSegment,
typename vector_reduction,
typename T,
typename vector_data_type,
typename vector_index_type ,
typename vector_index_type2>
138 static void segreduce(vector_data_type & vector_data,
139 vector_data_type & vector_data_unsorted,
140 vector_index_type & vector_data_map,
141 vector_index_type2 & segment_offset,
142 vector_data_type & vector_data_red,
144 mgpu::ofp_context_t & context)
147 typedef typename boost::mpl::at<vector_reduction, T>::type reduction_type;
148 typedef typename boost::mpl::at<typename vector_data_type::value_type::type,typename reduction_type::prop>::type red_type;
149 typedef typename reduction_type::template op_red<red_type> red_op;
150 typedef typename boost::mpl::at<typename vector_index_type::value_type::type,boost::mpl::int_<0>>::type seg_type;
154 assert((std::is_same<seg_type,int>::value ==
true));
157 (red_type *)vector_data.template getDeviceBuffer<reduction_type::prop::value>(), vector_data.size(),
158 (
int *)segment_offset.template getDeviceBuffer<1>(), segment_offset.size(),
159 (red_type *)vector_data_red.template getDeviceBuffer<reduction_type::prop::value>(),
160 red_op(),
init, context);
162 std::cout << __FILE__ <<
":" << __LINE__ <<
" error: this file is supposed to be compiled with nvcc" << std::endl;
183 typename vector_data_type,
184 typename vector_index_type,
185 typename vector_index_type2,
186 typename vector_index_dtmp_type,
188 typename ... v_reduce>
190 vector_index_type & vct_index_old,
191 vector_index_type & vct_index_merge,
192 vector_index_type & vct_index_merge_id,
193 vector_index_type & vct_index_out,
194 vector_index_dtmp_type & vct_index_dtmp,
195 vector_index_type & data_map,
196 vector_index_type2 & segments_new,
197 vector_data_type & vct_data_old,
198 vector_data_type & vct_add_data,
199 vector_data_type & vct_add_data_unique,
200 vector_data_type & vct_data_out,
203 mgpu::ofp_context_t & context
208 CUDA_LAUNCH((solve_conflicts<
209 decltype(vct_index_merge.toKernel()),
210 decltype(vct_data_old.toKernel()),
211 decltype(vct_index_dtmp.toKernel()),
216 vct_index_merge.toKernel(),vct_data_old.toKernel(),
217 vct_index_merge_id.toKernel(),vct_add_data_unique.toKernel(),
218 vct_index_out.toKernel(),vct_data_out.toKernel(),
219 vct_index_dtmp.toKernel(),
220 vct_index_old.size());
224 (Ti*)vct_index_dtmp.template getDeviceBuffer<0>(),
225 vct_index_dtmp.size(),
226 (Ti *)vct_index_dtmp.template getDeviceBuffer<1>(),
230 vct_index_dtmp.template deviceToHost<0,1>(vct_index_dtmp.size()-1,vct_index_dtmp.size()-1);
231 int size = vct_index_dtmp.template get<1>(vct_index_dtmp.size()-1) + vct_index_dtmp.template get<0>(vct_index_dtmp.size()-1);
233 vct_index_old.resize(size);
234 vct_data_old.resize(size);
236 CUDA_LAUNCH(realign,itew,vct_index_out.toKernel(),vct_data_out.toKernel(),
237 vct_index_old.toKernel(), vct_data_old.toKernel(),
238 vct_index_dtmp.toKernel());
242 std::cout << __FILE__ <<
":" << __LINE__ <<
" error: this file is supposed to be compiled with nvcc" << std::endl;
248 template<
typename block_functor>
251 template <
unsigned int p,
typename vector_index_type>
252 static void extendSegments(vector_index_type & segments,
size_t dataSize)
256 segments.resize(segments.size()+1);
257 segments.template get<p>(segments.size() - 1) = dataSize;
258 segments.template hostToDevice<p>(segments.size() - 1, segments.size() - 1);
260 std::cout << __FILE__ <<
":" << __LINE__ <<
" error: this file is supposed to be compiled with nvcc" << std::endl;
264 template <
unsigned int pSegment,
typename vector_reduction,
typename T,
typename vector_data_type,
typename vector_index_type ,
typename vector_index_type2>
265 static void segreduce(vector_data_type & vector_data,
266 vector_data_type & vector_data_unsorted,
267 vector_index_type & vector_data_map,
268 vector_index_type2 & segment_offset,
269 vector_data_type & vector_data_red,
271 mgpu::ofp_context_t & context)
277 typename vector_data_type,
278 typename vector_index_type,
279 typename vector_index_type2,
280 typename vector_index_dtmp_type,
282 typename ... v_reduce>
284 vector_index_type & vct_index_old,
285 vector_index_type & vct_index_merge,
286 vector_index_type & vct_index_merge_id,
287 vector_index_type & vct_index_out,
288 vector_index_dtmp_type & vct_index_dtmp,
289 vector_index_type & data_map,
290 vector_index_type2 & segments_new,
291 vector_data_type & vct_data,
292 vector_data_type & vct_add_data,
293 vector_data_type & vct_add_data_unique,
294 vector_data_type & vct_data_out,
297 mgpu::ofp_context_t & context
301 blf.template solve_conflicts<1,
302 decltype(vct_index_merge),
303 decltype(segments_new),
306 (vct_index_merge, vct_index_merge_id, segments_new, data_map,
307 vct_data, vct_add_data,
308 vct_index_old, vct_data_out,
310 vct_data_out.swap(vct_data);
313 std::cout << __FILE__ <<
":" << __LINE__ <<
" error: this file is supposed to be compiled with nvcc" << std::endl;
318 template<
typename Ti>
324 bool operator<(
const reorder & t)
const 330 template<
typename reduction_type,
typename vector_reduction,
typename T,
unsigned int impl,
typename red_type>
333 template<
typename vector_data_type,
typename vector_index_type,
typename vector_index_type_reo>
334 static inline void red(
size_t & i, vector_data_type & vector_data_red,
335 vector_data_type & vector_data,
336 vector_index_type & vector_index,
337 vector_index_type_reo & reorder_add_index_cpu)
339 size_t start = reorder_add_index_cpu.get(i).id;
340 red_type
red = vector_data.template get<reduction_type::prop::value>(i);
343 for ( ; i+j < reorder_add_index_cpu.size() && reorder_add_index_cpu.get(i+j).id == start ; j++)
348 vector_data_red.add();
349 vector_data_red.template get<reduction_type::prop::value>(vector_data_red.size()-1) =
red;
354 vector_index.template get<0>(vector_index.size() - 1) = reorder_add_index_cpu.get(i).id;
362 template<
typename reduction_type,
typename vector_reduction,
typename T,
unsigned int impl,
typename red_type,
unsigned int N1>
365 template<
typename vector_data_type,
typename vector_index_type,
typename vector_index_type_reo>
366 static inline void red(
size_t & i, vector_data_type & vector_data_red,
367 vector_data_type & vector_data,
368 vector_index_type & vector_index,
369 vector_index_type_reo & reorder_add_index_cpu)
371 size_t start = reorder_add_index_cpu.get(i).id;
374 for (
size_t k = 0 ; k < N1 ; k++)
376 red[k] = vector_data.template get<reduction_type::prop::value>(i)[k];
380 for ( ; i+j < reorder_add_index_cpu.size() && reorder_add_index_cpu.get(i+j).id == start ; j++)
382 auto ev = vector_data.template get<reduction_type::prop::value>(i+j);
387 vector_data_red.add();
389 for (
size_t k = 0 ; k < N1 ; k++)
391 vector_data_red.template get<reduction_type::prop::value>(vector_data_red.size()-1)[k] =
red[k];
397 vector_index.template get<0>(vector_index.size() - 1) = reorder_add_index_cpu.get(i).id;
414 template<
typename vector_data_type,
415 typename vector_index_type,
416 typename vector_index_type_reo,
417 typename vector_reduction,
450 typedef typename boost::mpl::at<vector_reduction, T>::type reduction_type;
451 typedef typename boost::mpl::at<typename ValueTypeOf<vector_data_type>::type,
typename reduction_type::prop>::type red_type;
453 if (reduction_type::is_special() ==
false)
493 template<
typename encap_src,
495 typename vector_reduction>
519 typedef typename boost::mpl::at<vector_reduction, T>::type reduction_type;
521 dst.template get<reduction_type::prop::value>() =
src.template get<reduction_type::prop::value>();
526 template<
unsigned int impl,
typename vector_reduction,
typename T,
typename red_type>
529 template<
typename encap_src,
typename encap_dst>
530 static inline void red(encap_src & src, encap_dst & dst)
532 typedef typename boost::mpl::at<vector_reduction, T>::type reduction_type;
538 template<
unsigned int impl,
typename vector_reduction,
typename T,
typename red_type,
unsigned int N1>
541 template<
typename encap_src,
typename encap_dst>
542 static inline void red(encap_src & src, encap_dst & dst)
544 typedef typename boost::mpl::at<vector_reduction, T>::type reduction_type;
546 auto src_e = src.template get<reduction_type::prop::value>();
547 auto dst_e = dst.template get<reduction_type::prop::value>();
563 template<
typename encap_src,
565 typename vector_reduction,
590 typedef typename boost::mpl::at<vector_reduction, T>::type reduction_type;
591 typedef typename boost::mpl::at<typename encap_src::T_type::type, typename reduction_type::prop>::type red_type;
610 template<
typename vector_data_type,
611 typename vector_index_type,
612 typename vector_index_type2,
613 typename vector_reduction,
614 typename block_functor,
615 unsigned int impl2,
unsigned int pSegment=1>
667 typedef typename boost::mpl::at<vector_reduction, T>::type reduction_type;
668 typedef typename boost::mpl::at<typename ValueTypeOf<vector_data_type>::type,
typename reduction_type::prop>::type red_type;
669 if (reduction_type::is_special() ==
false)
681 std::cout << __FILE__ <<
":" << __LINE__ <<
" error: this file is supposed to be compiled with nvcc" << std::endl;
689 template<
unsigned int pSegment,
typename vector_reduction,
typename T,
typename vector_index_type,
typename vector_data_type>
690 static bool seg_reduce(vector_index_type & segments, vector_data_type & src, vector_data_type & dst)
695 template<
typename vector_index_type,
typename vector_data_type,
typename ... v_reduce>
696 static bool solve_conflicts(vector_index_type &keys, vector_index_type &merge_indices,
697 vector_data_type &data1, vector_data_type &data2,
698 vector_index_type &indices_tmp, vector_data_type &data_tmp,
699 vector_index_type &keysOut, vector_data_type &dataOut,
700 mgpu::ofp_context_t & context)
/*! \brief Special-reduction functor fragment (host / non-nvcc fallback path).
 *
 * NOTE(review): extraction gaps — the enclosing class header and the body of
 * the is_special() branch are not visible in this fragment; only the typedefs
 * and the nvcc-required diagnostic survive. Confirm against the full source.
 */
728 template<
typename vector_data_type,
typename vector_index_type,
typename vector_reduction>
762 typedef typename boost::mpl::at<vector_reduction,T>::type reduction_type;
765 typedef typename boost::mpl::at<typename vector_data_type::value_type::type,typename reduction_type::prop>::type red_type;
767 if (reduction_type::is_special() ==
true)
// fixed typo in the user-facing diagnostic: "si supposed" -> "is supposed"
776 std::cout << __FILE__ <<
":" << __LINE__ <<
" error: this file is supposed to be compiled with nvcc" << std::endl;
782 typename Ti =
long int,
786 typename grow_p=grow_policy_double,
787 unsigned int impl=vect_isel<T>::value,
788 unsigned int impl2 = VECTOR_SPARSE_STANDARD,
789 typename block_functor = stub_block_functor>
830 int n_gpu_add_block_slot = 0;
831 int n_gpu_rem_block_slot = 0;
/*! \brief Branch-free binary search over the sorted index vector vct_index.
 *
 * NOTE(review): extraction gaps — the enclosing function signature and the
 * loop that halves `half` are not visible here; this appears to be
 * _branchfree_search_nobck(Ti x, Ti & id) (it is called under that name
 * elsewhere in this file). The __builtin_prefetch hints are presumably
 * guarded by the `prefetch` template flag — the guard is not visible.
 *
 * Sets `id` to the position where `x` is (or would be inserted) in
 * vct_index, and returns the index value stored at that position, or -1
 * when the position is past the end (or the vector is empty).
 */
template<
bool prefetch>
// empty index vector: report insertion position 0 and "not found"
842 if (vct_index.
size() == 0) {
id = 0;
return -1;}
843 const Ti *base = &vct_index.template get<0>(0);
844 const Ti *end = (
const Ti *)vct_index.template getPointer<0>() + vct_index.
size();
845 Ti n = vct_data.
size()-1;
// prefetch both candidate halves to hide memory latency during the search
851 __builtin_prefetch(base + half/2, 0, 0);
852 __builtin_prefetch(base + half + half/2, 0, 0);
// branchless probe: advance base only when the midpoint is still below x
854 base = (base[half] < x) ? base+half : base;
// off corrects the final position when the probe stopped one element short
858 int off = (*base < x);
859 id = base - &vct_index.template get<0>(0) + off;
860 return (base + off != end)?*(base + off):-1;
/*! \brief Search for index `x`; on a miss, redirect `id` to the last data slot.
 *
 * NOTE(review): extraction gap — the enclosing signature is not visible;
 * this appears to be _branchfree_search(Ti x, Ti & id), the wrapper used by
 * the get() accessors below.
 *
 * Delegates to _branchfree_search_nobck and, when the key is not present,
 * rewrites `id` to point at the last element of vct_data (presumably the
 * background/default element — confirm against the full source).
 */
template<
bool prefetch>
872 Ti v = _branchfree_search_nobck<prefetch>(x,
id);
873 id = (x == v)?
id:vct_data.
size()-1;
893 mgpu::ofp_context_t & context)
898 vct_nadd_index.resize(vct_nadd_index.
size()+1);
899 vct_nadd_index.template get<0>(vct_nadd_index.
size()-1) = 0;
900 vct_nadd_index.template hostToDevice<0>(vct_nadd_index.
size()-1,vct_nadd_index.
size()-1);
903 vct_index_tmp4.resize(vct_nadd_index.
size());
905 openfpm::scan((Ti *)vct_nadd_index.template getDeviceBuffer<0>(),
906 vct_nadd_index.
size(),
907 (Ti *)vct_index_tmp4.template getDeviceBuffer<0>() ,
910 vct_index_tmp4.template deviceToHost<0>(vct_index_tmp4.
size()-1,vct_index_tmp4.
size()-1);
911 size_t n_ele = vct_index_tmp4.template get<0>(vct_index_tmp4.
size()-1);
914 vct_add_cont_index.resize(n_ele);
915 vct_add_cont_index_map.resize(n_ele);
917 if (impl2 == VECTOR_SPARSE_STANDARD)
919 vct_add_data_cont.resize(n_ele);
923 vct_segment_index_map.resize(n_ele);
926 if (n_gpu_add_block_slot >= 128)
929 itew.wthr.x = vct_nadd_index.
size()-1;
936 CUDA_LAUNCH(construct_insert_list_key_only,itew,vct_add_index.toKernel(),
937 vct_nadd_index.toKernel(),
938 vct_index_tmp4.toKernel(),
939 vct_add_cont_index.toKernel(),
940 vct_add_cont_index_map.toKernel(),
941 n_gpu_add_block_slot);
945 auto itew = vct_add_index.getGPUIterator();
947 CUDA_LAUNCH(construct_insert_list_key_only_small_pool,itew,vct_add_index.toKernel(),
948 vct_nadd_index.toKernel(),
949 vct_index_tmp4.toKernel(),
950 vct_add_cont_index.toKernel(),
951 vct_add_cont_index_map.toKernel(),
952 n_gpu_add_block_slot);
973 mgpu::ofp_context_t & context)
977 itew.wthr.x = vct_nadd_index.
size()-1;
984 size_t n_ele = vct_add_cont_index.size();
986 n_gpu_add_block_slot = 0;
990 (Ti *)vct_add_cont_index.template getDeviceBuffer<0>(),
991 (Ti *)vct_add_cont_index_map.template getDeviceBuffer<0>(),
992 vct_add_cont_index.size(),
993 mgpu::template less_t<Ti>(),
996 auto ite = vct_add_cont_index.getGPUIterator();
1000 if (impl2 == VECTOR_SPARSE_STANDARD)
1002 vct_add_data_reord.resize(n_ele);
1003 CUDA_LAUNCH(reorder_vector_data,ite,vct_add_cont_index_map.toKernel(),vct_add_data_cont.toKernel(),vct_add_data_reord.toKernel());
1015 template<
typename ... v_reduce>
1020 mgpu::ofp_context_t & context)
1024 typedef boost::mpl::vector<v_reduce...> vv_reduce;
1026 auto ite = vct_add_index_sort.getGPUIterator();
1030 vct_add_index_unique.resize(vct_add_index_sort.size()+1);
1032 ite = vct_add_index_sort.getGPUIterator();
1034 vct_index_tmp4.resize(vct_add_index_sort.size()+1);
1038 find_buffer_offsets_for_scan
1040 decltype(vct_add_index_sort.toKernel()),
1041 decltype(vct_index_tmp4.toKernel())
1045 vct_add_index_sort.toKernel(),
1046 vct_index_tmp4.toKernel());
1048 openfpm::scan((Ti *)vct_index_tmp4.template getDeviceBuffer<0>(),vct_index_tmp4.
size(),(Ti *)vct_index_tmp4.template getDeviceBuffer<0>(),context);
1050 vct_index_tmp4.template deviceToHost<0>(vct_index_tmp4.
size()-1,vct_index_tmp4.
size()-1);
1051 int n_ele_unique = vct_index_tmp4.template get<0>(vct_index_tmp4.
size()-1);
1053 vct_add_index_unique.resize(n_ele_unique);
1055 if (impl2 == VECTOR_SPARSE_STANDARD)
1057 vct_add_data_unique.resize(n_ele_unique);
1061 (construct_index_unique<0>),
1063 vct_add_index_sort.toKernel(),
1064 vct_index_tmp4.toKernel(),
1065 vct_add_index_unique.toKernel());
1067 typedef boost::mpl::vector<v_reduce...> vv_reduce;
1072 vct_m_index.resize(vct_index.
size());
1074 if (vct_m_index.
size() != 0)
1076 ite = vct_m_index.getGPUIterator();
1077 CUDA_LAUNCH((set_indexes<0>),ite,vct_m_index.toKernel(),0);
1083 vct_merge_index.resize(vct_index.
size() + vct_add_index_unique.
size());
1084 vct_merge_index_map.resize(vct_index.
size() + vct_add_index_unique.
size());
1085 vct_index_tmp3.resize(vct_index.
size() + vct_add_index_unique.
size());
1089 if (impl2 == VECTOR_SPARSE_STANDARD)
1091 vct_add_data_cont.reserve(vct_index.
size() + vct_add_index_unique.
size()+1);
1092 vct_add_data_cont.resize(vct_index.
size() + vct_add_index_unique.
size());
1095 ite = vct_add_index_unique.getGPUIterator();
1096 vct_index_tmp4.resize(vct_add_index_unique.
size());
1097 CUDA_LAUNCH((set_indexes<0>),ite,vct_index_tmp4.toKernel(),vct_index.
size());
1101 itew.wthr.x = vct_merge_index.size() / 128 + (vct_merge_index.size() % 128 != 0);
1108 vct_index_dtmp.resize(itew.wthr.x);
1113 openfpm::merge((Ti *)vct_index.template getDeviceBuffer<0>(),(Ti *)vct_m_index.template getDeviceBuffer<0>(),vct_index.
size(),
1114 (Ti *)vct_add_index_unique.template getDeviceBuffer<0>(),(Ti *)vct_index_tmp4.template getDeviceBuffer<0>(),vct_add_index_unique.
size(),
1115 (Ti *)vct_merge_index.template getDeviceBuffer<0>(),(Ti *)vct_merge_index_map.template getDeviceBuffer<0>(),
mgpu::less_t<Ti>(),context);
1123 template<
typename ... v_reduce>
1128 mgpu::ofp_context_t & context)
1132 itew.wthr.x = vct_index_tmp.
size() / 128 + (vct_index_tmp.
size() % 128 != 0);
1139 typedef boost::mpl::vector<v_reduce...> vv_reduce;
1145 ::template extendSegments<1>(vct_add_index_unique, vct_add_data_reord_map.size());
1147 if (impl2 == VECTOR_SPARSE_STANDARD)
1150 decltype(vct_add_data_reord_map),
1151 decltype(vct_add_index_unique),vv_reduce,block_functor,impl2>
1153 vct_add_data_unique,
1156 vct_add_data_reord_map,
1157 vct_add_index_unique,
1161 boost::mpl::for_each_ref<boost::mpl::range_c<
int,0,
sizeof...(v_reduce)>>(svr);
1164 sparse_vector_special<
typename std::remove_reference<decltype(vct_add_data)>::type,
1165 decltype(vct_add_index_unique),
1166 vv_reduce> svr2(vct_add_data_unique,vct_add_data_reord,vct_add_index_unique,context);
1167 boost::mpl::for_each_ref<boost::mpl::range_c<
int,0,
sizeof...(v_reduce)>>(svr2);
1172 scalar_block_implementation_switch<impl2, block_functor>::template solveConflicts<
1174 decltype(vct_index),
1175 decltype(segments_new),
1176 decltype(vct_index_dtmp),
1186 vct_add_data_reord_map,
1190 vct_add_data_unique,
1199 std::cout << __FILE__ <<
":" << __LINE__ <<
" error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
1203 template<
typename ... v_reduce>
1204 void flush_on_gpu_insert(vector<
aggregate<Ti>,Memory,layout_base,grow_p> & vct_add_index_cont_0,
1205 vector<
aggregate<Ti>,Memory,layout_base,grow_p> & vct_add_index_cont_1,
1206 vector<T,Memory,layout_base,grow_p> & vct_add_data_reord,
1207 mgpu::ofp_context_t & context)
1212 if (n_gpu_add_block_slot == 0 || vct_add_index.
size() == 0)
1217 size_t n_ele = make_continuos(vct_nadd_index,vct_add_index,vct_add_index_cont_0,vct_add_index_cont_1,
1218 vct_add_data,vct_add_data_cont,context);
1222 if (vct_add_index_cont_0.
size() == 0)
1225 reorder_indexes(vct_add_index_cont_0,vct_add_index_cont_1,vct_add_data_reord,vct_add_data,context);
1227 merge_indexes<v_reduce ... >(vct_add_index_cont_0,vct_add_index_unique,
1228 vct_index_tmp,vct_index_tmp2,
1231 merge_datas<v_reduce ... >(vct_add_data_reord,vct_add_index_unique,vct_add_data,vct_add_index_cont_1,context);
1234 std::cout << __FILE__ <<
":" << __LINE__ <<
" error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
1239 void flush_on_gpu_remove(
1240 mgpu::ofp_context_t & context)
1245 vct_nrem_index.resize(vct_nrem_index.
size()+1);
1246 vct_nrem_index.template get<0>(vct_nrem_index.
size()-1) = 0;
1247 vct_nrem_index.template hostToDevice<0>(vct_nrem_index.
size()-1,vct_nrem_index.
size()-1);
1250 vct_index_tmp4.resize(vct_nrem_index.
size());
1252 openfpm::scan((Ti *)vct_nrem_index.template getDeviceBuffer<0>(), vct_nrem_index.
size(), (Ti *)vct_index_tmp4.template getDeviceBuffer<0>() , context);
1254 vct_index_tmp4.template deviceToHost<0>(vct_index_tmp4.
size()-1,vct_index_tmp4.
size()-1);
1255 size_t n_ele = vct_index_tmp4.template get<0>(vct_index_tmp4.
size()-1);
1258 vct_add_index_cont_0.resize(n_ele);
1259 vct_add_index_cont_1.resize(n_ele);
1262 itew.wthr.x = vct_nrem_index.
size()-1;
1269 CUDA_LAUNCH(construct_remove_list,itew,vct_rem_index.toKernel(),
1270 vct_nrem_index.toKernel(),
1271 vct_index_tmp4.toKernel(),
1272 vct_add_index_cont_0.toKernel(),
1273 vct_add_index_cont_1.toKernel(),
1274 n_gpu_rem_block_slot);
1277 openfpm::sort((Ti *)vct_add_index_cont_0.template getDeviceBuffer<0>(),(Ti *)vct_add_index_cont_1.template getDeviceBuffer<0>(),
1278 vct_add_index_cont_0.
size(), mgpu::template less_t<Ti>(), context);
1280 auto ite = vct_add_index_cont_0.getGPUIterator();
1284 vct_add_index_unique.resize(vct_add_index_cont_0.
size()+1);
1286 ite = vct_add_index_cont_0.getGPUIterator();
1290 CUDA_LAUNCH((find_buffer_offsets_zero<0,decltype(vct_add_index_cont_0.toKernel()),decltype(vct_add_index_unique.toKernel())>),
1292 vct_add_index_cont_0.toKernel(),(
int *)mem.
getDevicePointer(),vct_add_index_unique.toKernel());
1297 vct_add_index_unique.resize(n_ele_unique);
1299 openfpm::sort((Ti *)vct_add_index_unique.template getDeviceBuffer<1>(),(Ti *)vct_add_index_unique.template getDeviceBuffer<0>(),
1300 vct_add_index_unique.
size(),mgpu::template less_t<Ti>(),context);
1305 vct_m_index.resize(vct_index.
size() + vct_add_index_unique.
size());
1307 ite = vct_m_index.getGPUIterator();
1308 CUDA_LAUNCH((set_indexes<0>),ite,vct_m_index.toKernel(),0);
1310 ite = vct_add_index_unique.getGPUIterator();
1311 CUDA_LAUNCH((set_indexes<1>),ite,vct_add_index_unique.toKernel(),vct_index.
size());
1316 vct_index_tmp.resize(vct_index.
size() + vct_add_index_unique.
size());
1317 vct_index_tmp2.resize(vct_index.
size() + vct_add_index_unique.
size());
1319 itew.wthr.x = vct_index_tmp.
size() / 128 + (vct_index_tmp.
size() % 128 != 0);
1326 vct_index_dtmp.resize(itew.wthr.x);
1330 openfpm::merge((Ti *)vct_index.template getDeviceBuffer<0>(),(Ti *)vct_m_index.template getDeviceBuffer<0>(),vct_index.
size(),
1331 (Ti *)vct_add_index_unique.template getDeviceBuffer<0>(),(Ti *)vct_add_index_unique.template getDeviceBuffer<1>(),vct_add_index_unique.
size(),
1332 (Ti *)vct_index_tmp.template getDeviceBuffer<0>(),(Ti *)vct_index_tmp2.template getDeviceBuffer<0>(),
mgpu::less_t<Ti>(),context);
1334 vct_index_tmp3.resize(128*itew.wthr.x);
1336 CUDA_LAUNCH((solve_conflicts_remove<decltype(vct_index_tmp.toKernel()),decltype(vct_index_dtmp.toKernel()),128>),
1338 vct_index_tmp.toKernel(),
1339 vct_index_tmp2.toKernel(),
1340 vct_index_tmp3.toKernel(),
1341 vct_m_index.toKernel(),
1342 vct_index_dtmp.toKernel(),
1346 openfpm::scan((Ti*)vct_index_dtmp.template getDeviceBuffer<0>(),vct_index_dtmp.
size(),(Ti *)vct_index_dtmp.template getDeviceBuffer<1>(),context);
1349 vct_index_dtmp.template deviceToHost<0,1>(vct_index_dtmp.
size()-1,vct_index_dtmp.
size()-1);
1350 int size = vct_index_dtmp.template get<1>(vct_index_dtmp.
size()-1) + vct_index_dtmp.template get<0>(vct_index_dtmp.
size()-1);
1352 vct_add_data_cont.resize(
size);
1353 vct_index.resize(
size);
1355 CUDA_LAUNCH(realign_remove,itew,vct_index_tmp3.toKernel(),vct_m_index.toKernel(),vct_data.toKernel(),
1356 vct_index.toKernel(),vct_add_data_cont.toKernel(),
1357 vct_index_dtmp.toKernel());
1359 vct_data.swap(vct_add_data_cont);
// Host (non-nvcc) fallback of flush_on_gpu_remove: report that the GPU
// remove-flush path is unavailable in this build.
// fixed typo in the user-facing diagnostic: "suppose" -> "supposed"
1362 std::cout << __FILE__ <<
":" << __LINE__ <<
" error: you are supposed to compile this file with nvcc, if you want to use it with gpu" << std::endl;
1369 vct_data.resize(vct_data.
size()+1);
1370 vct_data.get(vct_data.
size()-1) = bck;
1372 htoD<decltype(vct_data)> trf(vct_data,vct_data.
size()-1);
1373 boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(trf);
/*! \brief Flush pending device-side insertions: dispatches to flush_on_gpu_insert.
 *
 * \tparam v_reduce reduction operators forwarded to the insert flush
 * \param vct_add_index_cont_0 temporary index buffer (passed through)
 * \param vct_add_index_cont_1 temporary index-map buffer (passed through)
 * \param vct_add_data_reord reordered data buffer (passed through)
 * \param context modern-gpu/ofp context
 */
template<
typename ... v_reduce>
1377 void flush_on_gpu(vector<
aggregate<Ti>,Memory,layout_base,grow_p> & vct_add_index_cont_0,
1378 vector<
aggregate<Ti>,Memory,layout_base,grow_p> & vct_add_index_cont_1,
1379 vector<T,Memory,layout_base,grow_p> & vct_add_data_reord,
1380 mgpu::ofp_context_t & context)
1382 flush_on_gpu_insert<v_reduce ... >(vct_add_index_cont_0,vct_add_index_cont_1,vct_add_data_reord,context);
1385 template<
typename ... v_reduce>
1388 if (vct_add_index.
size() == 0)
1392 reorder_add_index_cpu.resize(vct_add_index.
size());
1393 vct_add_data_cont.resize(vct_add_index.
size());
1395 for (
size_t i = 0 ; i < reorder_add_index_cpu.
size() ; i++)
1397 reorder_add_index_cpu.get(i).id = vct_add_index.template get<0>(i);
1398 reorder_add_index_cpu.get(i).id2 = i;
1401 reorder_add_index_cpu.sort();
1404 for (
size_t i = 0 ; i < reorder_add_index_cpu.
size() ; i++)
1406 vct_add_data_cont.get(i) = vct_add_data.get(reorder_add_index_cpu.get(i).id2);
1409 typedef boost::mpl::vector<v_reduce...> vv_reduce;
1411 sparse_vector_reduction_cpu<decltype(vct_add_data),
1412 decltype(vct_add_index_unique),
1413 decltype(reorder_add_index_cpu),
1416 svr(vct_add_data_unique,
1418 vct_add_index_unique,
1419 reorder_add_index_cpu);
1421 boost::mpl::for_each_ref<boost::mpl::range_c<
int,0,
sizeof...(v_reduce)>>(svr);
1425 vector<T,Memory,layout_base,grow_p,impl> vct_data_tmp;
1426 vector<aggregate<Ti>,Memory,layout_base,grow_p> vct_index_tmp;
1428 vct_data_tmp.resize(vct_data.
size() + vct_add_data_unique.
size());
1429 vct_index_tmp.resize(vct_index.
size() + vct_add_index_unique.
size());
1435 for ( ; i < vct_data_tmp.size() ; i++)
1437 Ti id_a = (ai < vct_add_index_unique.
size())?vct_add_index_unique.template get<0>(ai):std::numeric_limits<Ti>::max();
1438 Ti id_d = (di < vct_index.
size())?vct_index.template get<0>(di):std::numeric_limits<Ti>::max();
1442 vct_index_tmp.template get<0>(i) = id_a;
1446 auto dst = vct_data_tmp.get(i);
1447 auto src = vct_add_data_unique.get(ai);
1449 sparse_vector_reduction_solve_conflict_assign_cpu<decltype(vct_data_tmp.get(i)),
1450 decltype(vct_add_data.get(ai)),
1454 boost::mpl::for_each_ref<boost::mpl::range_c<
int,0,
sizeof...(v_reduce)>>(sva);
1457 dst = vct_data_tmp.get(i);
1458 src = vct_data.get(di);
1460 sparse_vector_reduction_solve_conflict_reduce_cpu<decltype(vct_data_tmp.get(i)),
1461 decltype(vct_data.get(di)),
1465 boost::mpl::for_each_ref<boost::mpl::range_c<
int,0,
sizeof...(v_reduce)>>(svr);
1469 vct_data_tmp.resize(vct_data_tmp.size()-1);
1470 vct_index_tmp.resize(vct_index_tmp.size()-1);
1474 vct_index_tmp.template get<0>(i) = vct_add_index_unique.template get<0>(ai);
1475 vct_data_tmp.get(i) = vct_add_data_unique.get(ai);
1481 vct_index_tmp.template get<0>(i) = vct_index.template get<0>(di);
1482 vct_data_tmp.get(i) = vct_data.get(di);
1487 vct_index.swap(vct_index_tmp);
1488 vct_data.swap(vct_data_tmp);
1490 vct_add_data.clear();
1491 vct_add_index.clear();
1492 vct_add_index_unique.clear();
1493 vct_add_data_unique.clear();
1554 this->_branchfree_search<false>(
id,di);
/*! \brief Read property p of the element with sparse index `id`.
 *
 * Performs a branch-free search over vct_index; when the index is not
 * present, the search redirects to the last (background) slot of vct_data
 * (see _branchfree_search above).
 *
 * NOTE(review): extraction gap — the declaration of the local `di` is not
 * visible in this fragment.
 */
template <
unsigned int p>
1572 inline auto get(Ti
id)
const -> decltype(vct_data.template get<p>(
id))
1575 this->_branchfree_search<false>(
id,di);
1576 return vct_data.template get<p>(di);
/*! \brief Read the whole element with sparse index `id`.
 *
 * Same lookup as the property-wise get<p>: branch-free search over
 * vct_index, falling back to the background slot when the index is absent.
 *
 * NOTE(review): extraction gap — the declaration of the local `di` is not
 * visible in this fragment.
 */
1589 inline auto get(Ti
id)
const -> decltype(vct_data.get(
id))
1592 this->_branchfree_search<false>(
id,di);
1593 return vct_data.get(di);
1624 template <
unsigned int p>
1627 return vct_data.template get<p>(vct_data.
size()-1);
1637 return vct_data.get(vct_data.
size()-1);
1640 template<
unsigned int p>
1641 void setBackground(
const typename boost::mpl::at<
typename T::type, boost::mpl::int_<p>>::type & bck_)
1644 typename std::remove_reference<decltype(vct_data.template get<p>(vct_data.
size()-1))>::type>
1645 ::meta_copy_d_(bck_,vct_data.template get<p>(vct_data.
size()-1));
1647 vct_data.template hostToDevice<p>(vct_data.
size()-1,vct_data.
size()-1);
1650 ::meta_copy_(bck_,bck.template get<p>());
/*! \brief Queue an insertion for sparse index `ele` (property-p view).
 *
 * Appends `ele` to the pending-insert index list and returns a reference
 * to the corresponding slot in the pending-insert data buffer.
 *
 * NOTE(review): extraction gap — the line that grows vct_add_data
 * (presumably vct_add_data.add()) is not visible in this fragment; confirm
 * against the full source.
 */
template <
unsigned int p>
1661 auto insert(Ti
ele) -> decltype(vct_data.template get<p>(0))
1663 vct_add_index.add();
1664 vct_add_index.template get<0>(vct_add_index.
size()-1) =
ele;
1666 return vct_add_data.template get<p>(vct_add_data.
size()-1);
1676 template <
unsigned int p>
1688 return vct_data.template get<p>(di);
1693 vct_index.insert(di);
1696 return vct_data.template get<p>(di);
1710 Ti v = _branchfree_search_nobck<true>(
ele,di);
1715 return vct_data.get(di);
1719 vct_index.insert(di);
1720 vct_data.insert(di);
1723 vct_index.template get<0>(di) =
ele;
1725 return vct_data.get(di);
1735 vct_add_index.add();
1736 vct_add_index.template get<0>(vct_add_index.
size()-1) =
ele;
1738 return vct_add_data.get(vct_add_data.
size()-1);
1748 template<
typename ... v_reduce>
1750 mgpu::ofp_context_t & context,
1751 flush_type opt = FLUSH_ON_HOST,
1755 vct_data.resize(vct_index.
size());
1757 if (opt & flush_type::FLUSH_ON_DEVICE)
1758 {this->flush_on_gpu<v_reduce ... >(vct_add_index_cont_0,vct_add_index_cont_1,vct_add_data_reord,context,i);}
1760 {this->flush_on_cpu<v_reduce ... >();}
1772 template<
typename ... v_reduce>
1774 mgpu::ofp_context_t & context,
1775 flush_type opt = FLUSH_ON_HOST)
1778 vct_data.resize(vct_index.
size());
1780 if (opt & flush_type::FLUSH_ON_DEVICE)
1781 {this->flush_on_gpu<v_reduce ... >(vct_add_index_cont_0,vct_add_index_cont_1,vct_add_data_reord,context);}
1783 {this->flush_on_cpu<v_reduce ... >();}
/*! \brief Merge pending insertions into the main (index,data) arrays.
 *
 * \tparam v_reduce reduction operators applied when inserted entries collide
 * \param context modern-gpu/ofp context (used only by the device path)
 * \param opt FLUSH_ON_HOST (default) or FLUSH_ON_DEVICE
 */
template<
typename ... v_reduce>
1794 void flush(mgpu::ofp_context_t & context, flush_type opt = FLUSH_ON_HOST)
// keep the data array in step with the index array before merging
1797 vct_data.resize(vct_index.
size());
1799 if (opt & flush_type::FLUSH_ON_DEVICE)
1800 {this->flush_on_gpu<v_reduce ... >(vct_add_index_cont_0,vct_add_index_cont_1,vct_add_data_reord,context);}
1802 {this->flush_on_cpu<v_reduce ... >();}
/*! \brief Merge the pending removals into the main (index,data) arrays.
 *
 * \param context modern-gpu/ofp context used by the device path
 * \param opt FLUSH_ON_HOST (default) or FLUSH_ON_DEVICE
 *
 * NOTE(review): extraction gaps — lines are missing from the body below.
 * The device path delegates to flush_on_gpu_remove(); the host path only
 * reports that it is unimplemented.
 */
1812 void flush_remove(mgpu::ofp_context_t & context, flush_type opt = FLUSH_ON_HOST)
1814 vct_data.resize(vct_data.
size()-1);
1816 if (opt & flush_type::FLUSH_ON_DEVICE)
1817 {this->flush_on_gpu_remove(context);}
// fixed grammar in the user-facing error message ("has not" -> "has not been")
1820 std::cerr << __FILE__ <<
":" << __LINE__ <<
" error, flush_remove on CPU has not been implemented yet";
1832 return vct_index.
size();
1850 template<
unsigned int ... prp>
1853 vct_index.template deviceToHost<0>();
1862 template<
unsigned int ... prp>
1865 vct_index.template hostToDevice<0>();
1877 vct_add_index.toKernel(),
1878 vct_rem_index.toKernel(),vct_add_data.toKernel(),
1879 vct_nadd_index.toKernel(),
1880 vct_nrem_index.toKernel(),
1881 n_gpu_add_block_slot,
1882 n_gpu_rem_block_slot);
1895 vct_add_index.resize(nblock*nslot);
1896 vct_nadd_index.resize(nblock);
1897 vct_add_data.resize(nblock*nslot);
1898 n_gpu_add_block_slot = nslot;
1899 vct_nadd_index.template fill<0>(0);
1910 vct_nadd_index.resize(vct_add_index.
size());
1912 if (vct_nadd_index.
size() != 0)
1914 auto ite = vct_nadd_index.getGPUIterator();
1915 CUDA_LAUNCH((set_one_insert_buffer),ite,vct_nadd_index.toKernel());
1917 n_gpu_add_block_slot = 1;
1927 return vct_add_data;
1938 vct_rem_index.resize(nblock*nslot);
1939 vct_nrem_index.resize(nblock);
1940 n_gpu_rem_block_slot = nslot;
1941 vct_nrem_index.template fill<0>(0);
/*! \brief Get a GPU iterator over the stored elements.
 *
 * \return the iterator produced by vct_index.getGPUIterator()
 */
1951 auto getGPUIterator() -> decltype(vct_index.getGPUIterator())
1953 return vct_index.getGPUIterator();
1966 vct_add_index.clear();
1967 vct_add_data.clear();
1970 vct_data.resize(vct_data.
size()+1);
1971 vct_data.get(vct_data.
size()-1) = bck;
1973 htoD<decltype(vct_data)> trf(vct_data,vct_data.
size()-1);
1974 boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(trf);
1977 n_gpu_add_block_slot = 0;
1978 n_gpu_rem_block_slot = 0;
1983 vct_data.swap(sp.vct_data);
1984 vct_index.swap(sp.vct_index);
1985 vct_add_index.swap(sp.vct_add_index);
1986 vct_add_data.swap(sp.vct_add_data);
1988 size_t max_ele_ = sp.max_ele;
1989 sp.max_ele = max_ele;
1990 this->max_ele = max_ele_;
//! Direct (non-copying) access to the pending-insert data buffer.
1993 vector<T,Memory,layout_base,grow_p> & private_get_vct_add_data()
1995 return vct_add_data;
//! Direct access to the pending-insert index buffer.
1998 vector<aggregate<Ti>,Memory,layout_base,grow_p> & private_get_vct_add_index()
2000 return vct_add_index;
//! Const overload of private_get_vct_add_index().
2003 const vector<aggregate<Ti>,Memory,layout_base,grow_p> & private_get_vct_add_index()
const 2005 return vct_add_index;
//! Direct access to the per-block insert-counter buffer (sized one entry per block).
2008 vector<aggregate<Ti>,Memory,layout_base,grow_p> & private_get_vct_nadd_index()
2010 return vct_nadd_index;
//! Const overload of private_get_vct_nadd_index().
2013 const vector<aggregate<Ti>,Memory,layout_base,grow_p> & private_get_vct_nadd_index()
const 2015 return vct_nadd_index;
2018 auto getSegmentToOutMap() -> decltype(blf.get_outputMap())
2020 return blf.get_outputMap();
2023 auto getSegmentToOutMap() const -> decltype(blf.get_outputMap())
2025 return blf.get_outputMap();
2034 vct_add_data.resize(0);
2035 vct_add_data.shrink_to_fit();
2037 vct_add_data.resize(0);
2038 vct_add_data.shrink_to_fit();
2040 vct_add_data_reord.resize(0);
2041 vct_add_data_reord.shrink_to_fit();
2043 vct_add_data_cont.resize(0);
2044 vct_add_data_cont.shrink_to_fit();
2046 vct_add_data_unique.resize(0);
2047 vct_add_data_unique.shrink_to_fit();
2056 return vct_add_index_unique;
2059 vector<aggregate<Ti,Ti>,Memory,layout_base,grow_p> & getSegmentToMergeIndexMap()
const 2061 return vct_add_index_unique;
2084 return vct_add_index_cont_1;
2107 return vct_index_tmp2;
2112 template<
typename T,
unsigned int blockSwitch = VECTOR_SPARSE_STANDARD,
typename block_functor = stub_block_functor,
typename indexT =
int>
2120 vect_isel<T>::value,
2125 template<
typename T,
typename block_functor = stub_block_functor,
typename indexT =
long int>
2133 vect_isel<T>::value,
2134 VECTOR_SPARSE_BLOCK,
void operator()(T &t) const
It calls the copy function for each property.
void flush_remove(mgpu::ofp_context_t &context, flush_type opt=FLUSH_ON_HOST)
merge the added element to the main data array
encap_dst & dst
destination
this class is a functor for "for_each" algorithm
convert a type into constant type
vector_data_type & vector_data_red
Vector in which to store the reduction.
this class is a functor for "for_each" algorithm
void deviceToHost()
Transfer from device to host.
void reorder_indexes(vector< aggregate< Ti >, Memory, layout_base, grow_p > &vct_add_cont_index, vector< aggregate< Ti >, Memory, layout_base, grow_p > &vct_add_cont_index_map, vector< T, Memory, layout_base, grow_p > &vct_add_data_reord, vector< T, Memory, layout_base, grow_p > &vct_add_data_cont, mgpu::ofp_context_t &context)
sort the continuous array of inserted keys
Transform the boost::fusion::vector into memory specification (memory_traits)
void setGPURemoveBuffer(int nblock, int nslot)
set the gpu remove buffer for every block
void operator()(T &t) const
It calls the copy function for each property.
void preFlush()
In case we manually set the added index buffer and the add data buffer we have to call this function ...
this class is a functor for "for_each" algorithm
block_functor & blf
block functor
vector_data_type & vector_data
Vector in which to store the reduction.
sparse_vector_reduction_cpu(vector_data_type &vector_data_red, vector_data_type &vector_data, vector_index_type &vector_index, vector_index_type_reo &reorder_add_index_cpu)
constructor
virtual bool allocate(size_t sz)
allocate memory
void setGPUInsertBuffer(int nblock, int nslot)
set the gpu insert buffer for every block
sparse_vector_reduction(vector_data_type &vector_data_red, vector_data_type &vector_data, vector_data_type &vector_data_unsorted, vector_index_type &vector_data_map, vector_index_type2 &segment_offset, block_functor &blf, mgpu::ofp_context_t &context)
constructor
vector_data_type & vector_data_red
Vector in which to store the reduction.
sparse_vector_reduction_solve_conflict_assign_cpu(encap_src &src, encap_dst &dst)
constructor
Functor switch to select the sparse vector for the standard scalar and blocked implementations.
auto getDataBuffer() const -> const decltype(vct_data)&
Get the data buffer.
vector_data_type & vector_data_unsorted
new data in an unsorted way
encap_dst & dst
destination
vector< aggregate< Ti >, Memory, layout_base, grow_p > & getMappingVector()
Return the mapping vector.
vector_index_type2 & segment_offset
segment of offsets
virtual void * getPointer()
get a readable pointer with the data
mgpu::ofp_context_t & context
gpu context
vector< aggregate< Ti >, Memory, layout_base, grow_p > & private_get_vct_index()
Return the sorted vector of the indexes.
vector_data_type & vector_data
new data
void operator()(T &t) const
It calls the copy function for each property.
auto get(Ti id) const -> decltype(vct_data.get(id))
Get an element of the vector.
void hostToDevice()
Transfer from host to device.
auto getGPUInsertBuffer() -> decltype(vct_add_data)&
Get the GPU insert buffer.
This class allocates and destroys CPU memory.
vector_data_type & vector_data
Vector in which to store the reduction.
Transform the boost::fusion::vector into memory specification (memory_traits)
void flush_vd(vector< T, Memory, layout_base, grow_p > &vct_add_data_reord, mgpu::ofp_context_t &context, flush_type opt=FLUSH_ON_HOST)
merge the added element to the main data array but save the insert buffer in v
vector_index_type & vector_index
Index type vector.
virtual void fill(unsigned char c)
fill the buffer with a byte
auto insertFlush(Ti ele, bool &is_new) -> decltype(vct_data.template get< p >(0))
It insert an element in the sparse vector.
vector_index_type_reo & reorder_add_index_cpu
reorder vector index
virtual void * getDevicePointer()
get a readable pointer with the data
inter_memc< typename T::type >::type type
for each element in the vector interleave memory_c
sparse_vector_reduction_solve_conflict_reduce_cpu(encap_src &src, encap_dst &dst)
constructor
sparse_vector_special(vector_data_type &vector_data_red, vector_data_type &vector_data, vector_index_type &segment_offset, mgpu::ofp_context_t &context)
constructor
auto getBackground() const -> decltype(vct_data.get(vct_data.size() -1))
Get the background bck (the value that get must return when the requested element is not found)
vector_index_type & segment_offset
segment of offsets
auto getIndexBuffer() const -> const decltype(vct_index)&
Get the indices buffer.
void flush_v(vector< aggregate< Ti >, Memory, layout_base, grow_p > &vct_add_index_cont_0, mgpu::ofp_context_t &context, flush_type opt=FLUSH_ON_HOST, int i=0)
merge the added element to the main data array but save the insert buffer in v
Ti _branchfree_search_nobck(Ti x, Ti &id) const
get the element i
void operator()(T &t) const
It calls the copy function for each property.
vector_index_type & vector_data_map
map of the data
void _branchfree_search(Ti x, Ti &id) const
get the element i
auto insert(Ti ele) -> decltype(vct_data.get(0))
It insert an element in the sparse vector.
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int
[in] The number of segments that comprise the sorting data
size_t size()
Return how many elements are in this map.
vector_sparse_gpu_ker< T, Ti, layout_base > toKernel()
toKernel function transform this structure into one that can be used on GPU
auto getIndexBuffer() -> decltype(vct_index)&
Get the indices buffer.
void removeUnusedBuffers()
Eliminate many internal temporary buffer you can use this between flushes if you get some out of memo...
static void solveConflicts(vector_index_type &vct_index_old, vector_index_type &vct_index_merge, vector_index_type &vct_index_merge_id, vector_index_type &vct_index_out, vector_index_dtmp_type &vct_index_dtmp, vector_index_type &data_map, vector_index_type2 &segments_new, vector_data_type &vct_data_old, vector_data_type &vct_add_data, vector_data_type &vct_add_data_unique, vector_data_type &vct_data_out, ite_gpu< 1 > &itew, block_functor &blf, mgpu::ofp_context_t &context)
void resize(size_t n)
resize to n elements
virtual void deviceToHost()
Move memory from device to host.
void flush(mgpu::ofp_context_t &context, flush_type opt=FLUSH_ON_HOST)
merge the added element to the main data array
auto getDataBuffer() -> decltype(vct_data)&
Get the data buffer.
void operator()(T &t) const
It calls the copy function for each property.
auto get(Ti id) const -> decltype(vct_data.template get< p >(id))
Get an element of the vector.
auto getBackground() const -> decltype(vct_data.template get< p >(vct_data.size() -1))
Set the background to bck (which value get must return when the value is not find)
sg_type & sg
encapsulated source object
auto insertFlush(Ti ele, bool &is_new) -> decltype(vct_data.get(0))
It insert an element in the sparse vector.
this class is a functor for "for_each" algorithm
void clear()
Clear all the elements.
aggregate of properties; from a list of objects it creates a struct that follows the OpenFPM native stru...
vector_data_type & vector_data_red
Vector in which to store the reduction.
temporary buffer for reductions
OutputIteratorT OffsetT ReductionOpT OuputT init
< [in] The initial value of the reduction
this class is a functor for "for_each" algorithm
Implementation of 1-D std::vector like structure.
vector< aggregate< Ti >, Memory, layout_base, grow_p > & getMergeIndexMapVector()
Return the merge mapping vector.
auto insert(Ti ele) -> decltype(vct_data.template get< p >(0))
It insert an element in the sparse vector.
void merge_indexes(vector< aggregate< Ti >, Memory, layout_base, grow_p > &vct_add_index_sort, vector< aggregate< Ti, Ti >, Memory, layout_base, grow_p > &vct_add_index_unique, vector< aggregate< Ti >, Memory, layout_base, grow_p > &vct_merge_index, vector< aggregate< Ti >, Memory, layout_base, grow_p > &vct_merge_index_map, mgpu::ofp_context_t &context)
Merge indexes.
mgpu::ofp_context_t & context
gpu context
void swapIndexVector(vector< aggregate< Ti >, Memory, layout_base, grow_p > &iv)
__device__ __host__ void operator()(T &t) const
It calls the copy function for each property.
openfpm::sparse_index< Ti > get_sparse(Ti id) const
Get the sparse index.