OpenFPM_pdata 4.1.0
Project that contains the implementation of distributed structures
CellList_gpu.hpp
/*
 * CellList_gpu.hpp
 *
 *  Created on: Jun 11, 2018
 *      Author: i-bird
 */

#ifndef OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_
#define OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_

#include "config.h"

#ifdef CUDA_GPU

#include "Vector/map_vector_sparse.hpp"
#include "NN/CellList/CellDecomposer.hpp"
#include "Vector/map_vector.hpp"
#include "Cuda_cell_list_util_func.hpp"
#include "NN/CellList/cuda/CellList_gpu_ker.cuh"
#include "util/cuda_util.hpp"
#include "NN/CellList/CellList_util.hpp"
#include "NN/CellList/CellList.hpp"
#include "util/cuda/scan_ofp.cuh"

constexpr int count = 0;
constexpr int start = 1;
template<unsigned int dim, typename T,
         typename cnt_type, typename ids_type,
         typename Memory, typename transform,
         typename vector_cnt_type, typename vector_cnt_type2,
         typename cl_sparse_type,
         bool is_sparse>
struct CellList_gpu_ker_selector
{
    static inline CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse> get(vector_cnt_type & starts,
                vector_cnt_type & cell_nn,
                vector_cnt_type2 & cell_nn_list,
                cl_sparse_type & cl_sparse,
                vector_cnt_type & sorted_to_not_sorted,
                vector_cnt_type & sorted_domain_particles_ids,
                openfpm::vector_gpu<aggregate<int>> & nnc_rad,
                openfpm::array<T,dim,cnt_type> & spacing_c,
                openfpm::array<ids_type,dim,cnt_type> & div_c,
                openfpm::array<ids_type,dim,cnt_type> & off,
                const transform & t,
                unsigned int g_m,
                const SpaceBox<dim,T> & box_unit,
                const grid_sm<dim,void> & gr_cell,
                const Point<dim,long int> & cell_shift)
    {
        return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse>(starts.toKernel(),
                sorted_to_not_sorted.toKernel(),
                sorted_domain_particles_ids.toKernel(),
                nnc_rad.toKernel(),
                spacing_c,
                div_c,
                off,
                t,
                g_m,
                box_unit,
                gr_cell,
                cell_shift);
    }
};

template<unsigned int dim, typename T,
         typename cnt_type, typename ids_type,
         typename Memory, typename transform,
         typename vector_cnt_type, typename vector_cnt_type2,
         typename cl_sparse_type>
struct CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform,vector_cnt_type,vector_cnt_type2,cl_sparse_type,true>
{
    static CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,true> get(vector_cnt_type & starts,
                vector_cnt_type & cell_nn,
                vector_cnt_type2 & cell_nn_list,
                cl_sparse_type & cl_sparse,
                vector_cnt_type & srt,
                vector_cnt_type & dprt,
                openfpm::vector_gpu<aggregate<int>> & nnc_rad,
                openfpm::array<T,dim,cnt_type> & spacing_c,
                openfpm::array<ids_type,dim,cnt_type> & div_c,
                openfpm::array<ids_type,dim,cnt_type> & off,
                const transform & t,
                unsigned int g_m,
                const SpaceBox<dim,T> & box_unit,
                const grid_sm<dim,void> & gr_cell,
                const Point<dim,long int> & cell_shift)
    {
        return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,true>(cell_nn.toKernel(),
                cell_nn_list.toKernel(),
                cl_sparse.toKernel(),
                srt.toKernel(),
                dprt.toKernel(),
                spacing_c,
                div_c,
                off,
                t,
                g_m,
                box_unit,
                gr_cell,
                cell_shift);
    }
};
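
/* CellList_gpu_ker_selector is compile-time dispatch through partial
 * specialization: the primary template above builds the dense kernel view,
 * the is_sparse = true specialization builds the sparse one, so
 * CellList_gpu::toKernel() picks the right CellList_gpu_ker constructor at
 * compile time. Minimal sketch of the pattern (illustrative only):
 *
 * \code
 * template<bool is_sparse> struct selector
 * { static int get() { return 0; } };  // dense flavour
 *
 * template<> struct selector<true>
 * { static int get() { return 1; } };  // sparse flavour
 *
 * int r = selector<true>::get();       // r == 1, chosen at compile time
 * \endcode
 */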

template<unsigned int dim,
         typename T,
         typename Memory,
         typename transform = no_transform_only<dim,T>,
         typename cnt_type = unsigned int,
         typename ids_type = int,
         bool is_sparse = false>
class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
{
    typedef openfpm::vector<aggregate<cnt_type>,Memory,memory_traits_inte> vector_cnt_type;

    //! Number of particles in each cell
    vector_cnt_type cl_n;

    //! Particle ids ordered by cell
    vector_cnt_type cells;

    //! Scan (prefix sum) of cl_n: start offset of each cell
    vector_cnt_type starts;

    //! Cell id of each particle
    openfpm::vector<aggregate<cnt_type[2]>,Memory,memory_traits_inte> part_ids;

    //! Sparse vector of the non-empty cells (sparse cell-list only)
    openfpm::vector_sparse_gpu<aggregate<cnt_type>> cl_sparse;

    //! Number of neighborhood cells of each cell (sparse cell-list only)
    vector_cnt_type cells_nn;

    //! List of neighborhood cells of each cell (sparse cell-list only)
    openfpm::vector<aggregate<cnt_type,cnt_type>,Memory,memory_traits_inte> cells_nn_list;

    //! Stencil radius (in cells) used to build cells_nn_test
    int cells_nn_test_size;

    //! Linearized offsets of the neighborhood stencil cells
    openfpm::vector_gpu<aggregate<int>> cells_nn_test;

    //! Sorted to non-sorted particle-id map
    vector_cnt_type sorted_to_not_sorted;

    //! Domain/ghost markers of the sorted particles
    vector_cnt_type sorted_domain_particles_dg;

    //! Ids of the sorted domain particles
    vector_cnt_type sorted_domain_particles_ids;

    //! Non-sorted to sorted particle-id map
    vector_cnt_type non_sorted_to_sorted;

    //! Spacing of the cells in each direction
    openfpm::array<T,dim,cnt_type> spacing_c;

    //! Number of cells in each direction
    openfpm::array<ids_type,dim,cnt_type> div_c;

    //! Cell padding in each direction
    openfpm::array<ids_type,dim,cnt_type> off;

    //! Linearized cell offsets for the radius-based neighborhood
    openfpm::vector_gpu<aggregate<int>> nnc_rad;

    //! Decomposition counter, to check cell-list / decomposition consistency
    size_t n_dec;

    //! Initialize the cell-list structures
    void InitializeStructures(const size_t (& div)[dim], size_t tot_n_cell, size_t pad)
    {
        for (size_t i = 0 ; i < dim ; i++)
        {
            div_c[i] = div[i];
            spacing_c[i] = this->getCellBox().getP2().get(i);
            off[i] = pad;
        }

        cl_n.resize(tot_n_cell);

        cells_nn_test_size = 1;
        construct_cell_nn_test(cells_nn_test_size);
    }

    void construct_cell_nn_test(unsigned int box_nn = 1)
    {
        auto & gs = this->getGrid();

        grid_key_dx<dim> start;
        grid_key_dx<dim> stop;
        grid_key_dx<dim> middle;

        for (size_t i = 0 ; i < dim ; i++)
        {
            start.set_d(i,0);
            stop.set_d(i,2*box_nn);
            middle.set_d(i,box_nn);
        }

        cells_nn_test.resize(openfpm::math::pow(2*box_nn+1,dim));

        int mid = gs.LinId(middle);

        grid_key_dx_iterator_sub<dim> it(gs,start,stop);

        size_t i = 0;
        while (it.isNext())
        {
            auto p = it.get();

            cells_nn_test.template get<0>(i) = (int)gs.LinId(p) - mid;

            ++i;
            ++it;
        }

        cells_nn_test.template hostToDevice<0>();

#if defined(__NVCC__) && defined(USE_LOW_REGISTER_ITERATOR)

        // copy to the constant memory
        cudaMemcpyToSymbol(cells_striding,cells_nn_test.template getPointer<0>(),cells_nn_test.size()*sizeof(int));

#endif
    }
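
    /* construct_cell_nn_test enumerates a (2*box_nn+1)^dim stencil around the
     * middle cell and stores, for every stencil cell, its linearized offset
     * relative to the middle. A standalone sketch of the same computation for
     * dim = 2 and box_nn = 1 (illustrative only):
     *
     * \code
     * int sz[2] = {64, 64};                       // cells per direction, id = y*sz[0] + x
     * std::vector<int> offsets;
     * for (int dy = -1 ; dy <= 1 ; dy++)
     *     for (int dx = -1 ; dx <= 1 ; dx++)
     *         offsets.push_back(dy * sz[0] + dx); // 9 offsets, 0 = the cell itself
     * \endcode
     */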

    /*! \brief Construct the sparse cell-list from the particle positions
     */
    template<typename vector, typename vector_prp, unsigned int ... prp>
    void construct_sparse(vector & pl,
                          vector & pl_out,
                          vector_prp & pl_prp,
                          vector_prp & pl_prp_out,
                          gpu::ofp_context_t & gpuContext,
                          size_t g_m,
                          size_t start,
                          size_t stop,
                          cl_construct_opt opt = cl_construct_opt::Full)
    {
#ifdef __NVCC__

        part_ids.resize(stop - start);
        starts.resize(stop - start);

        // Then we construct the ids

        auto ite_gpu = pl.getGPUIteratorTo(stop-start);

        if (ite_gpu.wthr.x == 0)
        {
            return;
        }

        CUDA_LAUNCH((subindex<true,dim,T,cnt_type,ids_type>),ite_gpu,div_c,
                    spacing_c,
                    off,
                    this->getTransform(),
                    pl.size(),
                    start,
                    pl.toKernel(),
                    starts.toKernel(),
                    part_ids.toKernel());

        // now we construct the cells

        cells.resize(stop-start);

        // Here we fill the sparse vector
        cl_sparse.clear();
        cl_sparse.template setBackground<0>((cnt_type)-1);
        cl_sparse.setGPUInsertBuffer(ite_gpu.wthr.x,ite_gpu.thr.x);
        CUDA_LAUNCH((fill_cells_sparse),ite_gpu,cl_sparse.toKernel(),starts.toKernel());
        cl_sparse.template flush_vd<sstart_<0>>(cells,gpuContext,FLUSH_ON_DEVICE);

        cells_nn.resize(cl_sparse.size()+1);
        cells_nn.template fill<0>(0);

        // Here we construct the neighborhood cells for each cell
        auto itgg = cl_sparse.getGPUIterator();
        CUDA_LAUNCH((count_nn_cells),itgg,cl_sparse.toKernel(),cells_nn.toKernel(),cells_nn_test.toKernel());

        // now we scan
        openfpm::scan((cnt_type *)cells_nn.template getDeviceBuffer<0>(), cells_nn.size(), (cnt_type *)cells_nn.template getDeviceBuffer<0>(), gpuContext);

        cells_nn.template deviceToHost<0>(cells_nn.size() - 1, cells_nn.size() - 1);
        size_t n_nn_cells = cells_nn.template get<0>(cells_nn.size() - 1);

        cells_nn_list.resize(n_nn_cells);

        CUDA_LAUNCH((fill_nn_cells),itgg,cl_sparse.toKernel(),cells_nn.toKernel(),cells_nn_test.toKernel(),cells_nn_list.toKernel(),cells.size());

        sorted_to_not_sorted.resize(stop-start);
        non_sorted_to_sorted.resize(pl.size());

        auto ite = pl.getGPUIteratorTo(stop-start,64);

        // Here we reorder the particles to improve coalesced access
        CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()),
                    decltype(pl.toKernel()),
                    decltype(sorted_to_not_sorted.toKernel()),
                    decltype(cells.toKernel()),
                    cnt_type,shift_ph<0,cnt_type>>),ite,sorted_to_not_sorted.size(),
                    pl_prp.toKernel(),
                    pl_prp_out.toKernel(),
                    pl.toKernel(),
                    pl_out.toKernel(),
                    sorted_to_not_sorted.toKernel(),
                    non_sorted_to_sorted.toKernel(),
                    cells.toKernel());

        if (opt == cl_construct_opt::Full)
        {
            construct_domain_ids(gpuContext,start,stop,g_m);
        }

#else

        std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " CellList_gpu.construct() must be compiled with the NVCC compiler, but it looks like it was compiled by the standard system compiler" << std::endl;

#endif
    }
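
    /* construct_sparse uses the count-then-scan idiom twice: a vector of counts
     * with one extra trailing slot is exclusive-scanned in place, so element i
     * becomes the start offset of segment i and the last element the total size
     * (read back with the single-element deviceToHost above). Host-side sketch
     * of the idiom (illustrative only):
     *
     * \code
     * std::vector<unsigned int> cnt = {3, 1, 0, 2, 0};      // counts + extra slot
     * unsigned int sum = 0;
     * for (auto & c : cnt) { unsigned int t = c; c = sum; sum += t; }
     * // cnt = {0, 3, 4, 4, 6}: per-segment starts, cnt.back() == total (6)
     * \endcode
     */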

    /*! \brief Construct the ids of the domain particles (particles below the ghost marker)
     */
    void construct_domain_ids(gpu::ofp_context_t & gpuContext, size_t start, size_t stop, size_t g_m)
    {
#ifdef __NVCC__
        sorted_domain_particles_dg.resize(stop-start+1);

        auto ite = sorted_domain_particles_dg.getGPUIterator();

        CUDA_LAUNCH((mark_domain_particles),ite,sorted_to_not_sorted.toKernel(),sorted_domain_particles_dg.toKernel(),g_m);

        // let's scan
        openfpm::scan((unsigned int *)sorted_domain_particles_dg.template getDeviceBuffer<0>(),sorted_domain_particles_dg.size(),(unsigned int *)sorted_domain_particles_dg.template getDeviceBuffer<0>(),gpuContext);

        sorted_domain_particles_dg.template deviceToHost<0>(sorted_domain_particles_dg.size()-1,sorted_domain_particles_dg.size()-1);
        auto sz = sorted_domain_particles_dg.template get<0>(sorted_domain_particles_dg.size()-1);

        sorted_domain_particles_ids.resize(sz);

        CUDA_LAUNCH((collect_domain_ghost_ids),ite,sorted_domain_particles_dg.toKernel(),sorted_domain_particles_ids.toKernel());
#endif
    }
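
    /* mark + scan + collect is a standard stream compaction: write a 1 for every
     * element to keep, exclusive-scan the flags to obtain output positions, then
     * scatter the kept indices. Host-side sketch (illustrative only):
     *
     * \code
     * std::vector<unsigned int> flags = {1, 0, 1, 1, 0};    // keep-markers
     * std::vector<unsigned int> pos(flags.size());
     * unsigned int s = 0;
     * for (size_t i = 0 ; i < flags.size() ; i++) { pos[i] = s; s += flags[i]; }
     * std::vector<unsigned int> out(s);                     // s == 3 kept elements
     * for (size_t i = 0 ; i < flags.size() ; i++)
     *     if (flags[i]) { out[pos[i]] = i; }                // out = {0, 2, 3}
     * \endcode
     */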

    /*! \brief Construct the dense cell-list from the particle positions
     */
    template<typename vector, typename vector_prp, unsigned int ... prp>
    void construct_dense(vector & pl,
                         vector & pl_out,
                         vector_prp & pl_prp,
                         vector_prp & pl_prp_out,
                         gpu::ofp_context_t & gpuContext,
                         size_t g_m,
                         size_t start,
                         size_t stop,
                         cl_construct_opt opt = cl_construct_opt::Full)
    {
#ifdef __NVCC__

        // Then we construct the ids

        auto ite_gpu = pl.getGPUIteratorTo(stop-start-1);

        cl_n.resize(this->gr_cell.size()+1);
        cl_n.template fill<0>(0);

        part_ids.resize(stop - start);

        if (ite_gpu.wthr.x == 0 || pl.size() == 0 || stop == 0)
        {
            // no particles
            starts.resize(cl_n.size());
            starts.template fill<0>(0);
            return;
        }

        CUDA_LAUNCH((subindex<false,dim,T,cnt_type,ids_type>),ite_gpu,div_c,
                    spacing_c,
                    off,
                    this->getTransform(),
                    stop,
                    start,
                    pl.toKernel(),
                    cl_n.toKernel(),
                    part_ids.toKernel());

        // now we scan
        starts.resize(cl_n.size());
        openfpm::scan((cnt_type *)cl_n.template getDeviceBuffer<0>(), cl_n.size(), (cnt_type *)starts.template getDeviceBuffer<0>(), gpuContext);

        // now we construct the cells

        cells.resize(stop-start);
        auto itgg = part_ids.getGPUIterator();

#ifdef MAKE_CELLLIST_DETERMINISTIC

        CUDA_LAUNCH((fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>>),itgg,0,
                    part_ids.size(),
                    cells.toKernel());

        // sort

        gpu::mergesort(static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()),static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()),pl.size(),gpu::less_t<cnt_type>(),gpuContext);

#else

        CUDA_LAUNCH((fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>>),itgg,0,
                    div_c,
                    off,
                    part_ids.size(),
                    start,
                    starts.toKernel(),
                    part_ids.toKernel(),
                    cells.toKernel());

#endif

        sorted_to_not_sorted.resize(stop-start);
        non_sorted_to_sorted.resize(pl.size());

        auto ite = pl.getGPUIteratorTo(stop-start,64);

        if (sizeof...(prp) == 0)
        {
            // Here we reorder the particles to improve coalesced access
            CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()),
                        decltype(pl.toKernel()),
                        decltype(sorted_to_not_sorted.toKernel()),
                        decltype(cells.toKernel()),
                        cnt_type,shift_ph<0,cnt_type>>),ite,sorted_to_not_sorted.size(),
                        pl_prp.toKernel(),
                        pl_prp_out.toKernel(),
                        pl.toKernel(),
                        pl_out.toKernel(),
                        sorted_to_not_sorted.toKernel(),
                        non_sorted_to_sorted.toKernel(),
                        cells.toKernel());
        }
        else
        {
            // Here we reorder the particles and the selected properties to improve coalesced access
            CUDA_LAUNCH((reorder_parts_wprp<decltype(pl_prp.toKernel()),
                        decltype(pl.toKernel()),
                        decltype(sorted_to_not_sorted.toKernel()),
                        decltype(cells.toKernel()),
                        cnt_type,shift_ph<0,cnt_type>,prp...>),ite,sorted_to_not_sorted.size(),
                        pl_prp.toKernel(),
                        pl_prp_out.toKernel(),
                        pl.toKernel(),
                        pl_out.toKernel(),
                        sorted_to_not_sorted.toKernel(),
                        non_sorted_to_sorted.toKernel(),
                        cells.toKernel());
        }

        if (opt == cl_construct_opt::Full)
        {
            construct_domain_ids(gpuContext,start,stop,g_m);
        }

#else

        std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " CellList_gpu.construct() must be compiled with the NVCC compiler, but it looks like it was compiled by the standard system compiler" << std::endl;

#endif
    }
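
    /* construct_dense is a GPU counting sort: subindex counts particles per cell
     * (cl_n), the exclusive scan turns counts into per-cell start offsets
     * (starts), and fill_cells scatters every particle id to
     * starts[cell] + local_index. Sequential sketch of the same algorithm
     * (illustrative only):
     *
     * \code
     * std::vector<int> cell_of = {2, 0, 2, 1};             // cell id per particle
     * std::vector<int> cl_n(3 + 1, 0), cells(4);
     * for (int c : cell_of) { cl_n[c]++; }                 // counts: {1,1,2,0}
     * int s = 0;
     * for (auto & c : cl_n) { int t = c; c = s; s += t; }  // starts: {0,1,2,4}
     * std::vector<int> fill = cl_n;                        // running insert cursor
     * for (int p = 0 ; p < 4 ; p++) { cells[fill[cell_of[p]]++] = p; }
     * // cells = {1, 3, 0, 2}: particle ids grouped by cell
     * \endcode
     */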

public:

    //! Indicate that this is a GPU cell-list
    typedef int yes_is_gpu_celllist;

    //! Type of the space
    typedef T stype;

    //! Number of dimensions
    static const unsigned int dims = dim;

    //! Count type
    typedef cnt_type cnt_type_;

    //! Ids type
    typedef ids_type ids_type_;

    //! Transformation type
    typedef transform transform_;

    //! True if the cell-list is sparse
    typedef boost::mpl::bool_<is_sparse> is_sparse_;

    /*! \brief Copy constructor
     */
    CellList_gpu(const CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> & clg)
    {
        this->operator=(clg);
    }

    /*! \brief Move constructor
     */
    CellList_gpu(CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> && clg)
    {
        this->operator=(clg);
    }

    /*! \brief Default constructor
     */
    CellList_gpu()
    {}

    CellList_gpu(const Box<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1)
    {
        Initialize(box,div,pad);
    }

    /*! \brief Initialize the cell-list
     *
     * \param box domain
     * \param div number of cells in each direction
     * \param pad cell padding
     */
    void Initialize(const Box<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1)
    {
        SpaceBox<dim,T> sbox(box);

        // Initialize point transformation

        Initialize(sbox,div,pad);
    }

    void setBoxNN(unsigned int n_NN)
    {
        cells_nn_test_size = n_NN;
        construct_cell_nn_test(n_NN);
    }

    void re_setBoxNN()
    {
        construct_cell_nn_test(cells_nn_test_size);
    }

    /*! \brief Initialize the cell-list
     *
     * \param box domain
     * \param div number of cells in each direction
     * \param pad cell padding
     */
    void Initialize(const SpaceBox<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1)
    {
        Matrix<dim,T> mat;
        CellDecomposer_sm<dim,T,transform>::setDimensions(box,div, mat, pad);

        // create the array that stores the number of particles in each cell and set it to 0
        InitializeStructures(this->gr_cell.getSize(),this->gr_cell.size(),pad);
    }

    vector_cnt_type & getSortToNonSort()
    {
        return sorted_to_not_sorted;
    }

    vector_cnt_type & getNonSortToSort()
    {
        return non_sorted_to_sorted;
    }

    vector_cnt_type & getDomainSortIds()
    {
        return sorted_domain_particles_ids;
    }

    /*! \brief Set the radius for the radius-based neighborhood
     *
     * \param radius the neighborhood radius
     */
    void setRadius(T radius)
    {
        openfpm::vector<aggregate<int>> nnc_rad_;

        NNcalc_rad(radius,nnc_rad_,this->getCellBox(),this->getGrid());

        nnc_rad.resize(nnc_rad_.size(),0);

        // copy to nnc_rad

        for (unsigned int i = 0 ; i < nnc_rad_.size() ; i++)
        {nnc_rad.template get<0>(i) = nnc_rad_.template get<0>(i);}

        nnc_rad.template hostToDevice<0>();
    }

    /*! \brief Construct the cell-list from the particle positions
     *
     * \param pl particle positions, pl_out reordered output positions
     * \param pl_prp particle properties, pl_prp_out reordered output properties
     */
    template<typename vector, typename vector_prp, unsigned int ... prp>
    void construct(vector & pl,
                   vector & pl_out,
                   vector_prp & pl_prp,
                   vector_prp & pl_prp_out,
                   gpu::ofp_context_t & gpuContext,
                   size_t g_m = 0,
                   size_t start = 0,
                   size_t stop = (size_t)-1,
                   cl_construct_opt opt = cl_construct_opt::Full)
    {
        // if stop is the default, set it to the number of particles
        if (stop == (size_t)-1)
        {stop = pl.size();}

        if (is_sparse == false) {construct_dense<vector,vector_prp,prp...>(pl,pl_out,pl_prp,pl_prp_out,gpuContext,g_m,start,stop,opt);}
        else {construct_sparse<vector,vector_prp,prp...>(pl,pl_out,pl_prp,pl_prp_out,gpuContext,g_m,start,stop,opt);}
    }
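
    /* Typical host-side construction sequence. The particle vectors (pl, pl_prp
     * and their *_out reordered copies) and the gpu context are hypothetical
     * names; only the CellList_gpu calls come from this header:
     *
     * \code
     * Box<3,float> box({0.0f,0.0f,0.0f},{1.0f,1.0f,1.0f});
     * size_t div[3] = {16,16,16};
     * CellList_gpu<3,float,CudaMemory> cl(box,div,1);
     *
     * cl.construct(pl,pl_out,pl_prp,pl_prp_out,gpuContext,g_m);
     * \endcode
     */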

    /*! \brief Transform this cell-list into a version usable inside GPU kernels
     *
     * \return a CellList_gpu_ker view of this cell-list
     */
    CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse> toKernel()
    {
/*      if (nnc_rad.size() == 0)   <----- Cannot call this anymore with openMP
        {
            // set the radius equal to the cell spacing on direction X
            // (must be initialized to something to avoid warnings)
            setRadius(this->getCellBox().getHigh(0));
        }*/

        return CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform,
                                         vector_cnt_type,decltype(cells_nn_list),
                                         decltype(cl_sparse),is_sparse>
               ::get(starts,
                     cells_nn,
                     cells_nn_list,
                     cl_sparse,
                     sorted_to_not_sorted,
                     sorted_domain_particles_ids,
                     nnc_rad,
                     spacing_c,
                     div_c,
                     off,
                     this->getTransform(),
                     g_m,
                     this->box_unit,
                     this->gr_cell,
                     this->cell_shift);
    }
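
    /* CellList_gpu itself cannot be dereferenced on the device; toKernel()
     * returns the lightweight CellList_gpu_ker view, which is passed by value
     * to a kernel. Sketch (the kernel is hypothetical; the query API of the
     * view is defined in CellList_gpu_ker.cuh, not in this header):
     *
     * \code
     * template<typename cl_ker_type>
     * __global__ void use_cell_list(cl_ker_type cl)
     * {
     *     // query cl here (cell of a point, neighborhood iteration, ...)
     * }
     *
     * auto cl_ker = cl.toKernel();
     * use_cell_list<<<n_blocks,n_threads>>>(cl_ker);
     * \endcode
     */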

    /*! \brief Clear the structure
     */
    void clear()
    {
        cl_n.clear();
        cells.clear();
        starts.clear();
        part_ids.clear();
        sorted_to_not_sorted.clear();
    }

    //! Ghost marker: particles below g_m are domain particles, the rest are ghosts
    size_t g_m = 0;

    /*! \brief Get the ghost marker
     *
     * \return the ghost marker
     */
    inline size_t get_gm()
    {
        return g_m;
    }

    /*! \brief Set the ghost marker
     *
     * \param g_m the ghost marker
     */
    inline void set_gm(size_t g_m)
    {
        this->g_m = g_m;
    }

    /*! \brief Set the n_dec number
     *
     * \param n_dec decomposition counter
     */
    void set_ndec(size_t n_dec)
    {
        this->n_dec = n_dec;
    }

    /*! \brief Get the n_dec number
     *
     * \return the decomposition counter
     */
    size_t get_ndec() const
    {
        return n_dec;
    }

    /*! \brief Transfer the cell-list structures from device to host (for debugging)
     */
    void debug_deviceToHost()
    {
        cl_n.template deviceToHost<0>();
        cells.template deviceToHost<0>();
        starts.template deviceToHost<0>();
    }

    /*! \brief Return the number of cells
     *
     * \return the number of cells
     */
    size_t getNCells()
    {
        return cl_n.size();
    }

    /*! \brief Return the number of elements in the cell
     *
     * \param i cell id
     *
     * \return the number of elements in the cell
     */
    size_t getNelements(size_t i)
    {
        return cl_n.template get<0>(i);
    }

    /*! \brief Get an element in the cell
     *
     * \param cell cell id
     * \param ele element id inside the cell
     *
     * \return the particle id
     */
    inline auto get(size_t cell, size_t ele) -> decltype(cells.template get<0>(starts.template get<0>(cell)+ele))
    {
        return cells.template get<0>(starts.template get<0>(cell)+ele);
    }

    /*! \brief Get an element in the cell
     *
     * \param cell cell id
     * \param ele element id inside the cell
     *
     * \return the particle id
     */
    inline auto get(size_t cell, size_t ele) const -> decltype(cells.template get<0>(starts.template get<0>(cell)+ele))
    {
        return cells.template get<0>(starts.template get<0>(cell)+ele);
    }
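
    /* Host-side inspection of a dense cell-list using only the accessors above;
     * debug_deviceToHost() must run first so cl_n, cells and starts are valid on
     * the host (cl is a constructed CellList_gpu, hypothetical name):
     *
     * \code
     * cl.debug_deviceToHost();
     * for (size_t c = 0 ; c < cl.getNCells() ; c++)
     * {
     *     for (size_t j = 0 ; j < cl.getNelements(c) ; j++)
     *     { std::cout << "cell " << c << " -> particle " << cl.get(c,j) << "\n"; }
     * }
     * \endcode
     */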

    void swap(CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> & clg)
    {
        ((CellDecomposer_sm<dim,T,transform> *)this)->swap(clg);
        cl_n.swap(clg.cl_n);
        cells.swap(clg.cells);
        starts.swap(clg.starts);
        part_ids.swap(clg.part_ids);
        cl_sparse.swap(clg.cl_sparse);
        cells_nn.swap(clg.cells_nn);
        cells_nn_list.swap(clg.cells_nn_list);
        cells_nn_test.swap(clg.cells_nn_test);
        sorted_to_not_sorted.swap(clg.sorted_to_not_sorted);
        sorted_domain_particles_dg.swap(clg.sorted_domain_particles_dg);
        sorted_domain_particles_ids.swap(clg.sorted_domain_particles_ids);
        non_sorted_to_sorted.swap(clg.non_sorted_to_sorted);

        spacing_c.swap(clg.spacing_c);
        div_c.swap(clg.div_c);
        off.swap(clg.off);

        size_t g_m_tmp = g_m;
        g_m = clg.g_m;
        clg.g_m = g_m_tmp;

        size_t n_dec_tmp = n_dec;
        n_dec = clg.n_dec;
        clg.n_dec = n_dec_tmp;

        int cells_nn_test_size_tmp = cells_nn_test_size;
        cells_nn_test_size = clg.cells_nn_test_size;
        clg.cells_nn_test_size = cells_nn_test_size_tmp;
    }

    CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> &
    operator=(const CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> & clg)
    {
        *static_cast<CellDecomposer_sm<dim,T,transform> *>(this) = *static_cast<const CellDecomposer_sm<dim,T,transform> *>(&clg);
        cl_n = clg.cl_n;
        cells = clg.cells;
        starts = clg.starts;
        part_ids = clg.part_ids;
        cl_sparse = clg.cl_sparse;
        cells_nn = clg.cells_nn;
        cells_nn_list = clg.cells_nn_list;
        cells_nn_test = clg.cells_nn_test;
        sorted_to_not_sorted = clg.sorted_to_not_sorted;
        sorted_domain_particles_dg = clg.sorted_domain_particles_dg;
        sorted_domain_particles_ids = clg.sorted_domain_particles_ids;
        non_sorted_to_sorted = clg.non_sorted_to_sorted;

        spacing_c = clg.spacing_c;
        div_c = clg.div_c;
        off = clg.off;
        g_m = clg.g_m;
        n_dec = clg.n_dec;

        cells_nn_test_size = clg.cells_nn_test_size;

        return *this;
    }

    CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> &
    operator=(CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> && clg)
    {
        static_cast<CellDecomposer_sm<dim,T,transform> *>(this)->swap(*static_cast<CellDecomposer_sm<dim,T,transform> *>(&clg));
        cl_n.swap(clg.cl_n);
        cells.swap(clg.cells);
        starts.swap(clg.starts);
        part_ids.swap(clg.part_ids);
        cl_sparse.swap(clg.cl_sparse);
        cells_nn.swap(clg.cells_nn);
        cells_nn_list.swap(clg.cells_nn_list);
        cells_nn_test.swap(clg.cells_nn_test);
        sorted_to_not_sorted.swap(clg.sorted_to_not_sorted);
        sorted_domain_particles_dg.swap(clg.sorted_domain_particles_dg);
        sorted_domain_particles_ids.swap(clg.sorted_domain_particles_ids);
        non_sorted_to_sorted.swap(clg.non_sorted_to_sorted);

        spacing_c = clg.spacing_c;
        div_c = clg.div_c;
        off = clg.off;
        g_m = clg.g_m;
        n_dec = clg.n_dec;

        cells_nn_test_size = clg.cells_nn_test_size;

        return *this;
    }
};

// This is a transformation node of vector_distributed for the algorithm toKernel_transform
template<template <typename> class layout_base, typename T>
struct toKernel_transform<layout_base,T,4>
{
    typedef CellList_gpu_ker<T::dims,
                             typename T::stype,
                             typename T::cnt_type_,
                             typename T::ids_type_,
                             typename T::transform_,
                             T::is_sparse_::value> type;
};

#endif

#endif /* OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_ */