OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
CellList_gpu.hpp
1 /*
2  * CellList_gpu.hpp
3  *
4  * Created on: Jun 11, 2018
5  * Author: i-bird
6  */
7 
8 #ifndef OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_
9 #define OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_
10 
11 #include "config.h"
12 
13 #ifdef CUDA_GPU
14 
15 #include "Vector/map_vector_sparse.hpp"
16 #include "NN/CellList/CellDecomposer.hpp"
17 #include "Vector/map_vector.hpp"
18 #include "Cuda_cell_list_util_func.hpp"
19 #include "NN/CellList/cuda/CellList_gpu_ker.cuh"
20 #include "util/cuda_util.hpp"
21 #include "NN/CellList/CellList_util.hpp"
22 #include "NN/CellList/CellList.hpp"
23 #include "util/cuda/scan_ofp.cuh"
24 
25 constexpr int count = 0;
26 constexpr int start = 1;
27 
28 template<unsigned int dim, typename T,
29  typename cnt_type, typename ids_type,
30  typename Memory,typename transform,
31  typename vector_cnt_type, typename vector_cnt_type2,
32  typename cl_sparse_type,
33  bool is_sparse>
34 struct CellList_gpu_ker_selector
35 {
36  static inline CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse> get(vector_cnt_type & starts,
37  vector_cnt_type & cell_nn,
38  vector_cnt_type2 & cell_nn_list,
39  cl_sparse_type & cl_sparse,
40  vector_cnt_type & sorted_to_not_sorted,
41  vector_cnt_type & sorted_domain_particles_ids,
42  openfpm::vector_gpu<aggregate<int>> & nnc_rad,
43  openfpm::array<T,dim,cnt_type> & spacing_c,
44  openfpm::array<ids_type,dim,cnt_type> & div_c,
45  openfpm::array<ids_type,dim,cnt_type> & off,
46  const transform & t,
47  unsigned int g_m)
48  {
49  return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse>(starts.toKernel(),
50  sorted_to_not_sorted.toKernel(),
51  sorted_domain_particles_ids.toKernel(),
52  nnc_rad.toKernel(),
53  spacing_c,
54  div_c,
55  off,
56  t,
57  g_m);
58  }
59 };
60 
61 template<unsigned int dim, typename T,
62  typename cnt_type, typename ids_type,
63  typename Memory,typename transform,
64  typename vector_cnt_type, typename vector_cnt_type2,
65  typename cl_sparse_type>
66 struct CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform,vector_cnt_type,vector_cnt_type2,cl_sparse_type,true>
67 {
68  static CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,true> get(vector_cnt_type & starts,
69  vector_cnt_type & cell_nn,
70  vector_cnt_type2 & cell_nn_list,
71  cl_sparse_type & cl_sparse,
72  vector_cnt_type & srt,
73  vector_cnt_type & dprt,
74  openfpm::vector_gpu<aggregate<int>> & nnc_rad,
75  openfpm::array<T,dim,cnt_type> & spacing_c,
76  openfpm::array<ids_type,dim,cnt_type> & div_c,
77  openfpm::array<ids_type,dim,cnt_type> & off,
78  const transform & t,
79  unsigned int g_m)
80  {
81  return CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,true>(cell_nn.toKernel(),
82  cell_nn_list.toKernel(),
83  cl_sparse.toKernel(),
84  srt.toKernel(),
85  dprt.toKernel(),
86  spacing_c,
87  div_c,
88  off,
89  t,g_m);
90  }
91 };
92 
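 /*! \brief Cell-list implementation on GPU
  *
  *  Stores the particles of each cell either densely (one counter per cell) or
  *  sparsely (only non-empty cells), selected by the is_sparse template parameter.
  *  Use construct() to build it from a set of particles and toKernel() to obtain
  *  the device-side view (CellList_gpu_ker).
  */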
93 template<unsigned int dim,
94  typename T,
95  typename Memory,
96  typename transform = no_transform_only<dim,T>,
97  typename cnt_type = unsigned int,
98  typename ids_type = int,
99  bool is_sparse = false>
100 class CellList_gpu : public CellDecomposer_sm<dim,T,transform>
101 {
102  typedef openfpm::vector<aggregate<cnt_type>,Memory,memory_traits_inte> vector_cnt_type;
103 
105  vector_cnt_type cl_n;
106 
108  vector_cnt_type cells;
109 
111  vector_cnt_type starts;
112 
115  vector_cnt_type part_ids; // per-particle cell index information
118  openfpm::vector_sparse_gpu<aggregate<cnt_type>> cl_sparse; // non-empty cells (sparse cell-list only)
121  vector_cnt_type cells_nn; // number of neighborhood cells per cell (sparse cell-list only)
124  openfpm::vector_gpu<aggregate<cnt_type,cnt_type>> cells_nn_list; // neighborhood cell list (sparse cell-list only)
126  int cells_nn_test_size;
127 
129  openfpm::vector_gpu<aggregate<int>> cells_nn_test;
130 
132  vector_cnt_type sorted_to_not_sorted;
133 
135  vector_cnt_type sorted_domain_particles_dg;
136 
138  vector_cnt_type sorted_domain_particles_ids;
139 
141  vector_cnt_type non_sorted_to_sorted;
142 
145  openfpm::array<T,dim,cnt_type> spacing_c; // spacing of the cells in each dimension
148  openfpm::array<ids_type,dim,cnt_type> div_c; // number of cells in each dimension (including padding)
151  openfpm::array<ids_type,dim,cnt_type> off; // cell padding offset
154  openfpm::vector_gpu<aggregate<int>> nnc_rad; // neighborhood cell offsets used by setRadius
157  size_t n_dec;
158 
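 /*! \brief Initialize the internal structures: cell spacing, number of divisions,
  *  padding, the per-cell particle counter and the default neighborhood test box
  */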
160  void InitializeStructures(const size_t (& div)[dim], size_t tot_n_cell, size_t pad)
161  {
162  for (size_t i = 0 ; i < dim ; i++)
163  {
164  div_c[i] = div[i];
165  spacing_c[i] = this->getCellBox().getP2().get(i);
166  off[i] = pad;
167  }
168 
169  cl_n.resize(tot_n_cell);
170 
171  cells_nn_test_size = 1;
172  construct_cell_nn_test(cells_nn_test_size);
173  }
174 
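 /*! \brief Build the table of linearized cell offsets defining the neighborhood of
  *  a cell: a (2*box_nn+1)^dim stencil centered on the cell
  */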
175  void construct_cell_nn_test(unsigned int box_nn = 1)
176  {
177  auto & gs = this->getGrid();
178 
179  grid_key_dx<dim> start;
180  grid_key_dx<dim> stop;
181  grid_key_dx<dim> middle;
182 
183  for (size_t i = 0 ; i < dim ; i++)
184  {
185  start.set_d(i,0);
186  stop.set_d(i,2*box_nn);
187  middle.set_d(i,box_nn);
188  }
189 
190  cells_nn_test.resize(openfpm::math::pow(2*box_nn+1,dim));
191 
192  int mid = gs.LinId(middle);
193 
194  grid_key_dx_iterator_sub<dim> it(gs,start,stop);
195 
196  size_t i = 0;
197  while (it.isNext())
198  {
199  auto p = it.get();
200 
201  cells_nn_test.template get<0>(i) = (int)gs.LinId(p) - mid;
202 
203  ++i;
204  ++it;
205  }
206 
207  cells_nn_test.template hostToDevice<0>();
208 
209 #if defined(__NVCC__) && defined(USE_LOW_REGISTER_ITERATOR)
210 
211  // copy to the constant memory
212  cudaMemcpyToSymbol(cells_striding,cells_nn_test.template getPointer<0>(),cells_nn_test.size()*sizeof(int));
213 
214 #endif
215  }
216 
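 /*! \brief Construct the sparse cell-list: compute the cell index of each particle,
  *  flush the non-empty cells into cl_sparse, build the neighborhood cell lists and
  *  reorder the particles for coalesced access
  */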
221  template<typename vector, typename vector_prp, unsigned int ... prp>
222  void construct_sparse(vector & pl,
223  vector & pl_out,
224  vector_prp & pl_prp,
225  vector_prp & pl_prp_out,
226  mgpu::ofp_context_t & mgpuContext,
227  size_t g_m,
228  size_t start,
229  size_t stop,
230  cl_construct_opt opt = cl_construct_opt::Full)
231  {
232 #ifdef __NVCC__
233 
234  part_ids.resize(stop - start);
235  starts.resize(stop - start);
236 
237  // Then we construct the ids
238 
239  auto ite_gpu = pl.getGPUIteratorTo(stop-start);
240 
241  if (ite_gpu.wthr.x == 0)
242  {
243  return;
244  }
245 
246  CUDA_LAUNCH((subindex<true,dim,T,cnt_type,ids_type>),ite_gpu,div_c,
247  spacing_c,
248  off,
249  this->getTransform(),
250  pl.capacity(),
251  pl.size(),
252  part_ids.capacity(),
253  start,
254  static_cast<T *>(pl.template getDeviceBuffer<0>()),
255  static_cast<cnt_type *>(starts.template getDeviceBuffer<0>()),
256  static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()));
257 
258  // now we construct the cells
259 
260  cells.resize(stop-start);
261 
262  // Here we fill the sparse vector
263  cl_sparse.clear();
264  cl_sparse.template setBackground<0>((cnt_type)-1);
265  cl_sparse.setGPUInsertBuffer(ite_gpu.wthr.x,ite_gpu.thr.x);
266  CUDA_LAUNCH((fill_cells_sparse),ite_gpu,cl_sparse.toKernel(),starts.toKernel());
267  cl_sparse.template flush_vd<sstart_<0>>(cells,mgpuContext,FLUSH_ON_DEVICE);
268 
269  cells_nn.resize(cl_sparse.size()+1);
270  cells_nn.template fill<0>(0);
271 
272  // Here we construct the neighborhood cells for each cell
273  auto itgg = cl_sparse.getGPUIterator();
274  CUDA_LAUNCH((count_nn_cells),itgg,cl_sparse.toKernel(),cells_nn.toKernel(),cells_nn_test.toKernel());
275 
276  // now we scan
277  openfpm::scan((cnt_type *)cells_nn.template getDeviceBuffer<0>(), cells_nn.size(), (cnt_type *)cells_nn.template getDeviceBuffer<0>() , mgpuContext);
278 
279  cells_nn.template deviceToHost<0>(cells_nn.size() - 1, cells_nn.size() - 1);
280  size_t n_nn_cells = cells_nn.template get<0>(cells_nn.size() - 1);
281 
282  cells_nn_list.resize(n_nn_cells);
283 
284  CUDA_LAUNCH((fill_nn_cells),itgg,cl_sparse.toKernel(),cells_nn.toKernel(),cells_nn_test.toKernel(),cells_nn_list.toKernel(),cells.size());
285 
286  sorted_to_not_sorted.resize(stop-start);
287  non_sorted_to_sorted.resize(pl.size());
288 
289  auto ite = pl.getGPUIteratorTo(stop-start,64);
290 
291  // Here we reorder the particles to improve coalesced memory access
292  CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()),
293  decltype(pl.toKernel()),
294  decltype(sorted_to_not_sorted.toKernel()),
295  cnt_type,shift_ph<0,cnt_type>>),ite,sorted_to_not_sorted.size(),
296  pl_prp.toKernel(),
297  pl_prp_out.toKernel(),
298  pl.toKernel(),
299  pl_out.toKernel(),
300  sorted_to_not_sorted.toKernel(),
301  non_sorted_to_sorted.toKernel(),
302  static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));
303 
304  if (opt == cl_construct_opt::Full)
305  {
306  construct_domain_ids(mgpuContext,start,stop,g_m);
307  }
308 
309  #else
310 
311  std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct(); this function must be compiled with the NVCC compiler, but it looks like it has been compiled by the standard system compiler" << std::endl;
312 
313  #endif
314  }
315 
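 /*! \brief Build the list of indices (in the sorted ordering) of the domain particles,
  *  i.e. the particles below the ghost marker g_m
  */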
321  void construct_domain_ids(mgpu::ofp_context_t & mgpuContext, size_t start, size_t stop, size_t g_m)
322  {
323 #ifdef __NVCC__
324  sorted_domain_particles_dg.resize(stop-start+1);
325 
326  auto ite = sorted_domain_particles_dg.getGPUIterator();
327 
328  CUDA_LAUNCH((mark_domain_particles),ite,sorted_to_not_sorted.toKernel(),sorted_domain_particles_dg.toKernel(),g_m);
329 
330  // let's scan
331  openfpm::scan((unsigned int *)sorted_domain_particles_dg.template getDeviceBuffer<0>(),sorted_domain_particles_dg.size(),(unsigned int *)sorted_domain_particles_dg.template getDeviceBuffer<0>(),mgpuContext);
332 
333  sorted_domain_particles_dg.template deviceToHost<0>(sorted_domain_particles_dg.size()-1,sorted_domain_particles_dg.size()-1);
334  auto sz = sorted_domain_particles_dg.template get<0>(sorted_domain_particles_dg.size()-1);
335 
336  sorted_domain_particles_ids.resize(sz);
337 
338  CUDA_LAUNCH((collect_domain_ghost_ids),ite,sorted_domain_particles_dg.toKernel(),sorted_domain_particles_ids.toKernel());
339 #endif
340  }
341 
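
 /*! \brief Construct the dense cell-list: count the particles in each cell, scan the
  *  counters to obtain the cell start offsets, fill the cells and reorder the
  *  particles for coalesced access
  */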
346  template<typename vector, typename vector_prp, unsigned int ... prp>
347  void construct_dense(vector & pl,
348  vector & pl_out,
349  vector_prp & pl_prp,
350  vector_prp & pl_prp_out,
351  mgpu::ofp_context_t & mgpuContext,
352  size_t g_m,
353  size_t start,
354  size_t stop,
355  cl_construct_opt opt = cl_construct_opt::Full)
356  {
357 #ifdef __NVCC__
358 
359  // Then we construct the ids
360 
361  auto ite_gpu = pl.getGPUIteratorTo(stop-start-1);
362 
363  cl_n.resize(this->gr_cell.size()+1);
364  cl_n.template fill<0>(0);
365 
366  part_ids.resize(stop - start);
367 
368  if (ite_gpu.wthr.x == 0 || pl.size() == 0 || stop == 0)
369  {
370  // no particles
371  starts.resize(cl_n.size());
372  starts.template fill<0>(0);
373  return;
374  }
375 
376  CUDA_LAUNCH((subindex<false,dim,T,cnt_type,ids_type>),ite_gpu,div_c,
377  spacing_c,
378  off,
379  this->getTransform(),
380  pl.capacity(),
381  stop,
382  part_ids.capacity(),
383  start,
384  static_cast<T *>(pl.template getDeviceBuffer<0>()),
385  static_cast<cnt_type *>(cl_n.template getDeviceBuffer<0>()),
386  static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()));
387 
388  // now we scan
389  starts.resize(cl_n.size());
390  openfpm::scan((cnt_type *)cl_n.template getDeviceBuffer<0>(), cl_n.size(), (cnt_type *)starts.template getDeviceBuffer<0>() , mgpuContext);
391 
392  // now we construct the cells
393 
394  cells.resize(stop-start);
395  auto itgg = part_ids.getGPUIterator();
396 
397 
398 #ifdef MAKE_CELLLIST_DETERMINISTIC
399 
400  CUDA_LAUNCH((fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>>),itgg,0,
401  part_ids.size(),
402  static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()) );
403 
404  // sort
405 
406  mgpu::mergesort(static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()),static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()),pl.size(),mgpu::less_t<cnt_type>(),mgpuContext);
407 
408 #else
409 
410  CUDA_LAUNCH((fill_cells<dim,cnt_type,ids_type,shift_ph<0,cnt_type>>),itgg,0,
411  div_c,
412  off,
413  part_ids.size(),
414  part_ids.capacity(),
415  start,
416  static_cast<cnt_type *>(starts.template getDeviceBuffer<0>()),
417  static_cast<cnt_type *>(part_ids.template getDeviceBuffer<0>()),
418  static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()) );
419 
420 #endif
421 
422 
423  sorted_to_not_sorted.resize(stop-start);
424  non_sorted_to_sorted.resize(pl.size());
425 
426  auto ite = pl.getGPUIteratorTo(stop-start,64);
427 
428  if (sizeof...(prp) == 0)
429  {
430  // Here we reorder the particles to improve coalesced memory access
431  CUDA_LAUNCH((reorder_parts<decltype(pl_prp.toKernel()),
432  decltype(pl.toKernel()),
433  decltype(sorted_to_not_sorted.toKernel()),
434  cnt_type,shift_ph<0,cnt_type>>),ite,sorted_to_not_sorted.size(),
435  pl_prp.toKernel(),
436  pl_prp_out.toKernel(),
437  pl.toKernel(),
438  pl_out.toKernel(),
439  sorted_to_not_sorted.toKernel(),
440  non_sorted_to_sorted.toKernel(),
441  static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));
442  }
443  else
444  {
445  // Here we reorder the particles to improve coalesced memory access
446  CUDA_LAUNCH((reorder_parts_wprp<decltype(pl_prp.toKernel()),
447  decltype(pl.toKernel()),
448  decltype(sorted_to_not_sorted.toKernel()),
449  cnt_type,shift_ph<0,cnt_type>,prp...>),ite,sorted_to_not_sorted.size(),
450  pl_prp.toKernel(),
451  pl_prp_out.toKernel(),
452  pl.toKernel(),
453  pl_out.toKernel(),
454  sorted_to_not_sorted.toKernel(),
455  non_sorted_to_sorted.toKernel(),
456  static_cast<cnt_type *>(cells.template getDeviceBuffer<0>()));
457  }
458 
459  if (opt == cl_construct_opt::Full)
460  {
461  construct_domain_ids(mgpuContext,start,stop,g_m);
462  }
463 
464  #else
465 
466  std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct(); this function must be compiled with the NVCC compiler, but it looks like it has been compiled by the standard system compiler" << std::endl;
467 
468  #endif
469  }
470 
471 public:
472 
474  typedef int yes_is_gpu_celllist;
475 
477  typedef T stype;
478 
480  static const unsigned int dims = dim;
481 
483  typedef cnt_type cnt_type_;
484 
486  typedef ids_type ids_type_;
487 
489  typedef transform transform_;
490 
492  typedef boost::mpl::bool_<is_sparse> is_sparse_;
493 
499  CellList_gpu(const CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> & clg)
500  {
501  this->operator=(clg);
502  }
503 
509  CellList_gpu(CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> && clg)
510  {
511  this->operator=(clg);
512  }
513 
518  CellList_gpu()
519  {}
520 
521  CellList_gpu(const Box<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1)
522  {
523  Initialize(box,div,pad);
524  }
525 
526 
535  void Initialize(const Box<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1)
536  {
537  SpaceBox<dim,T> sbox(box);
538 
539  // Initialize point transformation
540 
541  Initialize(sbox,div,pad);
542  }
543 
544  void setBoxNN(unsigned int n_NN)
545  {
546  cells_nn_test_size = n_NN;
547  construct_cell_nn_test(n_NN);
548  }
549 
550  void re_setBoxNN()
551  {
552  construct_cell_nn_test(cells_nn_test_size);
553  }
554 
563  void Initialize(const SpaceBox<dim,T> & box, const size_t (&div)[dim], const size_t pad = 1)
564  {
565  Matrix<dim,T> mat;
566  CellDecomposer_sm<dim,T,transform>::setDimensions(box,div, mat, pad);
567 
568  // create the array that stores the number of particles in each cell and set it to 0
569  InitializeStructures(this->gr_cell.getSize(),this->gr_cell.size(),pad);
570  }
571 
572  vector_cnt_type & getSortToNonSort()
573  {
574  return sorted_to_not_sorted;
575  }
576 
577  vector_cnt_type & getNonSortToSort()
578  {
579  return non_sorted_to_sorted;
580  }
581 
582  vector_cnt_type & getDomainSortIds()
583  {
584  return sorted_domain_particles_ids;
585  }
586 
587 
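 /*! \brief Set the interaction radius used to precompute the neighborhood cell offsets (nnc_rad)
  */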
593  void setRadius(T radius)
594  {
595  openfpm::vector<long int> nnc_rad_;
596 
597  NNcalc_rad(radius,nnc_rad_,this->getCellBox(),this->getGrid());
598 
599  nnc_rad.resize(nnc_rad_.size(),0);
600 
601  // copy to nnc_rad
602 
603  for (unsigned int i = 0 ; i < nnc_rad_.size() ; i++)
604  {nnc_rad.template get<0>(i) = nnc_rad_.template get<0>(i);}
605 
606  nnc_rad.template hostToDevice<0>();
607  }
608 
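 /*! \brief Construct the cell-list from the given particle set, dispatching to the dense
  *  or sparse implementation according to the is_sparse template parameter; the
  *  cell-ordered particles and properties are written to pl_out and pl_prp_out
  */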
616  template<typename vector, typename vector_prp, unsigned int ... prp>
617  void construct(vector & pl,
618  vector & pl_out,
619  vector_prp & pl_prp,
620  vector_prp & pl_prp_out,
621  mgpu::ofp_context_t & mgpuContext,
622  size_t g_m = 0,
623  size_t start = 0,
624  size_t stop = (size_t)-1,
625  cl_construct_opt opt = cl_construct_opt::Full)
626  {
627  // if stop is the default, set it to the number of particles
628  if (stop == (size_t)-1)
629  {stop = pl.size();}
630 
631  if (is_sparse == false) {construct_dense<vector,vector_prp,prp...>(pl,pl_out,pl_prp,pl_prp_out,mgpuContext,g_m,start,stop,opt);}
632  else {construct_sparse<vector,vector_prp,prp...>(pl,pl_out,pl_prp,pl_prp_out,mgpuContext,g_m,start,stop,opt);}
633  }
634 
635  CellList_gpu_ker<dim,T,cnt_type,ids_type,transform,is_sparse> toKernel()
636  {
637 /* if (nnc_rad.size() == 0) <----- Cannot call this anymore with openMP
638  {
639  // set the radius equal the cell spacing on direction X
640  // (must be initialized to something to avoid warnings)
641  setRadius(this->getCellBox().getHigh(0));
642  }*/
643 
644  return CellList_gpu_ker_selector<dim,T,cnt_type,ids_type,Memory,transform,
645  vector_cnt_type,decltype(cells_nn_list),
646  decltype(cl_sparse),is_sparse>
647  ::get(starts,
648  cells_nn,
649  cells_nn_list,
650  cl_sparse,
651  sorted_to_not_sorted,
652  sorted_domain_particles_ids,
653  nnc_rad,
654  spacing_c,
655  div_c,
656  off,
657  this->getTransform(),
658  g_m);
659  }
660 
665  void clear()
666  {
667  cl_n.clear();
668  cells.clear();
669  starts.clear();
670  part_ids.clear();
671  sorted_to_not_sorted.clear();
672  }
673 
677  size_t g_m = 0;
678 
684  inline size_t get_gm()
685  {
686  return g_m;
687  }
688 
694  inline void set_gm(size_t g_m)
695  {
696  this->g_m = g_m;
697  }
698 
706  void set_ndec(size_t n_dec)
707  {
708  this->n_dec = n_dec;
709  }
710 
716  size_t get_ndec() const
717  {
718  return n_dec;
719  }
720 
726  void debug_deviceToHost()
727  {
728  cl_n.template deviceToHost<0>();
729  cells.template deviceToHost<0>();
730  starts.template deviceToHost<0>();
731  }
732 
738  size_t getNCells()
739  {
740  return cl_n.size();
741  }
742 
748  size_t getNelements(size_t i)
749  {
750  return cl_n.template get<0>(i);
751  }
752 
763  inline auto get(size_t cell, size_t ele) -> decltype(cells.template get<0>(starts.template get<0>(cell)+ele))
764  {
765  return cells.template get<0>(starts.template get<0>(cell)+ele);
766  }
767 
778  inline auto get(size_t cell, size_t ele) const -> decltype(cells.template get<0>(starts.template get<0>(cell)+ele))
779  {
780  return cells.template get<0>(starts.template get<0>(cell)+ele);
781  }
782 
788  void swap(CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> & clg)
789  {
790  ((CellDecomposer_sm<dim,T,transform> *)this)->swap(clg);
791  cl_n.swap(clg.cl_n);
792  cells.swap(clg.cells);
793  starts.swap(clg.starts);
794  part_ids.swap(clg.part_ids);
795  cl_sparse.swap(clg.cl_sparse);
796  cells_nn.swap(clg.cells_nn);
797  cells_nn_list.swap(clg.cells_nn_list);
798  cells_nn_test.swap(clg.cells_nn_test);
799  sorted_to_not_sorted.swap(clg.sorted_to_not_sorted);
800  sorted_domain_particles_dg.swap(clg.sorted_domain_particles_dg);
801  sorted_domain_particles_ids.swap(clg.sorted_domain_particles_ids);
802  non_sorted_to_sorted.swap(clg.non_sorted_to_sorted);
803 
804  spacing_c.swap(clg.spacing_c);
805  div_c.swap(clg.div_c);
806  off.swap(clg.off);
807 
808  size_t g_m_tmp = g_m;
809  g_m = clg.g_m;
810  clg.g_m = g_m_tmp;
811 
812  size_t n_dec_tmp = n_dec;
813  n_dec = clg.n_dec;
814  clg.n_dec = n_dec_tmp;
815 
816  int cells_nn_test_size_tmp = cells_nn_test_size;
817  cells_nn_test_size = clg.cells_nn_test_size;
818  clg.cells_nn_test_size = cells_nn_test_size_tmp;
819  }
820 
821  CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> &
822  operator=(const CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type,is_sparse> & clg)
823  {
824  *static_cast<CellDecomposer_sm<dim,T,transform> *>(this) = *static_cast<const CellDecomposer_sm<dim,T,transform> *>(&clg);
825  cl_n = clg.cl_n;
826  cells = clg.cells;
827  starts = clg.starts;
828  part_ids = clg.part_ids;
829  cl_sparse = clg.cl_sparse;
830  cells_nn = clg.cells_nn;
831  cells_nn_list = clg.cells_nn_list;
832  cells_nn_test = clg.cells_nn_test;
833  sorted_to_not_sorted = clg.sorted_to_not_sorted;
834  sorted_domain_particles_dg = clg.sorted_domain_particles_dg;
835  sorted_domain_particles_ids = clg.sorted_domain_particles_ids;
836  non_sorted_to_sorted = clg.non_sorted_to_sorted;
837 
838  spacing_c = clg.spacing_c;
839  div_c = clg.div_c;
840  off = clg.off;
841  g_m = clg.g_m;
842  n_dec = clg.n_dec;
843 
844  cells_nn_test_size = clg.cells_nn_test_size;
845 
846  return *this;
847  }
848 
849  CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> &
850  operator=(CellList_gpu<dim,T,Memory,transform,cnt_type,ids_type> && clg)
851  {
852  static_cast<CellDecomposer_sm<dim,T,transform> *>(this)->swap(*static_cast<CellDecomposer_sm<dim,T,transform> *>(&clg));
853  cl_n.swap(clg.cl_n);
854  cells.swap(clg.cells);
855  starts.swap(clg.starts);
856  part_ids.swap(clg.part_ids);
857  cl_sparse.swap(clg.cl_sparse);
858  cells_nn.swap(clg.cells_nn);
859  cells_nn_list.swap(clg.cells_nn_list);
860  cells_nn_test.swap(clg.cells_nn_test);
861  sorted_to_not_sorted.swap(clg.sorted_to_not_sorted);
862  sorted_domain_particles_dg.swap(clg.sorted_domain_particles_dg);
863  sorted_domain_particles_ids.swap(clg.sorted_domain_particles_ids);
864  non_sorted_to_sorted.swap(clg.non_sorted_to_sorted);
865 
866  spacing_c = clg.spacing_c;
867  div_c = clg.div_c;
868  off = clg.off;
869  g_m = clg.g_m;
870  n_dec = clg.n_dec;
871 
872  cells_nn_test_size = clg.cells_nn_test_size;
873 
874  return *this;
875  }
876 };
877 
878 // This is a transformation node for vector_distributed for the algorithm toKernel_transform
879 template<template <typename> class layout_base, typename T>
880 struct toKernel_transform<layout_base,T,4>
881 {
882  typedef CellList_gpu_ker<T::dims,
883  typename T::stype,
884  typename T::cnt_type_,
885  typename T::ids_type_,
886  typename T::transform_,
887  T::is_sparse_::value> type;
888 };
889 
890 #endif
891 
892 #endif /* OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_ */
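
A minimal usage sketch of the dense GPU cell-list follows. It is not part of the header above: it assumes a CUDA-enabled build compiled with nvcc and the OpenFPM headers providing Box, Point, openfpm::vector_gpu, CudaMemory and mgpu::ofp_context_t; the function name build_cell_list_example and the variable names are illustrative only.

#include "NN/CellList/CellList_gpu.hpp"
#include "Vector/map_vector.hpp"

// Build a dense 3D cell-list over a set of GPU particles and obtain its kernel view.
// pl/pl_prp hold the unsorted positions/properties; pl_out/pl_prp_out receive the
// cell-ordered copies produced by construct().
void build_cell_list_example(openfpm::vector_gpu<Point<3,float>> & pl,
                             openfpm::vector_gpu<Point<3,float>> & pl_out,
                             openfpm::vector_gpu<aggregate<float>> & pl_prp,
                             openfpm::vector_gpu<aggregate<float>> & pl_prp_out,
                             mgpu::ofp_context_t & context)
{
    Box<3,float> domain({0.0f,0.0f,0.0f},{1.0f,1.0f,1.0f});
    size_t div[3] = {16,16,16};   // number of cells per dimension
    size_t pad = 1;               // one layer of padding cells around the domain

    CellList_gpu<3,float,CudaMemory> cl(domain,div,pad);

    // bin and reorder the particles; here all particles are treated as domain
    // particles (ghost marker g_m = pl.size())
    cl.construct(pl,pl_out,pl_prp,pl_prp_out,context,pl.size());

    // device-side view, usable inside CUDA kernels to iterate the neighborhood cells
    auto cl_ker = cl.toKernel();
    (void)cl_ker;
}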