OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
cuda_grid_gpu_funcs.cuh
1 /*
2  * cuda_grid_gpu_funcs.cuh
3  *
4  * Created on: Aug 20, 2018
5  * Author: i-bird
6  */
7 
8 #ifndef CUDA_GRID_GPU_FUNCS_CUH_
9 #define CUDA_GRID_GPU_FUNCS_CUH_
10 
11 #include "config.h"
12 #include "util/cuda_launch.hpp"
13 #include "map_grid_cuda_ker.cuh"
14 
15 #if defined(CUDA_GPU) && defined(__NVCC__)
16 
/*! \brief Kernel that copies one grid element per CUDA block from src to dst
 *
 * The linearized element index is taken from blockIdx.x only; the thread
 * index inside the block is not used. Blocks whose index is not smaller than
 * the linearized size of both grids do nothing.
 *
 * The multi-dimensional key is reconstructed from the source grid
 * information and the same key is used to address both src and dst.
 *
 * \tparam dim dimensionality (not referenced in the body)
 * \tparam grid_type type of the grid objects passed by value to the kernel
 *
 * \param src grid to read from
 * \param dst grid to write to
 *
 */
template<unsigned int dim, typename grid_type>
__global__ void copy_ndim_grid_block_device(grid_type src, grid_type dst)
{
	const unsigned int lin_id = blockIdx.x;

	// The element must exist in both grids
	const bool inside = lin_id < src.getGrid().size() && lin_id < dst.getGrid().size();

	if (inside)
	{
		auto key = src.getGrid().InvLinId(lin_id);

		dst.get_o(key) = src.get_o(key);
	}
}
29 
/*! \brief Generic (any dimension) element copy between two grids
 *
 * Each thread handles the element whose linearized index is its global 1D
 * thread index (blockIdx.x * blockDim.x + threadIdx.x). The index is turned
 * into a multi-dimensional key using the source grid information and that
 * key is used on both grids.
 *
 * \tparam dim dimensionality selecting this implementation (specializations exist for 2 and 3)
 * \tparam grid_type type of the grids
 *
 */
template<unsigned int dim, typename grid_type>
struct copy_ndim_grid_impl
{
	/*! \brief Copy the element assigned to the calling thread
	 *
	 * \param src grid to read from
	 * \param dst grid to write to
	 *
	 */
	static __device__ void copy(grid_type & src, grid_type & dst)
	{
		const unsigned int lin_id = blockIdx.x * blockDim.x + threadIdx.x;

		// Only threads that map to an element present in both grids do work
		if (lin_id < src.getGrid().size() && lin_id < dst.getGrid().size())
		{
			auto key = src.getGrid().InvLinId(lin_id);

			dst.get_o(key) = src.get_o(key);
		}
	}
};
45 
/*! \brief Two-dimensional element copy between two grids
 *
 * The x and y global thread indices are used directly as coordinates 0 and 1
 * of the grid key. A thread whose key falls outside the extent of either
 * grid in any of the two directions returns without copying.
 *
 * \tparam grid_type type of the grids
 *
 */
template<typename grid_type>
struct copy_ndim_grid_impl<2,grid_type>
{
	/*! \brief Copy the element assigned to the calling thread
	 *
	 * \param src grid to read from
	 * \param dst grid to write to
	 *
	 */
	static __device__ void copy(grid_type & src, grid_type & dst)
	{
		const unsigned int gx = threadIdx.x + blockIdx.x * blockDim.x;
		const unsigned int gy = threadIdx.y + blockIdx.y * blockDim.y;

		grid_key_dx<2> key;
		key.set_d(0,gx);
		key.set_d(1,gy);

		// Reject keys outside the source grid ...
		for (int d = 0 ; d < 2 ; d++)
		{
			if (key.get(d) >= src.getGrid().size(d)) {return;}
		}

		// ... and keys outside the destination grid
		for (int d = 0 ; d < 2 ; d++)
		{
			if (key.get(d) >= dst.getGrid().size(d)) {return;}
		}

		dst.get_o(key) = src.get_o(key);
	}
};
64 
/*! \brief Three-dimensional element copy between two grids
 *
 * The x, y and z global thread indices are used as coordinates 0, 1 and 2 of
 * the grid key, so the kernel using this implementation is expected to be
 * launched with a 3D grid/block configuration covering the region to copy.
 * A thread whose key falls outside the extent of either grid in any
 * direction returns without copying.
 *
 * \tparam grid_type type of the grids
 *
 */
template<typename grid_type>
struct copy_ndim_grid_impl<3,grid_type>
{
	/*! \brief Copy the element assigned to the calling thread
	 *
	 * \param src grid to read from
	 * \param dst grid to write to
	 *
	 */
	static __device__ void copy(grid_type & src, grid_type & dst)
	{
		grid_key_dx<3> key_src;
		key_src.set_d(0,threadIdx.x + blockIdx.x * blockDim.x);
		key_src.set_d(1,threadIdx.y + blockIdx.y * blockDim.y);
		// Third coordinate comes from the z component of the launch
		// configuration (it was previously computed from the y component,
		// which made coordinate 2 always equal to coordinate 1)
		key_src.set_d(2,threadIdx.z + blockIdx.z * blockDim.z);

		// Skip keys outside the source grid
		if (key_src.get(0) >= src.getGrid().size(0)) {return;}
		if (key_src.get(1) >= src.getGrid().size(1)) {return;}
		if (key_src.get(2) >= src.getGrid().size(2)) {return;}

		// Skip keys outside the destination grid
		if (key_src.get(0) >= dst.getGrid().size(0)) {return;}
		if (key_src.get(1) >= dst.getGrid().size(1)) {return;}
		if (key_src.get(2) >= dst.getGrid().size(2)) {return;}

		dst.get_o(key_src) = src.get_o(key_src);
	}
};
86 
/*! \brief Kernel copying the elements of src into dst
 *
 * The per-thread work is delegated to copy_ndim_grid_impl<dim,grid_type>::copy,
 * so the mapping between threads and grid elements is the one defined by the
 * implementation selected through dim.
 *
 * \tparam dim dimensionality used to select the copy implementation
 * \tparam grid_type type of the grid objects passed by value to the kernel
 *
 * \param src grid to read from
 * \param dst grid to write to
 *
 */
template<unsigned int dim, typename grid_type>
__global__ void copy_ndim_grid_device(grid_type src, grid_type dst)
{
	typedef copy_ndim_grid_impl<dim,grid_type> copy_impl;

	copy_impl::copy(src,dst);
}
92 
93 
94 #endif
95 
96 
/*! \brief Build the kernel-side grid object from a host-side grid (primary template)
 *
 * The specialization for inte_or_lin == true handles the other memory layout;
 * this primary template is used for every other value of inte_or_lin.
 *
 * \tparam inte_or_lin selects the implementation
 * \tparam base_grid type of the kernel-side grid that is built and returned
 * \tparam dim dimensionality (not referenced in the body)
 * \tparam T element type (not referenced in the body)
 *
 */
template<bool inte_or_lin, typename base_grid, unsigned int dim, typename T>
struct grid_toKernelImpl
{
	/*! \brief Create a base_grid that refers to the data of gc
	 *
	 * The returned object is constructed from gc.getGrid(); its data member
	 * has memory management disabled, receives the mem pointer of gc, binds
	 * to the mem_r of gc and is then switched to the device pointer.
	 *
	 * \tparam grid_type type of the host-side grid
	 *
	 * \param gc grid to convert
	 *
	 * \return the kernel-side grid
	 *
	 */
	template<typename grid_type> static base_grid toKernel(grid_type & gc)
	{
		/*grid_gpu_ker<dim,T,memory_traits_lin,typename grid_type::linearizer_type>*/base_grid g(gc.getGrid());

		// g only refers to the memory of gc
		g.get_data_().disable_manage_memory();
		g.get_data_().mem = gc.get_internal_data_().mem;
		// The reference counter of mem is not incremented here
		// (the call below is disabled)
		//g.get_data_().mem->incRef();
		g.get_data_().mem_r.bind_ref(gc.get_internal_data_().mem_r);
		g.get_data_().switchToDevicePtr();

		return g;
	}
};
114 
115 template<typename base_grid, unsigned int dim, typename T>
116 struct grid_toKernelImpl<true,base_grid,dim,T>
117 {
118  template<typename grid_type> static base_grid toKernel(grid_type & gc)
119  {
120  /*grid_gpu_ker<dim,T,memory_traits_inte, typename grid_type::linearizer_type>*/ base_grid g(gc.getGrid());
121  copy_switch_memory_c_no_cpy<typename std::remove_reference<decltype(gc.get_internal_data_())>::type,
122  typename std::remove_reference<decltype(g.get_data_())>::type> cp_mc(gc.get_internal_data_(),g.get_data_());
123 
124  boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(cp_mc);
125 
126  return g;
127  }
128 };
129 
130 #endif /* CUDA_GRID_GPU_FUNCS_CUH_ */
this class is a functor for "for_each" algorithm
grid_key_dx is the key to access any element in the grid
Definition: grid_key.hpp:18
size_t size() const
Return the total number of points in the grid.
__device__ __host__ index_type get(index_type i) const
Get the i index.
Definition: grid_key.hpp:503
This is a distributed grid.
__device__ __host__ void set_d(index_type i, index_type id)
Set the i index.
Definition: grid_key.hpp:516