OpenFPM 5.2.0
Project that contains the implementation of distributed structures
cuda_grid_gpu_funcs.cuh
/*
 * cuda_grid_gpu_funcs.cuh
 *
 *  Created on: Aug 20, 2018
 *      Author: i-bird
 */

#ifndef CUDA_GRID_GPU_FUNCS_CUH_
#define CUDA_GRID_GPU_FUNCS_CUH_

#include "config.h"
#include "util/cuda_util.hpp"
#include "map_grid_cuda_ker.cuh"

#if defined(CUDA_GPU) && defined(__NVCC__)

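//! \brief Copy the grid src into dst using one CUDA block per grid element
//!
//! The linear element index is taken from blockIdx.x, so the kernel must be
//! launched with at least one block per grid point; out-of-range blocks return.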
template<unsigned int dim, typename grid_type>
__global__ void copy_ndim_grid_block_device(grid_type src, grid_type dst)
{
    unsigned int i = blockIdx.x;

    if (i >= src.getGrid().size() || i >= dst.getGrid().size())
    {return;}

    auto key_src = src.getGrid().InvLinId(i);

    dst.get_o(key_src) = src.get_o(key_src);
}

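//! \brief Generic n-dimensional copy: one CUDA thread per grid element,
//! addressed by its linear index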
template<unsigned int dim, typename grid_type>
struct copy_ndim_grid_impl
{
    static __device__ void copy(grid_type & src, grid_type & dst)
    {
        unsigned int i = threadIdx.x + blockIdx.x * blockDim.x;

        if (i >= src.getGrid().size() || i >= dst.getGrid().size())
        {return;}

        auto key_src = src.getGrid().InvLinId(i);

        dst.get_o(key_src) = src.get_o(key_src);
    }
};

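//! \brief 2D specialization: one thread per grid point, addressed by a 2D
//! thread/block layout instead of a linear index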
template<typename grid_type>
struct copy_ndim_grid_impl<2,grid_type>
{
    static __device__ void copy(grid_type & src, grid_type & dst)
    {
        grid_key_dx<2> key_src;
        key_src.set_d(0,threadIdx.x + blockIdx.x * blockDim.x);
        key_src.set_d(1,threadIdx.y + blockIdx.y * blockDim.y);

        if (key_src.get(0) >= src.getGrid().size(0)) {return;}
        if (key_src.get(1) >= src.getGrid().size(1)) {return;}

        if (key_src.get(0) >= dst.getGrid().size(0)) {return;}
        if (key_src.get(1) >= dst.getGrid().size(1)) {return;}

        dst.get_o(key_src) = src.get_o(key_src);
    }
};

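//! \brief 3D specialization: one thread per grid point, addressed by a 3D
//! thread/block layout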
template<typename grid_type>
struct copy_ndim_grid_impl<3,grid_type>
{
    static __device__ void copy(grid_type & src, grid_type & dst)
    {
        grid_key_dx<3> key_src;
        key_src.set_d(0,threadIdx.x + blockIdx.x * blockDim.x);
        key_src.set_d(1,threadIdx.y + blockIdx.y * blockDim.y);
        // the third coordinate must come from the z thread/block indices
        key_src.set_d(2,threadIdx.z + blockIdx.z * blockDim.z);

        if (key_src.get(0) >= src.getGrid().size(0)) {return;}
        if (key_src.get(1) >= src.getGrid().size(1)) {return;}
        if (key_src.get(2) >= src.getGrid().size(2)) {return;}

        if (key_src.get(0) >= dst.getGrid().size(0)) {return;}
        if (key_src.get(1) >= dst.getGrid().size(1)) {return;}
        if (key_src.get(2) >= dst.getGrid().size(2)) {return;}

        dst.get_o(key_src) = src.get_o(key_src);
    }
};

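//! \brief Kernel entry point: dispatch to the dimension-specific copy
//! implementation above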
template<unsigned int dim, typename grid_type>
__global__ void copy_ndim_grid_device(grid_type src, grid_type dst)
{
    copy_ndim_grid_impl<dim,grid_type>::copy(src,dst);
}

#endif

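//! \brief Build a device (kernel-usable) view of a host-side grid
//!
//! Primary template, selected for the memory_traits_lin layout, where a single
//! memory buffer holds all properties: the returned base_grid aliases the
//! device pointer of the source grid's buffer; it does not copy the data and
//! does not take ownership of the memory.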
template<bool inte_or_lin, typename base_grid, unsigned int dim, typename T, typename S>
struct grid_toKernelImpl
{
    template<typename grid_type> static base_grid toKernel(grid_type & gc)
    {
        /*grid_gpu_ker<dim,T,memory_traits_lin,typename grid_type::linearizer_type>*/base_grid g(gc.getGrid());
        auto & grid_layout = g.get_data_();

        grid_layout.disable_manage_memory();
        grid_layout.mem = gc.get_internal_data_().mem;
        // Increment the reference of mem
        //grid_layout.mem->incRef();
        grid_layout.mem_r.bind_ref(gc.get_internal_data_().mem_r);
        if (grid_layout.mem)
        {grid_layout.mem_r.set_pointer(((S*)grid_layout.mem)->getDevicePointer());}

        return g;
    }
};

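//! \brief Specialization for the memory_traits_inte layout, where each
//! property has its own memory buffer: the functor below rebinds every
//! property's device memory into the returned base_grid without copying data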
template<typename base_grid, unsigned int dim, typename T, typename S>
struct grid_toKernelImpl<true,base_grid,dim,T,S>
{
    template<typename grid_type> static base_grid toKernel(grid_type & gc)
    {
        /*grid_gpu_ker<dim,T,memory_traits_inte, typename grid_type::linearizer_type>*/ base_grid g(gc.getGrid());
        copy_switch_memory_c_no_cpy<typename std::remove_reference<decltype(gc.get_internal_data_())>::type,
                                    typename std::remove_reference<decltype(g.get_data_())>::type> cp_mc(gc.get_internal_data_(),g.get_data_());

        boost::mpl::for_each_ref< boost::mpl::range_c<int,0,T::max_prop> >(cp_mc);

        return g;
    }
};

#endif /* CUDA_GRID_GPU_FUNCS_CUH_ */
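
As a rough, hypothetical sketch of how these helpers fit together (the fragment below is not part of the header): it copies one 2D grid into another on the GPU with copy_ndim_grid_device. The grid_gpu container, the aggregate property list and the setMemory()/hostToDevice()/toKernel() calls are assumptions drawn from the wider OpenFPM grid API rather than from this file; toKernel() is the point where grid_toKernelImpl builds the device view passed to the kernel.

// Hypothetical usage sketch (assumed OpenFPM grid API, not part of this header)
#include "Grid/map_grid.hpp"

void copy_ndim_grid_example()
{
    size_t sz[2] = {128,128};

    grid_gpu<2,aggregate<float>> src(sz);   // assumed device-backed grid container
    grid_gpu<2,aggregate<float>> dst(sz);
    src.setMemory();
    dst.setMemory();

    // ... fill property 0 of src on the host, then move it to the device
    src.hostToDevice<0>();

    // one thread per grid point; a 2D launch matches the 2D specialization above
    dim3 thr(16,16);
    dim3 blk((sz[0] + thr.x - 1) / thr.x,(sz[1] + thr.y - 1) / thr.y);

    copy_ndim_grid_device<2><<<blk,thr>>>(src.toKernel(),dst.toKernel());
    cudaDeviceSynchronize();
}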