OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
cuda_grid_gpu_int.cu
1 #include "config.h"
2 #define BOOST_TEST_DYN_LINK
3 #include <boost/test/unit_test.hpp>
4 #include "Space/Shape/Box.hpp"
5 #include "Vector/map_vector.hpp"
6 #include "NN/CellList/cuda/CellList_gpu.hpp"
7 #define DISABLE_MPI_WRITTERS
8 #include "VTKWriter/VTKWriter.hpp"
9 
/*! \brief Debug kernel: read back one particle per thread from a device particle set.
 *
 * Each thread loads the position (property 0 of set_points), the scalar
 * property (property 0 of prop) and the 3-component vector property
 * (property 1 of prop) of particle p. Values are only loaded, not written.
 *
 * \param set_points vector of particle positions (device copy)
 * \param prop vector of particle properties: <0> scalar, <1> 3-vector
 * \param domain simulation box (currently unused inside the kernel)
 */
template<typename vector_pos_type, typename vector_prop_type>
__global__ void test_launch(vector_pos_type set_points, vector_prop_type prop, Box<3,float> domain)
{
	int p = threadIdx.x + blockIdx.x * blockDim.x;

	// bounds guard: the launch grid may overshoot the particle count
	if (p >= set_points.size())
	{return;}

	float pos[3];
	pos[0] = set_points.template get<0>(p)[0];
	pos[1] = set_points.template get<0>(p)[1];
	pos[2] = set_points.template get<0>(p)[2]; // fixed: was component [1] (copy-paste bug)

	float scalar = prop.template get<0>(p);

	float v[3];

	v[0] = prop.template get<1>(p)[0];
	v[1] = prop.template get<1>(p)[1];
	v[2] = prop.template get<1>(p)[2];
}
31 
/*! \brief Debug kernel: print the scalar and vector property of every grid point.
 *
 * One thread per grid point (via GRID_ID_3 on the supplied iterator range);
 * reads property <0> (scalar) and property <1> (3-vector) and prints them.
 *
 * \param grid grid to read (device copy)
 * \param domain simulation box (currently unused inside the kernel)
 * \param ite_gpu GPU iterator describing the index range to cover
 */
template<typename grid_type>
__global__ void test_launch_grid(grid_type grid, Box<3,float> domain, ite_gpu<3> ite_gpu)
{
	GRID_ID_3(ite_gpu);

	float sc = grid.template get<0>(key);

	float vec[3];
	for (int c = 0 ; c < 3 ; c++)
	{vec[c] = grid.template get<1>(key)[c];}

	printf("Grid point %d %d %d scalar: %f vector: %f %f %f \n",(int)key.get(0),(int)key.get(1),(int)key.get(2),sc,vec[0],vec[1],vec[2]);
}
47 
/*! \brief Debug kernel: print grid data through raw device pointers.
 *
 * Plain-CUDA counterpart of test_launch_grid: one thread per (x,y,z) point,
 * scalar stored as scalar[z*sxy + y*sx + x], vector stored component-major
 * with the given stride between components.
 *
 * \param scalar device pointer to the scalar field
 * \param vector device pointer to the vector field (3 planes, stride apart)
 * \param sxy number of points in one z-plane (sx*sy)
 * \param sx,sy,sz grid extent in each direction
 * \param stride element distance between vector components
 */
__global__ void test_launch_cuda_native(float * scalar, float * vector, int sxy, int sx , int sy , int sz , int stride)
{
	int ix = threadIdx.x + blockIdx.x * blockDim.x;
	int iy = threadIdx.y + blockIdx.y * blockDim.y;
	int iz = threadIdx.z + blockIdx.z * blockDim.z;

	// bounds guard: the launch grid may overshoot the domain
	if (ix >= sx || iy >= sy || iz >= sz)
	{return;}

	// linear index of this grid point, computed once and reused
	int lin = iz*sxy + iy*sx + ix;

	float s = scalar[lin];

	float v[3];
	v[0] = vector[lin + 0*stride];
	v[1] = vector[lin + 1*stride];
	v[2] = vector[lin + 2*stride];

	printf("Grid point from CUDA %d %d %d scalar: %f vector: %f %f %f \n",ix,iy,iz,s,v[0],v[1],v[2]);
}
70 
// Maximum number of neighbour ids recorded per grid point (used by the
// currently disabled cell-list test kernel below)
constexpr int NN_num = 4;
72 
73 /*template<typename celllist_type>
74 __global__ void test_launch_cell_list(celllist_type cell, ite_gpu<3> ite_gpu)
75 {
76  GRID_ID_3(ite_gpu)
77 
78  int nn_part = 0;
79 
80  int idx = 0;
81  int NN[NN_num];
82 
83  auto NN_it = cell.template getNNIteratorBox<2>(key);
84 
85  while (NN_it.isNext())
86  {
87  auto q = NN_it.get();
88 
89  if (idx < NN_num)
90  {
91  NN[idx] = q;
92  idx++;
93  }
94 
95  nn_part++;
96 
97  ++NN_it;
98  }
99 
100  printf("CELLLIST %d %d %d nn_part: %d NN: %d %d %d %d \n",(int)key.get(0),(int)key.get(1),(int)key.get(2),nn_part,NN[0],NN[1],NN[2],NN[3]);
101 }*/
102 
103 BOOST_AUTO_TEST_SUITE( grid_gpu_func_interp )
104 
BOOST_AUTO_TEST_CASE (gpu_p2m)
{
	// NOTE(review): several lines of this test appear to be missing from this
	// extract (the declarations of pos, prop, grid, pos_sort, prop_sort,
	// prp_names and the VTKWriter template head) — TODO confirm against the
	// repository copy before editing the logic.

	pos.resize(100);
	prop.resize(100);

	// Fill 100 random positions in [0,1]^3 and constant particle properties
	for (size_t i = 0 ; i < 100 ; i++)
	{
		pos.template get<0>(i)[0] = (float)rand() / RAND_MAX;
		pos.template get<0>(i)[1] = (float)rand() / RAND_MAX;
		pos.template get<0>(i)[2] = (float)rand() / RAND_MAX;

		prop.template get<0>(i) = 5.0;
		prop.template get<1>(i)[0] = 3.0;
		prop.template get<1>(i)[1] = 4.0;
		prop.template get<1>(i)[2] = 5.0;
	}

	// Copy particle data to the device before launching kernels
	pos.template hostToDevice<0>();
	prop.template hostToDevice<0,1>();

	Box<3,float> domain({0.0,0.0,0.0},{1.0,1.0,1.0});

	auto ite = pos.getGPUIterator();

	// Read the particle set back on the device (smoke test for toKernel())
	test_launch<<<ite.wthr,ite.thr>>>(pos.toKernel(),prop.toKernel(),domain);


	grid.resize({10,10,10});

	auto it = grid.getIterator();

	// Initialize the grid on the host: scalar = i+j+k, vector = (i,j,k)
	while (it.isNext())
	{
		auto key = it.get();

		grid.template get<0>(key) = key.get(0) + key.get(1) + key.get(2);
		grid.template get<1>(key)[0] = key.get(0);
		grid.template get<1>(key)[1] = key.get(1);
		grid.template get<1>(key)[2] = key.get(2);

		++it;
	}

	grid_key_dx<3> start({0,0,0});
	grid_key_dx<3> stop({9,9,9});

	auto ite_g = grid.getGPUIterator(start,stop);

	grid.template hostToDevice<0,1>();

	test_launch_grid<<<ite_g.wthr,ite_g.thr>>>(grid.toKernel(),domain,ite_g);


	// Same read-back through raw device pointers; sxy=100 matches the
	// 10x10x10 grid resized above — presumably stride = grid.size() because
	// the vector components are stored as separate planes; TODO confirm
	test_launch_cuda_native<<<ite_g.wthr,ite_g.thr>>>((float *)grid.template getDeviceBuffer<0>(),(float *)grid.template getDeviceBuffer<1>(),100,10,10,10,grid.size());



	pos_sort.resize(pos.size());
	prop_sort.resize(prop.size());

	size_t g_m = pos.size();

	mgpu::ofp_context_t context(false);

	const size_t (& sz)[3] = grid.getGrid().getSize();

	CellList_gpu<3,float,CudaMemory,shift_only<3,float>> cl(domain,sz,2);

	cl.template construct(pos,pos_sort,prop,prop_sort,context,g_m);

	grid_key_dx<3> start_c({2,2,2});
	grid_key_dx<3> stop_c({11,11,11});

	auto ite_gpu = getGPUIterator_impl(cl.getGrid(),start_c,stop_c);

	// NOTE(review): test_launch_cell_list is commented out earlier in this
	// file, so this launch cannot compile as-is — re-enable the kernel or
	// remove this call; TODO confirm intended state
	test_launch_cell_list<<<ite_gpu.wthr,ite_gpu.thr>>>(cl.toKernel(),ite_gpu);


	// VTKWriter for a set of points
	VECTOR_POINTS> vtk_writer;
	vtk_writer.add(pos,prop,pos.size());

	prp_names.add("scalar");
	prp_names.add("vector");

	file_type ft = file_type::ASCII;

	// Bring results back to the host before writing them out
	pos.template deviceToHost<0>();
	prop.template deviceToHost<0,1>();

	// Write the VTK file
	vtk_writer.write("particles.vtk",prp_names,"particles",ft);
}
210 
BOOST_AUTO_TEST_CASE (gpu_m2p)
{
	// TODO: mesh-to-particle (m2p) interpolation test not implemented yet
}
215 
216 BOOST_AUTO_TEST_SUITE_END()
grid_key_dx is the key to access any element in the grid
Definition: grid_key.hpp:18
size_t size()
Stub size.
Definition: map_vector.hpp:211
This is a distributed grid.
KeyT const ValueT ValueT OffsetIteratorT OffsetIteratorT int
[in] The number of segments that comprise the sorting data
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:202