OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
cuda_grid_unit_tests_func.cu
#include "config.h"
#include <Grid/map_grid.hpp>
#include "Point_test.hpp"
#include <stdio.h>
#include "cuda_grid_unit_tests_func.cuh"

// Centred-difference gradient: scalar property 0 of g1 into vector property 4 of g2
template<typename grid_type1, typename grid_type2>
__global__ void grid_gradient_vector(grid_type1 g1, grid_type2 g2, ite_gpu<3> ite_gpu)
{
    GRID_ID_3(ite_gpu);

    g2.template get<4>(key)[0] = (g1.template get<0>(key.move(0,1)) - g1.template get<0>(key.move(0,-1))) / 2.0;
    g2.template get<4>(key)[1] = (g1.template get<0>(key.move(1,1)) - g1.template get<0>(key.move(1,-1))) / 2.0;
    g2.template get<4>(key)[2] = (g1.template get<0>(key.move(2,1)) - g1.template get<0>(key.move(2,-1))) / 2.0;
}

// Fill the vector property 4 with the constant vector (1,2,3)
template<typename grid_type>
__global__ void grid_fill_vector(grid_type g1, ite_gpu<3> ite_gpu)
{
    GRID_ID_3(ite_gpu);

    g1.template get<4>(key)[0] = 1.0;
    g1.template get<4>(key)[1] = 2.0;
    g1.template get<4>(key)[2] = 3.0;
}

// Fill the vector property 4 with the constant vector (1001,1002,1003)
template<typename grid_type>
__global__ void grid_fill_vector2(grid_type g1, ite_gpu<3> ite_gpu)
{
    GRID_ID_3(ite_gpu);

    g1.template get<4>(key)[0] = 1001.0;
    g1.template get<4>(key)[1] = 1002.0;
    g1.template get<4>(key)[2] = 1003.0;
}

// 7-point Laplacian stencil on scalar property 0 of g1, written into g2
template<typename grid_type>
__global__ void compute_stencil_grid(grid_type g1, grid_type g2, ite_gpu<3> ite_gpu)
{
    GRID_ID_3(ite_gpu);

    g2.template get<0>(key) = g1.template get<0>(key.move(0,1)) + g1.template get<0>(key.move(0,-1)) +
                              g1.template get<0>(key.move(1,1)) + g1.template get<0>(key.move(1,-1)) +
                              g1.template get<0>(key.move(2,1)) + g1.template get<0>(key.move(2,-1)) -
                              6.0*g1.template get<0>(key);
}

// The same 7-point Laplacian stencil, operating on a raw sz*sz*sz float buffer
__global__ void compute_stencil(float * prp_0, float * prp_1, int sz, grid_key_dx<3> start, grid_key_dx<3> stop)
{
    GRID_ID_3_TRAW(start,stop);

    prp_1[tz*sz*sz + ty*sz + tx] = prp_0[tz*sz*sz + ty*sz + tx + 1] + prp_0[tz*sz*sz + ty*sz + tx - 1] +
                                   prp_0[tz*sz*sz + (ty + 1)*sz + tx] + prp_0[tz*sz*sz + (ty - 1)*sz + tx] +
                                   prp_0[(tz + 1)*sz*sz + ty*sz + tx] + prp_0[(tz - 1)*sz*sz + ty*sz + tx] -
                                   6.0*prp_0[tz*sz*sz + ty*sz + tx];
}

__global__ void fill_one(float * prp_0,int sz)
{
    // Thread index
    int tx = threadIdx.x + blockIdx.x * blockDim.x;
    int ty = threadIdx.y + blockIdx.y * blockDim.y;
    int tz = threadIdx.z + blockIdx.z * blockDim.z;

    prp_0[tz*sz*sz + ty*sz + tx] = 1.0f;
}

__global__ void fill_count(float * prp_0,int sz)
{
    // Thread index
    int tx = threadIdx.x + blockIdx.x * blockDim.x;
    int ty = threadIdx.y + blockIdx.y * blockDim.y;
    int tz = threadIdx.z + blockIdx.z * blockDim.z;

    prp_0[tz*sz*sz + ty*sz + tx] = tz*sz*sz + ty*sz + tx;
}

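The raw-pointer kernels above (fill_one, fill_count, compute_stencil) can also be driven without the grid_gpu wrappers defined further down. The following is a minimal, hypothetical sketch (run_raw_stencil is not part of OpenFPM): it reuses the 64^3 buffer size and the 8x8x8 launch configuration of the wrappers below, and assumes GRID_ID_3_TRAW discards threads that fall outside the [start, stop] range, as those wrappers rely on.

// Hypothetical driver, not part of OpenFPM: exercises fill_count and compute_stencil
// on plain device buffers, mirroring the 64^3 / 8x8x8 setup used by the wrappers below.
void run_raw_stencil()
{
    const int sz = 64;
    const size_t n = (size_t)sz*sz*sz;

    float * prp_0 = nullptr;
    float * prp_1 = nullptr;
    cudaMalloc(&prp_0, n*sizeof(float));
    cudaMalloc(&prp_1, n*sizeof(float));

    dim3 threads(8,8,8);
    dim3 grid(8,8,8);

    // Fill the input buffer with its own linear index
    CUDA_LAUNCH_DIM3((fill_count), grid, threads, prp_0, sz);

    // Apply the stencil on the interior only, so the +/-1 neighbour accesses stay in bounds
    grid_key_dx<3> start(1,1,1);
    grid_key_dx<3> stop(sz-2,sz-2,sz-2);
    CUDA_LAUNCH_DIM3((compute_stencil), grid, threads, prp_0, prp_1, sz, start, stop);

    cudaDeviceSynchronize();

    cudaFree(prp_0);
    cudaFree(prp_1);
}

The wrappers below perform the same kind of launches, but obtain the device pointers from grid_gpu buffers via getDeviceBuffer<0>() instead of allocating them directly.
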
// Host wrapper: fill scalar property 0 of the grid with 1.0f

void gpu_grid_3D_one(grid_gpu<3,Point_aggr_test> & g)
{
    // Setup execution parameters
    dim3 threads(8,8,8);
    dim3 grid(8,8,8);

    float * prp_0 = (float *)g.getDeviceBuffer<0>();

    CUDA_LAUNCH_DIM3((fill_one), grid, threads, prp_0, 64);
}

// Host wrapper: fill scalar property 0 with the linear index of each cell

void gpu_grid_3D_compute(grid_gpu<3,Point_aggr_test> & g)
{
    // Setup execution parameters
    dim3 threads(8,8,8);
    dim3 grid(8,8,8);

    float * prp_0 = (float *)g.getDeviceBuffer<0>();

    CUDA_LAUNCH_DIM3((fill_count), grid, threads, prp_0, 64);
}

// Host wrapper: apply the raw-pointer stencil kernel between start and stop
void gpu_grid_3D_compute_stencil(grid_gpu<3,Point_aggr_test> & g1, grid_gpu<3,Point_aggr_test> & g2,
                                 grid_key_dx<3> & start, grid_key_dx<3> & stop)
{
    // Setup execution parameters

    float * prp_0 = (float *)g1.getDeviceBuffer<0>();
    float * prp_1 = (float *)g2.getDeviceBuffer<0>();

    auto gpu_it = g2.getGPUIterator(start,stop);

    CUDA_LAUNCH_DIM3(compute_stencil, gpu_it.thr, gpu_it.wthr, prp_0, prp_1, 64, start, stop);
}

// Host wrapper: apply the grid-based stencil kernel between start and stop
void gpu_grid_3D_compute_grid_stencil(grid_gpu<3,Point_aggr_test> & g1, grid_gpu<3,Point_aggr_test> & g2,
                                      grid_key_dx<3> & start, grid_key_dx<3> & stop)
{
    auto gpu_it = g2.getGPUIterator(start,stop);

    auto g1k = g1.toKernel();
    auto g2k = g2.toKernel();

    CUDA_LAUNCH_DIM3(compute_stencil_grid, gpu_it.thr, gpu_it.wthr, g1k, g2k, gpu_it);
}

void gpu_grid_fill_vector(grid_gpu<3,Point_aggr_test> & g1, grid_key_dx<3> & start, grid_key_dx<3> & stop)
{
    auto gpu_it = g1.getGPUIterator(start,stop);

    CUDA_LAUNCH_DIM3(grid_fill_vector, gpu_it.thr, gpu_it.wthr, g1.toKernel(), gpu_it);
}

void gpu_grid_fill_vector2(grid_gpu<3,Point_aggr_test> & g1, grid_key_dx<3> & start, grid_key_dx<3> & stop)
{
    auto gpu_it = g1.getGPUIterator(start,stop);

    CUDA_LAUNCH_DIM3(grid_fill_vector2, gpu_it.thr, gpu_it.wthr, g1.toKernel(), gpu_it);
}

void gpu_grid_gradient_vector(grid_gpu<3,Point_aggr_test> & g1, grid_gpu<3,Point_aggr_test> & g2, grid_key_dx<3> & start, grid_key_dx<3> & stop)
{
    auto gpu_it = g1.getGPUIterator(start,stop);

    CUDA_LAUNCH_DIM3(grid_gradient_vector, gpu_it.thr, gpu_it.wthr, g1.toKernel(), g2.toKernel(), gpu_it);
}

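For context, a unit test might exercise these wrappers roughly as in the sketch below. This is a minimal, hypothetical sketch rather than the actual OpenFPM test: it assumes grid_gpu exposes setMemory() and deviceToHost<...>() (as used elsewhere in the OpenFPM grid tests) and that property 0 of Point_aggr_test is the scalar the kernels above operate on; see Point_test.hpp and the corresponding unit test sources for the authoritative versions.

// Hypothetical usage sketch, not the actual unit test.
// Assumes grid_gpu provides setMemory() and deviceToHost<...>().
void example_grid_stencil_test()
{
    size_t sz[3] = {64,64,64};

    grid_gpu<3,Point_aggr_test> g1(sz);
    grid_gpu<3,Point_aggr_test> g2(sz);
    g1.setMemory();
    g2.setMemory();

    // Fill property 0 of g1 with the linear index of each cell, on the device
    gpu_grid_3D_compute(g1);

    // Apply the 7-point Laplacian on the interior of the 64^3 domain
    grid_key_dx<3> start(1,1,1);
    grid_key_dx<3> stop(62,62,62);
    gpu_grid_3D_compute_grid_stencil(g1,g2,start,stop);

    // Copy the result back to the host; for a linear input field the Laplacian is zero
    g2.deviceToHost<0>();          // assumed API
    grid_key_dx<3> probe(32,32,32);
    float v = g2.get<0>(probe);
    (void)v;                       // a real test would check v == 0.0f on interior points
}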