OpenFPM_pdata 4.1.0
Project that contains the implementation of distributed structures
 
cuda_grid_unit_tests_func.cu
#include "config.h"
#include <Grid/map_grid.hpp>
#include "Point_test.hpp"
#include <stdio.h>
#include "cuda_grid_unit_tests_func.cuh"

template<typename grid_type1, typename grid_type2>
__global__ void grid_gradient_vector(grid_type1 g1, grid_type2 g2, ite_gpu<3> ite_gpu)
{
    GRID_ID_3(ite_gpu);

    g2.template get<4>(key)[0] = (g1.template get<0>(key.move(0,1)) - g1.template get<0>(key.move(0,-1))) / 2.0;
    g2.template get<4>(key)[1] = (g1.template get<0>(key.move(1,1)) - g1.template get<0>(key.move(1,-1))) / 2.0;
    g2.template get<4>(key)[2] = (g1.template get<0>(key.move(2,1)) - g1.template get<0>(key.move(2,-1))) / 2.0;
}
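
// The kernel above computes a first-order central difference along each axis:
// df/dx_i ~ (f(x_i + 1) - f(x_i - 1)) / 2, with the grid spacing taken as 1.
// The scalar field is read from property 0 of g1 and the resulting vector is
// written into property 4 of g2, so the iteration range must stay one cell
// away from the border for key.move() to remain inside the domain.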

template<typename grid_type>
__global__ void grid_fill_vector(grid_type g1, ite_gpu<3> ite_gpu)
{
    GRID_ID_3(ite_gpu);

    g1.template get<4>(key)[0] = 1.0;
    g1.template get<4>(key)[1] = 2.0;
    g1.template get<4>(key)[2] = 3.0;
}

template<typename grid_type>
__global__ void grid_fill_vector2(grid_type g1, ite_gpu<3> ite_gpu)
{
    GRID_ID_3(ite_gpu);

    g1.template get<4>(key)[0] = 1001.0;
    g1.template get<4>(key)[1] = 1002.0;
    g1.template get<4>(key)[2] = 1003.0;
}

template<typename grid_type>
__global__ void compute_stencil_grid(grid_type g1, grid_type g2, ite_gpu<3> ite_gpu)
{
    GRID_ID_3(ite_gpu);

    g2.template get<0>(key) = g1.template get<0>(key.move(0,1)) + g1.template get<0>(key.move(0,-1)) +
                              g1.template get<0>(key.move(1,1)) + g1.template get<0>(key.move(1,-1)) +
                              g1.template get<0>(key.move(2,1)) + g1.template get<0>(key.move(2,-1)) -
                              6.0*g1.template get<0>(key);
}
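
// The stencil above is the standard 7-point discrete Laplacian: the sum of the
// six face neighbours minus six times the centre value (grid spacing 1).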

__global__ void compute_stencil(float * prp_0, float * prp_1, int sz, grid_key_dx<3> start, grid_key_dx<3> stop)
{
    GRID_ID_3_TRAW(start,stop);

    // 7-point Laplacian on a flat buffer: each neighbour access shifts exactly
    // one of tx, ty, tz by one cell
    prp_1[tz*sz*sz + ty*sz + tx] = prp_0[tz*sz*sz + ty*sz + tx + 1] + prp_0[tz*sz*sz + ty*sz + tx - 1] +
                                   prp_0[tz*sz*sz + (ty + 1)*sz + tx] + prp_0[tz*sz*sz + (ty - 1)*sz + tx] +
                                   prp_0[(tz + 1)*sz*sz + ty*sz + tx] + prp_0[(tz - 1)*sz*sz + ty*sz + tx] -
                                   6.0*prp_0[tz*sz*sz + ty*sz + tx];
}
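
// compute_stencil performs the same operation as compute_stencil_grid, but the
// 3D index (tx,ty,tz) is flattened by hand as tz*sz*sz + ty*sz + tx instead of
// going through the grid accessor, so it only works for a cube of side sz.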

__global__ void fill_one(float * prp_0, int sz)
{
    // Thread index
    int tx = threadIdx.x + blockIdx.x * blockDim.x;
    int ty = threadIdx.y + blockIdx.y * blockDim.y;
    int tz = threadIdx.z + blockIdx.z * blockDim.z;

    prp_0[tz*sz*sz + ty*sz + tx] = 1.0f;
}

__global__ void fill_count(float * prp_0, int sz)
{
    // Thread index
    int tx = threadIdx.x + blockIdx.x * blockDim.x;
    int ty = threadIdx.y + blockIdx.y * blockDim.y;
    int tz = threadIdx.z + blockIdx.z * blockDim.z;

    prp_0[tz*sz*sz + ty*sz + tx] = tz*sz*sz + ty*sz + tx;
}
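
// fill_one writes the constant 1 everywhere, while fill_count writes each
// cell's own flattened linear index, producing the linear ramp that the
// stencil and gradient tests operate on.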

// Host-side wrappers: set up the launch parameters and call the kernels above

void gpu_grid_3D_one(grid_gpu<3,Point_aggr_test> & g)
{
    // Setup execution parameters: 8x8x8 blocks of 8x8x8 threads cover a 64^3 grid
    dim3 threads(8,8,8);
    dim3 grid(8,8,8);

    float * prp_0 = (float *)g.getDeviceBuffer<0>();

    CUDA_LAUNCH_DIM3((fill_one), grid, threads, prp_0, 64);
}
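
// Usage sketch (an assumption, not part of the original file): a minimal
// host-side driver for gpu_grid_3D_one, following the OpenFPM grid_gpu API
// (setMemory, deviceToHost). The 64^3 size matches the hard-coded launch
// configuration above.
void example_gpu_grid_3D_one()
{
    size_t sz[3] = {64,64,64};
    grid_gpu<3,Point_aggr_test> g(sz);
    g.setMemory();

    gpu_grid_3D_one(g);     // every cell of property 0 becomes 1.0f

    g.deviceToHost<0>();    // copy property 0 back to the host for checking
}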

void gpu_grid_3D_compute(grid_gpu<3,Point_aggr_test> & g)
{
    // Setup execution parameters
    dim3 threads(8,8,8);
    dim3 grid(8,8,8);

    float * prp_0 = (float *)g.getDeviceBuffer<0>();

    // fill property 0 with each cell's linear index
    CUDA_LAUNCH_DIM3((fill_count), grid, threads, prp_0, 64);
}

void gpu_grid_3D_compute_stencil(grid_gpu<3,Point_aggr_test> & g1, grid_gpu<3,Point_aggr_test> & g2,
                                 grid_key_dx<3> & start, grid_key_dx<3> & stop)
{
    // Setup execution parameters

    float * prp_0 = (float *)g1.getDeviceBuffer<0>();
    float * prp_1 = (float *)g2.getDeviceBuffer<0>();

    auto gpu_it = g2.getGPUIterator(start,stop);

    CUDA_LAUNCH_DIM3(compute_stencil, gpu_it.thr, gpu_it.wthr, prp_0, prp_1, 64, start, stop);
}
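
// getGPUIterator derives the launch configuration from the start/stop keys:
// the gpu_it.wthr/gpu_it.thr pair carries the dim3 block and thread counts
// handed to CUDA_LAUNCH_DIM3, so the kernel only visits cells in [start,stop].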

void gpu_grid_3D_compute_grid_stencil(grid_gpu<3,Point_aggr_test> & g1, grid_gpu<3,Point_aggr_test> & g2,
                                      grid_key_dx<3> & start, grid_key_dx<3> & stop)
{
    auto gpu_it = g2.getGPUIterator(start,stop);

    auto g1k = g1.toKernel();
    auto g2k = g2.toKernel();

    CUDA_LAUNCH_DIM3(compute_stencil_grid, gpu_it.thr, gpu_it.wthr, g1k, g2k, gpu_it);
}
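
// Usage sketch (an assumption, not part of the original file): applying the
// grid stencil to the interior of a 64^3 grid. The one-cell border is
// excluded so that key.move() never steps outside the domain.
void example_grid_stencil()
{
    size_t sz[3] = {64,64,64};
    grid_gpu<3,Point_aggr_test> g1(sz);
    grid_gpu<3,Point_aggr_test> g2(sz);
    g1.setMemory();
    g2.setMemory();

    grid_key_dx<3> start(1,1,1);
    grid_key_dx<3> stop(62,62,62);

    gpu_grid_3D_compute(g1);                              // linear ramp in g1
    gpu_grid_3D_compute_grid_stencil(g1,g2,start,stop);   // Laplacian into g2
    g2.deviceToHost<0>();
}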

void gpu_grid_fill_vector(grid_gpu<3,Point_aggr_test> & g1, grid_key_dx<3> & start, grid_key_dx<3> & stop)
{
    auto gpu_it = g1.getGPUIterator(start,stop);

    CUDA_LAUNCH_DIM3(grid_fill_vector, gpu_it.thr, gpu_it.wthr, g1.toKernel(), gpu_it);
}

void gpu_grid_fill_vector2(grid_gpu<3,Point_aggr_test> & g1, grid_key_dx<3> & start, grid_key_dx<3> & stop)
{
    auto gpu_it = g1.getGPUIterator(start,stop);

    CUDA_LAUNCH_DIM3(grid_fill_vector2, gpu_it.thr, gpu_it.wthr, g1.toKernel(), gpu_it);
}

void gpu_grid_gradient_vector(grid_gpu<3,Point_aggr_test> & g1, grid_gpu<3,Point_aggr_test> & g2, grid_key_dx<3> & start, grid_key_dx<3> & stop)
{
    auto gpu_it = g1.getGPUIterator(start,stop);

    CUDA_LAUNCH_DIM3(grid_gradient_vector, gpu_it.thr, gpu_it.wthr, g1.toKernel(), g2.toKernel(), gpu_it);
}
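
// Usage sketch (an assumption, not part of the original file): taking the
// gradient of the linear ramp written by fill_count. Since the field is
// linear, the central differences are exact: 1, 64 and 64*64 along the three
// axes (up to the axis-ordering convention of the grid's memory layout).
void example_gradient()
{
    size_t sz[3] = {64,64,64};
    grid_gpu<3,Point_aggr_test> g1(sz);
    grid_gpu<3,Point_aggr_test> g2(sz);
    g1.setMemory();
    g2.setMemory();

    gpu_grid_3D_compute(g1);        // property 0 of g1 becomes a linear ramp

    grid_key_dx<3> start(1,1,1);    // stay one cell away from the border
    grid_key_dx<3> stop(62,62,62);

    gpu_grid_gradient_vector(g1,g2,start,stop);
    g2.deviceToHost<4>();           // the vector result lives in property 4
}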