2#include <Grid/map_grid.hpp>
3#include "Point_test.hpp"
5#include "cuda_grid_unit_tests_func.cuh"
// Kernel: for the grid element addressed by `key`, stores in components
// [0], [1], [2] of property 4 of g2 the difference of property 0 of g1 taken
// at key.move(d,1) and key.move(d,-1), divided by 2.0, for d = 0, 1, 2.
// Presumably move(d,s) shifts the key by s along dimension d, which makes each
// line a central difference along one axis -- confirm against the key type.
//
// NOTE(review): this view of the file is incomplete. The function braces and
// the statement(s) that define `key` (presumably derived from ite_gpu, which
// is otherwise unused here) are not visible, and the leading numbers look like
// listing line numbers fused into the text. Restore from the original file.
7template<
typename gr
id_type1,
typename gr
id_type2>
8__global__
void grid_gradient_vector(grid_type1 g1, grid_type2 g2,
ite_gpu<3> ite_gpu)
// Component 0: difference along dimension 0.
12 g2.template get<4>(key)[0] = (g1.template get<0>(key.move(0,1)) - g1.template get<0>(key.move(0,-1))) / 2.0;
// Component 1: difference along dimension 1.
13 g2.template get<4>(key)[1] = (g1.template get<0>(key.move(1,1)) - g1.template get<0>(key.move(1,-1))) / 2.0;
// Component 2: difference along dimension 2.
14 g2.template get<4>(key)[2] = (g1.template get<0>(key.move(2,1)) - g1.template get<0>(key.move(2,-1))) / 2.0;
// Kernel body fragment: writes the constants 1.0, 2.0 and 3.0 into components
// [0], [1], [2] of property 4 of g1 at the element addressed by `key`.
//
// NOTE(review): the function signature, the braces and the definition of
// `key` are not visible in this view. This is presumably grid_fill_vector
// (launched elsewhere in this file with g1.toKernel() and a GPU iterator) --
// confirm against the original file before editing.
17template<
typename gr
id_type>
22 g1.template get<4>(key)[0] = 1.0;
23 g1.template get<4>(key)[1] = 2.0;
24 g1.template get<4>(key)[2] = 3.0;
// Kernel body fragment: writes the constants 1001.0, 1002.0 and 1003.0 into
// components [0], [1], [2] of property 4 of g1 at the element addressed by
// `key`.
//
// NOTE(review): the function signature, the braces and the definition of
// `key` are not visible in this view. This is presumably grid_fill_vector2
// (launched elsewhere in this file with g1.toKernel() and a GPU iterator) --
// confirm against the original file before editing.
27template<
typename gr
id_type>
32 g1.template get<4>(key)[0] = 1001.0;
33 g1.template get<4>(key)[1] = 1002.0;
34 g1.template get<4>(key)[2] = 1003.0;
// Kernel body fragment: stores in property 0 of g2 at `key` the sum of
// property 0 of g1 at the six keys key.move(d,+1) / key.move(d,-1) for
// d = 0, 1, 2, minus 6.0 times property 0 of g1 at `key` itself.
// Presumably move(d,s) shifts the key by s along dimension d, which makes this
// a 7-point stencil -- confirm against the key type.
//
// NOTE(review): the function signature, the braces and the definition of
// `key` are not visible in this view. This is presumably compute_stencil_grid
// (launched elsewhere in this file with two kernel grids and a GPU iterator)
// -- confirm against the original file before editing.
37template<
typename gr
id_type>
42 g2.template get<0>(key) = g1.template get<0>(key.move(0,1)) + g1.template get<0>(key.move(0,-1)) +
43 g1.template get<0>(key.move(1,1)) + g1.template get<0>(key.move(1,-1)) +
44 g1.template get<0>(key.move(2,1)) + g1.template get<0>(key.move(2,-1)) -
45 6.0*g1.template get<0>(key);
// NOTE(review): tx, ty and tz are not declared in the visible lines; they are
// presumably introduced (and range-checked against start/stop) by this macro
// -- confirm against its definition.
GRID_ID_3_TRAW(start,stop);

// 7-point stencil on a flat buffer indexed as tz*sz*sz + ty*sz + tx:
// the six axis neighbours (x +/- 1, y +/- 1, z +/- 1) minus six times the centre.
// The z neighbours previously carried an extra "+ 1" / "- 1" on tx, so they
// read the diagonal cells (x+1,z+1) and (x-1,z-1) instead of the cells directly
// above and below. The summation order is kept unchanged.
prp_1[tz*sz*sz + ty*sz + tx] = prp_0[tz*sz*sz + ty*sz + tx + 1] + prp_0[tz*sz*sz + ty*sz + tx - 1] +
                               prp_0[tz*sz*sz + (ty + 1)*sz + tx] + prp_0[tz*sz*sz + (ty - 1)*sz + tx] +
                               prp_0[(tz + 1)*sz*sz + ty*sz + tx] + prp_0[(tz - 1)*sz*sz + ty*sz + tx] -
                               6.0*prp_0[tz*sz*sz + ty*sz + tx];
// Kernel: sets to 1.0f the element of prp_0 owned by each thread.
//
// Layout: a 3D launch; each thread derives (tx, ty, tz) from its block and
// thread indices and writes the flat index tz*sz*sz + ty*sz + tx.
//
// prp_0  device buffer; assumed to hold at least sz*sz*sz floats -- confirm
//        against the caller.
// sz     extent of the buffer along each of the three axes.
//
// Threads whose coordinates fall outside [0, sz) on any axis do nothing, so a
// launch that over-covers the cube can neither write past the buffer nor alias
// onto another element.
__global__ void fill_one(float * prp_0, int sz)
{
	// Global thread coordinates
	int tx = threadIdx.x + blockIdx.x * blockDim.x;
	int ty = threadIdx.y + blockIdx.y * blockDim.y;
	int tz = threadIdx.z + blockIdx.z * blockDim.z;

	// Bounds guard: the launch grid need not divide sz evenly
	if (tx >= sz || ty >= sz || tz >= sz)
	{return;}

	prp_0[tz*sz*sz + ty*sz + tx] = 1.0f;
}
// Kernel: stores in each element of prp_0 its own flat index, converted to
// float.
//
// Layout: a 3D launch; each thread derives (tx, ty, tz) from its block and
// thread indices and writes the flat index tz*sz*sz + ty*sz + tx.
//
// prp_0  device buffer; assumed to hold at least sz*sz*sz floats -- confirm
//        against the caller.
// sz     extent of the buffer along each of the three axes.
//
// Threads whose coordinates fall outside [0, sz) on any axis do nothing, so a
// launch that over-covers the cube cannot write past the buffer.
__global__ void fill_count(float * prp_0, int sz)
{
	// Global thread coordinates
	int tx = threadIdx.x + blockIdx.x * blockDim.x;
	int ty = threadIdx.y + blockIdx.y * blockDim.y;
	int tz = threadIdx.z + blockIdx.z * blockDim.z;

	// Bounds guard: the launch grid need not divide sz evenly
	if (tx >= sz || ty >= sz || tz >= sz)
	{return;}

	// The flat index is both the destination and the value written
	int id = tz*sz*sz + ty*sz + tx;
	prp_0[id] = static_cast<float>(id);
}
// Device buffer of property 0 of g, viewed as a flat float array
// (presumably the raw device pointer -- confirm against getDeviceBuffer).
float * prp_0 = (float *)g.getDeviceBuffer<0>();

// Launch fill_one over the buffer; 64 is passed as the kernel's sz argument.
// `grid` and `threads` are defined outside the lines visible here.
CUDA_LAUNCH_DIM3((fill_one), grid, threads, prp_0, 64);
// Device buffer of property 0 of g, viewed as a flat float array
// (presumably the raw device pointer -- confirm against getDeviceBuffer).
float * prp_0 = (float *)g.getDeviceBuffer<0>();

// Launch fill_count over the buffer; 64 is passed as the kernel's sz argument.
// `grid` and `threads` are defined outside the lines visible here.
CUDA_LAUNCH_DIM3((fill_count), grid, threads, prp_0, 64);
// Property-0 device buffers of the input (g1) and output (g2) grids, viewed
// as flat float arrays.
float * prp_0 = (float *)g1.getDeviceBuffer<0>();
float * prp_1 = (float *)g2.getDeviceBuffer<0>();

// Launch configuration for the [start, stop] range, obtained from g2.
auto gpu_it = g2.getGPUIterator(start,stop);

// Launch compute_stencil; 64 is passed as the kernel's sz argument.
// NOTE(review): this launch passes (gpu_it.thr, gpu_it.wthr) where the
// fill_one / fill_count launches in this file pass (grid, threads). Confirm
// against the CUDA_LAUNCH_DIM3 definition that the two dimensions are given
// in the order the macro expects.
CUDA_LAUNCH_DIM3(compute_stencil, gpu_it.thr, gpu_it.wthr, prp_0, prp_1, 64, start, stop);
// Launch configuration for the [start, stop] range, obtained from g2.
auto gpu_it = g2.getGPUIterator(start,stop);

// Kernel-side views of the two grids, passed by value to the kernel.
auto g1k = g1.toKernel();
auto g2k = g2.toKernel();

// NOTE(review): this launch passes (gpu_it.thr, gpu_it.wthr) where the
// fill_one / fill_count launches in this file pass (grid, threads). Confirm
// against the CUDA_LAUNCH_DIM3 definition that the two dimensions are given
// in the order the macro expects.
CUDA_LAUNCH_DIM3(compute_stencil_grid, gpu_it.thr, gpu_it.wthr, g1k, g2k, gpu_it);
// Launch configuration for the [start, stop] range, obtained from g1.
auto gpu_it = g1.getGPUIterator(start,stop);

// Launch grid_fill_vector on the kernel-side view of g1.
// NOTE(review): (gpu_it.thr, gpu_it.wthr) is the reverse of the
// (grid, threads) order used by the fill_one / fill_count launches in this
// file -- confirm against the CUDA_LAUNCH_DIM3 definition.
CUDA_LAUNCH_DIM3(grid_fill_vector, gpu_it.thr, gpu_it.wthr, g1.toKernel(), gpu_it);
// Launch configuration for the [start, stop] range, obtained from g1.
auto gpu_it = g1.getGPUIterator(start,stop);

// Launch grid_fill_vector2 on the kernel-side view of g1.
// NOTE(review): (gpu_it.thr, gpu_it.wthr) is the reverse of the
// (grid, threads) order used by the fill_one / fill_count launches in this
// file -- confirm against the CUDA_LAUNCH_DIM3 definition.
CUDA_LAUNCH_DIM3(grid_fill_vector2, gpu_it.thr, gpu_it.wthr, g1.toKernel(), gpu_it);
// Launch configuration for the [start, stop] range, obtained from g1.
auto gpu_it = g1.getGPUIterator(start,stop);

// Launch grid_gradient_vector: reads from the kernel-side view of g1 and
// writes to the kernel-side view of g2.
// NOTE(review): (gpu_it.thr, gpu_it.wthr) is the reverse of the
// (grid, threads) order used by the fill_one / fill_count launches in this
// file -- confirm against the CUDA_LAUNCH_DIM3 definition.
CUDA_LAUNCH_DIM3(grid_gradient_vector, gpu_it.thr, gpu_it.wthr, g1.toKernel(), g2.toKernel(), gpu_it);
// This is a distributed grid.
// grid_key_dx is the key used to access any element in the grid.