1 #define VCLUSTER_PERF_REPORT
2 #define SYNC_BEFORE_TAKE_TIME
3 #define ENABLE_GRID_DIST_ID_PERF_STATS
4 #include "Decomposition/Distribution/BoxDistribution.hpp"
5 #include "util/cuda_util.hpp"
6 #include "Grid/grid_dist_id.hpp"
7 #include "data_type/aggregate.hpp"
73 constexpr
int U_next = 2;
74 constexpr
int V_next = 3;
82 typedef sgrid_dist_id_gpu<3,float,aggregate<float>,
CudaMemory, Dec> SparseGridType;
90 for (
int i = 0 ; i < 10 ; i++)
95 grid.addPoints([] __device__ (
int i,
int j,
int k)
99 [] __device__ (InsertBlockT & data,
int i,
int j,
int k)
101 data.template get<U>() = 1.0;
106 grid.template flush<smax_<U>>(flush_type::FLUSH_ON_DEVICE);
110 std::cout <<
"Time populate: " << t.getwct() << std::endl;
113 cudaDeviceSynchronize();
116 grid.addPoints([] __device__ (
int i,
int j,
int k)
120 [] __device__ (InsertBlockT & data,
int i,
int j,
int k)
122 data.template get<U>() = 5.0;
127 grid.template flush<sRight_<U>>(flush_type::FLUSH_ON_DEVICE);
131 std::cout <<
"Time populate: " << t2.
getwct() << std::endl;
136 int main(
int argc,
char* argv[])
138 openfpm_init(&argc,&argv);
144 size_t sz[3] = {512,512,512};
163 size_t timeSteps = 300;
165 size_t timeSteps = 15000;
172 SparseGridType
grid(sz,domain,g,bc);
175 float spacing[3] = {
grid.spacing(0),
grid.spacing(1),
grid.spacing(2)};
180 grid.deviceToHost<U>();
215 int main(
int argc,
char* argv[])
This class decomposes a space into sub-sub-domains and distributes them across processors.
Class for CPU time benchmarking.
void stop()
Stop the timer.
void start()
Start the timer.
double getwct()
Return the elapsed real time.
OutputIteratorT, OffsetT, ReductionOpT, OutputT — template parameters
init [in] The initial value of the reduction
[v_transform metafunction]