1#define VCLUSTER_PERF_REPORT
2#define SYNC_BEFORE_TAKE_TIME
3#define ENABLE_GRID_DIST_ID_PERF_STATS
4#include "Decomposition/Distribution/BoxDistribution.hpp"
5#include "util/cuda_launch.hpp"
6#include "Grid/grid_dist_id.hpp"
7#include "data_type/aggregate.hpp"
73constexpr int U_next = 2;
74constexpr int V_next = 3;
82typedef sgrid_dist_id_gpu<3,float,aggregate<float>,
CudaMemory, Dec> SparseGridType;
90 for (
int i = 0 ; i < 10 ; i++)
95 grid.addPoints([] __device__ (
int i,
int j,
int k)
99 [] __device__ (InsertBlockT & data,
int i,
int j,
int k)
101 data.template get<U>() = 1.0;
106 grid.template flush<smax_<U>>(flush_type::FLUSH_ON_DEVICE);
110 std::cout <<
"Time populate: " << t.
getwct() << std::endl;
113 cudaDeviceSynchronize();
116 grid.addPoints([] __device__ (
int i,
int j,
int k)
120 [] __device__ (InsertBlockT & data,
int i,
int j,
int k)
122 data.template get<U>() = 5.0;
127 grid.template flush<sRight_<U>>(flush_type::FLUSH_ON_DEVICE);
131 std::cout <<
"Time populate: " <<
t2.getwct() << std::endl;
136int main(
int argc,
char* argv[])
138 openfpm_init(&argc,&argv);
144 size_t sz[3] = {512,512,512};
163 size_t timeSteps = 300;
165 size_t timeSteps = 15000;
172 SparseGridType
grid(sz,domain,g,bc);
175 float spacing[3] = {
grid.spacing(0),
grid.spacing(1),
grid.spacing(2)};
180 grid.deviceToHost<U>();
215int main(
int argc,
char* argv[])
This class represents an N-dimensional box.
This class decomposes a space into sub-sub-domains and distributes them across processors.
Class for CPU time benchmarking.
void stop()
Stop the timer.
void start()
Start the timer.
double getwct()
Return the elapsed real time.
OutputIteratorT OffsetT ReductionOpT OuputT init
[in] init: the initial value of the reduction.
[v_transform metafunction]
It models an expression expr1 * expr2.