7#define BOOST_TEST_DYN_LINK 
    8#define DISABLE_MPI_WRITTERS 
   10#include <boost/test/unit_test.hpp> 
   11#include "performancePlots.hpp" 
   13#include "SparseGridGpu/SparseGridGpu.hpp" 
   14#include "SparseGridGpu/tests/utils/SparseGridGpu_util_test.cuh" 
   16extern std::string suiteURI;
 
   18extern std::set<std::string> testSet;
 
   20template<
unsigned int blockEdgeSize, 
unsigned int gr
idEdgeSize, 
typename SparseGr
idZ>
 
   21void testStencilHeatSparse_perf(
unsigned int i, std::string base, 
float fillMultiplier=1, 
float voidMultiplier=1)
 
   23    auto testName = 
"In-place sparse stencil";
 
   25    constexpr unsigned int dim = SparseGridZ::dims;
 
   33    report_sparsegrid_funcs.graphs.put(base + 
".dim",2);
 
   34    report_sparsegrid_funcs.graphs.put(base + 
".blockSize",blockEdgeSize);
 
   35    report_sparsegrid_funcs.graphs.put(base + 
".gridSize.x",gridEdgeSize*blockEdgeSize);
 
   36    report_sparsegrid_funcs.graphs.put(base + 
".gridSize.y",gridEdgeSize*blockEdgeSize);
 
   38    unsigned int iterations = 100;
 
   43    dim3 gridSize(gridEdgeSize, gridEdgeSize);
 
   44    unsigned int spatialEdgeSize = 1000000;
 
   45    size_t sz[2] = {spatialEdgeSize, spatialEdgeSize};
 
   46    typename SparseGridZ::grid_info blockGeometry(sz);
 
   47    SparseGridZ sparseGrid(blockGeometry);
 
   49    sparseGrid.template setBackgroundValue<0>(0);
 
   52    float allMultiplier = fillMultiplier + voidMultiplier;
 
   53    const unsigned int numSpheres = gridEdgeSize / (2*allMultiplier);
 
   55    unsigned int centerPoint = spatialEdgeSize / 2;
 
   57    for (
int i = 1; i <= numSpheres; ++i)
 
   59        unsigned int rBig = allMultiplier*i * blockEdgeSize;
 
   60        unsigned int rSmall = (allMultiplier*i - fillMultiplier) * blockEdgeSize;
 
   63        sparseGrid.setGPUInsertBuffer(gridSize, dim3(1));
 
   64        CUDA_LAUNCH_DIM3((insertSphere<0>),
 
   65                         gridSize, dim3(blockEdgeSize * blockEdgeSize, 1, 1),
 
   66                         sparseGrid.toKernel(), start1, rBig, rSmall, 5);
 
   67        cudaDeviceSynchronize();
 
   68        sparseGrid.template flush<smax_<0 >>(ctx, flush_type::FLUSH_ON_DEVICE);
 
   69        cudaDeviceSynchronize();
 
   73    sparseGrid.findNeighbours(); 
 
   74    sparseGrid.tagBoundaries(ctx);
 
   76    sparseGrid.template deviceToHost<0>(); 
 
   77    auto existingElements = sparseGrid.countExistingElements();
 
   78    auto boundaryElements = sparseGrid.countBoundaryElements();
 
   79    unsigned long long numElements = existingElements - boundaryElements;
 
   82    sparseGrid.template applyStencils<BoundaryStencilSetXRescaled<dim,0,0>>(sparseGrid.getBox(),STENCIL_MODE_INPLACE,
 
   83            centerPoint, centerPoint + 2*blockEdgeSize*gridEdgeSize,
 
   87    for (
unsigned int iter=0; iter<iterations; ++iter)
 
   89        cudaDeviceSynchronize();
 
   94        sparseGrid.template applyStencils<Stencil01T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
 
   95        cudaDeviceSynchronize();
 
   96        sparseGrid.template applyStencils<Stencil10T>(sparseGrid.getBox(),STENCIL_MODE_INPLACE, 0.1);
 
   97        cudaDeviceSynchronize();
 
  101        measures_tm.add(ts.
getwct());
 
  103        float gElemS = 2 * numElements / (1e9 * ts.
getwct());
 
  104        float gFlopsS = gElemS * Stencil01T::flops;
 
  106        measures_gf.add(gFlopsS);
 
  110    double deviation_tm = 0;
 
  111    standard_deviation(measures_tm,mean_tm,deviation_tm);
 
  114    double deviation_gf = 0;
 
  115    standard_deviation(measures_gf,mean_gf,deviation_gf);
 
  119    float gElemS =  2 * numElements / (1e9 * mean_tm);
 
  120    float gFlopsS = gElemS * Stencil01T::flops;
 
  121    std::cout << 
"Test: " << testName << std::endl;
 
  122    std::cout << 
"Block: " << blockEdgeSize << 
"x" << blockEdgeSize << std::endl;
 
  123    std::cout << 
"Grid: " << gridEdgeSize*blockEdgeSize << 
"x" << gridEdgeSize*blockEdgeSize << std::endl;
 
  124    double dataOccupancyMean, dataOccupancyDev;
 
  125    sparseGrid.deviceToHost();
 
  126    sparseGrid.measureBlockOccupancy(dataOccupancyMean, dataOccupancyDev);
 
  127    std::cout << 
"Data Occupancy: " << dataOccupancyMean << 
" dev:" << dataOccupancyDev << std::endl;
 
  128    report_sparsegrid_funcs.graphs.put(base + 
".dataOccupancy.mean",dataOccupancyMean);
 
  129    report_sparsegrid_funcs.graphs.put(base +
".dataOccupancy.dev",dataOccupancyDev);
 
  130    std::cout << 
"Iterations: " << iterations << std::endl;
 
  131    std::cout << 
"\tStencil: " << mean_gf << 
" dev:" << deviation_gf << 
" s" << std::endl;
 
  132    std::cout << 
"Throughput: " << std::endl << 
"\t " << gElemS << 
" GElem/s " << std::endl << 
"\t " << gFlopsS << 
" GFlops/s" << std::endl;
 
  134    report_sparsegrid_funcs.graphs.put(base + 
".GFlops.mean",mean_gf);
 
  135    report_sparsegrid_funcs.graphs.put(base +
".GFlops.dev",deviation_gf);
 
  136    report_sparsegrid_funcs.graphs.put(base + 
".time.mean",mean_tm);
 
  137    report_sparsegrid_funcs.graphs.put(base +
".time.dev",deviation_tm);
 
  140template<
unsigned int blockEdgeSize, 
unsigned int gr
idEdgeSize>
 
  141void launch_testStencilHeatSparse_perf(std::string testURI, 
unsigned int i,
 
  142        float fillMultiplier=1, 
float voidMultiplier=1, std::string occupancyStr=
"05")
 
  144    constexpr unsigned int dim = 2;
 
  148    std::string base(testURI + 
"(" + std::to_string(i) + 
")");
 
  149    report_sparsegrid_funcs.graphs.put(base + 
".test.name",
"StencilNSparse"+occupancyStr);
 
  151    testStencilHeatSparse_perf<blockEdgeSize, gridEdgeSize,
 
  153                    fillMultiplier, voidMultiplier);
 
  154    cudaDeviceSynchronize();
 
  157template<
unsigned int blockEdgeSize, 
unsigned int gr
idEdgeSize>
 
  158void launch_testStencilHeatSparseZ_perf(std::string testURI, 
unsigned int i,
 
  159                                       float fillMultiplier=1, 
float voidMultiplier=1, std::string occupancyStr=
"05")
 
  161    constexpr unsigned int dim = 2;
 
  165    std::string base(testURI + 
"(" + std::to_string(i) + 
")");
 
  166    report_sparsegrid_funcs.graphs.put(base + 
".test.name",
"StencilNSparse"+occupancyStr);
 
  168    testStencilHeatSparse_perf<blockEdgeSize, gridEdgeSize,
 
  170                                                                                fillMultiplier, voidMultiplier);
 
  171    cudaDeviceSynchronize();
 
  174BOOST_AUTO_TEST_SUITE(performance)
 
  176BOOST_AUTO_TEST_SUITE(SparseGridGpu_test)
 
  178BOOST_AUTO_TEST_CASE(testStencilHeatSparse05_gridScaling)
 
  180    std::string testURI = suiteURI + 
".device.stencil.sparse.N.2D.05.gridScaling";
 
  181    unsigned int counter = 0;
 
  182    constexpr unsigned int blockEdgeSize = 8;
 
  183    launch_testStencilHeatSparse_perf<blockEdgeSize, 128>(testURI, counter++, 1.45, 1, 
"05");
 
  184    launch_testStencilHeatSparse_perf<blockEdgeSize, 256>(testURI, counter++, 1.45, 1, 
"05");
 
  185    launch_testStencilHeatSparse_perf<blockEdgeSize, 512>(testURI, counter++, 1.45, 1, 
"05");
 
  186    launch_testStencilHeatSparse_perf<blockEdgeSize, 1024>(testURI, counter++, 1.45, 1, 
"05");
 
  189    testSet.insert(testURI);
 
  193BOOST_AUTO_TEST_CASE(testStencilHeatSparse08_gridScaling)
 
  195    std::string testURI = suiteURI + 
".device.stencil.sparse.N.2D.08.gridScaling";
 
  196    unsigned int counter = 0;
 
  197    constexpr unsigned int blockEdgeSize = 8;
 
  198    launch_testStencilHeatSparse_perf<blockEdgeSize, 128>(testURI, counter++, 2, 0.20, 
"08");
 
  199    launch_testStencilHeatSparse_perf<blockEdgeSize, 256>(testURI, counter++, 2, 0.20, 
"08");
 
  200    launch_testStencilHeatSparse_perf<blockEdgeSize, 512>(testURI, counter++, 2, 0.20, 
"08");
 
  201    launch_testStencilHeatSparse_perf<blockEdgeSize, 1024>(testURI, counter++, 2, 0.20, 
"08");
 
  203    testSet.insert(testURI);
 
  207BOOST_AUTO_TEST_CASE(testStencilHeatSparse09_gridScaling)
 
  209    std::string testURI = suiteURI + 
".device.stencil.sparse.N.2D.09.gridScaling";
 
  210    unsigned int counter = 0;
 
  211    constexpr unsigned int blockEdgeSize = 8;
 
  212    launch_testStencilHeatSparse_perf<blockEdgeSize, 128>(testURI, counter++, 2.3, 0.07, 
"09");
 
  213    launch_testStencilHeatSparse_perf<blockEdgeSize, 256>(testURI, counter++, 2.3, 0.07, 
"09");
 
  214    launch_testStencilHeatSparse_perf<blockEdgeSize, 512>(testURI, counter++, 2.3, 0.07, 
"09");
 
  215    launch_testStencilHeatSparse_perf<blockEdgeSize, 1024>(testURI, counter++, 2.3, 0.07, 
"09");
 
  217    testSet.insert(testURI);
 
  220BOOST_AUTO_TEST_CASE(testStencilHeatSparseZ05_gridScaling)
 
  222    std::string testURI = suiteURI + 
".device.stencil.sparse.Z.2D.05.gridScaling";
 
  223    unsigned int counter = 0;
 
  224    constexpr unsigned int blockEdgeSize = 8;
 
  225    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 128>(testURI, counter++, 1.45, 1, 
"05");
 
  226    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 256>(testURI, counter++, 1.45, 1, 
"05");
 
  227    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 512>(testURI, counter++, 1.45, 1, 
"05");
 
  228    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 1024>(testURI, counter++, 1.45, 1, 
"05");
 
  230    testSet.insert(testURI);
 
  234BOOST_AUTO_TEST_CASE(testStencilHeatSparseZ08_gridScaling)
 
  236    std::string testURI = suiteURI + 
".device.stencil.sparse.Z.2D.08.gridScaling";
 
  237    unsigned int counter = 0;
 
  238    constexpr unsigned int blockEdgeSize = 8;
 
  239    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 128>(testURI, counter++, 2, 0.20, 
"08");
 
  240    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 256>(testURI, counter++, 2, 0.20, 
"08");
 
  241    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 512>(testURI, counter++, 2, 0.20, 
"08");
 
  242    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 1024>(testURI, counter++, 2, 0.20, 
"08");
 
  244    testSet.insert(testURI);
 
  248BOOST_AUTO_TEST_CASE(testStencilHeatSparseZ09_gridScaling)
 
  250    std::string testURI = suiteURI + 
".device.stencil.sparse.Z.2D.09.gridScaling";
 
  251    unsigned int counter = 0;
 
  252    constexpr unsigned int blockEdgeSize = 8;
 
  253    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 128>(testURI, counter++, 2.3, 0.07, 
"09");
 
  254    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 256>(testURI, counter++, 2.3, 0.07, 
"09");
 
  255    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 512>(testURI, counter++, 2.3, 0.07, 
"09");
 
  256    launch_testStencilHeatSparseZ_perf<blockEdgeSize, 1024>(testURI, counter++, 2.3, 0.07, 
"09");
 
  258    testSet.insert(testURI);
 
  261BOOST_AUTO_TEST_CASE(testStencilHeatSparse05_32Block_2048Grid_Case)
 
  263    std::string testURI = suiteURI + 
".device.stencil.sparse.N.2D.05.32_2048";
 
  264    unsigned int counter = 0;
 
  265    launch_testStencilHeatSparse_perf<32, 2048/32>(testURI, counter++, 1.45, 1, 
"05");
 
  267    testSet.insert(testURI);
 
  270BOOST_AUTO_TEST_SUITE_END()
 
  272BOOST_AUTO_TEST_SUITE_END()
 
grid_key_dx is the key to access any element in the grid
 
Implementation of 1-D std::vector like structure.
 
Class for cpu time benchmarking.
 
void stop()
Stop the timer.
 
void start()
Start the timer.
 
double getwct()
Return the elapsed real time.
 
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...