1#define BOOST_TEST_DYN_LINK
2#include <boost/test/unit_test.hpp>
3#include "Plot/GoogleChart.hpp"
5#include <boost/property_tree/ptree.hpp>
6#include <boost/property_tree/xml_parser.hpp>
7#include "util/performance/performance_util.hpp"
8#include "Point_test.hpp"
9#include "util/stat/common_statistics.hpp"
11extern const char * test_dir;
// Number of timing samples per benchmark: the timing vectors visible in this file are sized
// N_STAT + 1 and the outer benchmark loops run N_STAT + 1 iterations.
15constexpr int N_STAT = 32;
// Opens the outer "performance" test suite.
17BOOST_AUTO_TEST_SUITE( performance )
// Element counts used as loop bounds by the CPU (NADD) and GPU (NADD_GPU) benchmarks.
// NOTE(review): the expansions are not parenthesized. That is harmless for the `j < NADD`
// comparisons and put(...) arguments seen in this file, but would mis-associate in an
// expression such as `x / NADD`.
19#define NADD 128*128*128
20#define NADD_GPU 256*256*256
25 boost::property_tree::ptree graphs;
30BOOST_AUTO_TEST_SUITE( vector_performance )
// Benchmark producing two data points, labelled "add" and "get", each tagged with NADD elements.
// The mean and deviation of `times` are stored under performance.vector(0) and those of
// `times_g` under performance.vector(1).
// NOTE(review): the vector construction, the timer calls and the body of the first inner
// loop are not visible in this excerpt; presumably the first loop performs the adds and the
// second the reads - confirm against the full file.
32BOOST_AUTO_TEST_CASE(vector_performance)
// Metadata for data point 0 ("add") and data point 1 ("get").
34 report_vector_funcs.graphs.put(
"performance.vector(0).funcs.nele",NADD);
35 report_vector_funcs.graphs.put(
"performance.vector(0).funcs.name",
"add");
37 report_vector_funcs.graphs.put(
"performance.vector(1).funcs.nele",NADD);
38 report_vector_funcs.graphs.put(
"performance.vector(1).funcs.name",
"get");
// One timing slot per outer iteration (N_STAT + 1 of them) for each of the two series.
40 std::vector<double> times(N_STAT + 1);
41 std::vector<double> times_g(N_STAT + 1);
// Running sum of every value read in the "get" loop below.
// Presumably kept so the reads cannot be optimized away - confirm.
44 double tot_accu = 0.0;
// Outer loop: one benchmark repetition per iteration.
46 for (
size_t i = 0 ; i < N_STAT+1 ; i++)
// First pass over NADD elements (body not visible here).
77 for (
size_t j = 0 ; j < NADD ; j++)
// Second pass over NADD elements: read every scalar, vector and tensor component of element j.
88 for (
size_t j = 0 ; j < NADD ; j++)
// Scalar properties x, y, z, s.
90 double accu1 = v1.template get<P::x>(j);
91 double accu2 = v1.template get<P::y>(j);
92 double accu3 = v1.template get<P::z>(j);
93 double accu4 = v1.template get<P::s>(j);
// Three components of the vector property v.
95 double accu5 = v1.template get<P::v>(j)[0];
96 double accu6 = v1.template get<P::v>(j)[1];
97 double accu7 = v1.template get<P::v>(j)[2];
// Nine components of the 3x3 tensor property t.
99 double accu8 = v1.template get<P::t>(j)[0][0];
100 double accu9 = v1.template get<P::t>(j)[0][1];
101 double accu10 = v1.template get<P::t>(j)[0][2];
102 double accu11 = v1.template get<P::t>(j)[1][0];
103 double accu12 = v1.template get<P::t>(j)[1][1];
104 double accu13 = v1.template get<P::t>(j)[1][2];
105 double accu14 = v1.template get<P::t>(j)[2][0];
106 double accu15 = v1.template get<P::t>(j)[2][1];
107 double accu16 = v1.template get<P::t>(j)[2][2];
// Fold all sixteen reads into the running sum.
109 tot_accu += accu1 + accu2 + accu3 + accu4 + accu5 + accu6 + accu7 + accu8 + accu9 + accu10 + accu11 + accu12 +
110 accu13 + accu14 + accu15 + accu16;
// Statistics of the `times` series -> data point 0.
120 standard_deviation(times,mean,dev);
122 report_vector_funcs.graphs.put(
"performance.vector(0).y.data.mean",mean);
123 report_vector_funcs.graphs.put(
"performance.vector(0).y.data.dev",dev);
// Statistics of the `times_g` series -> data point 1 (mean and dev are reused).
125 standard_deviation(times_g,mean,dev);
127 report_vector_funcs.graphs.put(
"performance.vector(1).y.data.mean",mean);
128 report_vector_funcs.graphs.put(
"performance.vector(1).y.data.dev",dev);
// Per-element read/write workload expressed through the containers' get<>() accessors.
// For element p it reads components [0] and [1] of vd_pos property 0 and writes into vd_prop:
//   property 0         <- pos[0] + pos[1]
//   property 1 [0],[1] <- pos[0], pos[1]
//   property 2 (2x2)   <- { pos[0], pos[1] } , { pos[0] + pos[1], pos[1] - pos[0] }
// It then advances both position components by 0.01f.
// Declared __device__ __host__, so it can be called from both host code and GPU kernels.
// No bounds check on p is performed here.
131template<
typename vector_prop_type,
typename vector_pos_type>
132__device__ __host__
void read_write(vector_prop_type & vd_prop, vector_pos_type & vd_pos,
unsigned int p)
// Scalar property: sum of the two position components.
134 vd_prop.template get<0>(p) = vd_pos.template get<0>(p)[0] + vd_pos.template get<0>(p)[1];
// Vector property: copy of the two position components.
136 vd_prop.template get<1>(p)[0] = vd_pos.template get<0>(p)[0];
137 vd_prop.template get<1>(p)[1] = vd_pos.template get<0>(p)[1];
// Tensor property: first row is the position, second row is (sum, difference).
139 vd_prop.template get<2>(p)[0][0] = vd_pos.template get<0>(p)[0];
140 vd_prop.template get<2>(p)[0][1] = vd_pos.template get<0>(p)[1];
141 vd_prop.template get<2>(p)[1][0] = vd_pos.template get<0>(p)[0] +
142 vd_pos.template get<0>(p)[1];
143 vd_prop.template get<2>(p)[1][1] = vd_pos.template get<0>(p)[1] -
144 vd_pos.template get<0>(p)[0];
// Write back to the position so the workload both reads and writes vd_pos.
// NOTE(review): 0.01f is a float literal; if the position type is double, the value added
// is the float-rounded 0.01, not the double 0.01 - confirm this is intended.
146 vd_pos.template get<0>(p)[0] += 0.01f;
147 vd_pos.template get<0>(p)[1] += 0.01f;
// GPU kernel taking the two container arguments by value (v1, v2).
// NOTE(review): only the thread-index computation is visible in this excerpt; the rest of
// the body (presumably a call to read_write(v1,v2,p), possibly behind a bounds check) is not
// shown - confirm against the full file.
150template<
typename vector_type1,
typename vector_type2>
151__global__
void read_write_ker(vector_type1 v1,
vector_type2 v2)
// Global 1-D thread index. The leading unary '+' has no effect on the value.
153 unsigned int p = + blockIdx.x * blockDim.x + threadIdx.x;
// Plain-array counterpart of read_write for an array-of-structures layout.
// pos holds two doubles per element, stored contiguously (pos[2*p], pos[2*p+1]);
// prp is an array of `ele` records with members s, v[] and t[][].
// For element p it writes:
//   s              <- pos[2*p] + pos[2*p+1]
//   v[0], v[1]     <- pos[2*p], pos[2*p+1]
//   t[0][0..1]     <- pos[2*p], pos[2*p+1]
//   t[1][0], t[1][1] <- sum, (pos[2*p+1] - pos[2*p])
// Declared __device__ __host__; no bounds check on p is performed here.
// NOTE(review): unlike read_write, no update of pos is visible in this excerpt - the
// corresponding lines may simply be missing from the listing; confirm.
166__device__ __host__
void read_write_lin(
double * pos,
ele * prp,
unsigned int p)
// Scalar member.
168 prp[p].s = pos[2*p] + pos[2*p+1];
// Vector member.
170 prp[p].v[0] = pos[2*p];
171 prp[p].v[1] = pos[2*p+1];
// Tensor member.
173 prp[p].t[0][0] = pos[2*p];
174 prp[p].t[0][1] = pos[2*p+1];
175 prp[p].t[1][0] = pos[2*p] + pos[2*p+1];
176 prp[p].t[1][1] = pos[2*p+1] - pos[2*p];
// GPU kernel: each thread computes its global 1-D index and applies read_write_lin to it.
// NOTE(review): no guard against p running past the end of pos/prp is visible here; if the
// launch grid is rounded up beyond the element count, the extra threads would access out of
// bounds - confirm the launch configuration or add a size check.
183__global__
void read_write_lin_ker(
double * pos,
ele * prp)
// Global 1-D thread index.
185 unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;
187 read_write_lin(pos,prp,p);
// Plain-array counterpart of read_write for a component-major layout:
// component c of element p lives at index c*n_pos + p, and the 2x2 tensor entry (r,c)
// at index r*n_pos*2 + c*n_pos + p.
//   pos   : 2 components per element
//   prp0  : scalar   <- pos[0] + pos[1]
//   prp1  : 2-vector <- pos[0], pos[1]
//   prp2  : 2x2      <- first row pos[0], pos[1]; second row built from sum / difference
//   p     : element index
//   n_pos : stride between consecutive components
// Finally both position components are advanced by 0.01f.
// Declared __device__ __host__; no bounds check on p is performed here.
190__device__ __host__
void read_write_inte(
double * pos,
double * prp0,
double * prp1,
double * prp2,
unsigned int p,
unsigned int n_pos)
// Scalar property.
192 prp0[0*n_pos + p] = pos[0*n_pos + p] + pos[1*n_pos+p];
// Vector property.
194 prp1[0*n_pos + p] = pos[0*n_pos + p];
195 prp1[1*n_pos + p] = pos[1*n_pos + p];
// Tensor property, first row.
197 prp2[0*n_pos*2+0*n_pos + p] = pos[0*n_pos + p];
198 prp2[0*n_pos*2+1*n_pos + p] = pos[1*n_pos + p];
// Tensor property, second row.
// NOTE(review): the right-hand operands of the next two statements are on continuation
// lines that are not visible in this excerpt.
199 prp2[1*n_pos*2+0*n_pos + p] = pos[0*n_pos + p] +
201 prp2[1*n_pos*2+1*n_pos + p] = pos[1*n_pos + p] -
// Write back to the position (0.01f is a float literal added to a double).
204 pos[0*n_pos + p] += 0.01f;
205 pos[1*n_pos + p] += 0.01f;
// GPU kernel: each thread computes its global 1-D index and applies read_write_inte to it,
// forwarding n_pos as the component stride.
// NOTE(review): no guard against p >= n_pos is visible here; confirm the launch grid never
// exceeds the element count or add a size check.
208__global__
void read_write_inte_ker(
double * pos,
double * prp0,
double * prp1,
double * prp2,
unsigned int n_pos)
// Global 1-D thread index.
210 unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;
212 read_write_inte(pos,prp0,prp1,prp2,p,n_pos);
// CPU benchmark producing two data points, "read_write_lin" and "read_write_inte".
// Each stored value is a ratio of two timing series (times / times_g and times2 / times2_g),
// not an absolute time.
// NOTE(review): container construction, timer calls and most loop bodies are not visible in
// this excerpt. By analogy with the GPU test case in this file, the *_g series presumably
// time the container accessors and the other series the raw-pointer loops - confirm.
215BOOST_AUTO_TEST_CASE(vector_performance_layout_vs_plain_array)
// Timing series for the first data point (times, times_g) and the second (times2, times2_g).
217 std::vector<double> times(N_STAT + 1);
218 std::vector<double> times_g(N_STAT + 1);
220 std::vector<double> times2(N_STAT + 1);
221 std::vector<double> times2_g(N_STAT + 1);
// Metadata for data point 0.
223 report_vector_funcs.graphs.put(
"performance.vector_layout(0).funcs.nele",NADD);
224 report_vector_funcs.graphs.put(
"performance.vector_layout(0).funcs.name",
"read_write_lin");
// ---- Data point 0: N_STAT + 1 repetitions ----
226 for (
size_t i = 0 ; i < N_STAT+1 ; i++)
// Template element `pa`: vector and tensor properties set to 1.0.
240 pa.
get<1>()[0] = 1.0;
241 pa.
get<1>()[1] = 1.0;
243 pa.
get<2>()[0][0] = 1.0;
244 pa.
get<2>()[0][1] = 1.0;
245 pa.
get<2>()[1][0] = 1.0;
246 pa.
get<2>()[1][1] = 1.0;
// Two passes over NADD elements (bodies not visible here).
250 for (
size_t j = 0 ; j < NADD ; j++)
259 for (
size_t j = 0 ; j < NADD ; j++)
// Raw pointers to property 0 of each container, reinterpreted as plain arrays.
271 double * prp = (
double *)v1.getPointer<0>();
272 double * pos = (
double *)v2.getPointer<0>();
// Same workload through the raw pointers; prp is viewed as an array of `ele` records.
274 for (
size_t j = 0 ; j < NADD ; j++)
276 read_write_lin(pos,(
struct ele *)prp,j);
// mean/dev describe times_g, mean_/dev_ describe times.
286 standard_deviation(times_g,mean,dev);
290 standard_deviation(times,mean_,dev_);
// Reported value is the ratio mean_ / mean.
292 report_vector_funcs.graphs.put(
"performance.vector_layout(0).y.data.mean",mean_/mean);
// First-order error propagation for a ratio a/b: (a / b^2) * db + da / b.
296 report_vector_funcs.graphs.put(
"performance.vector_layout(0).y.data.dev",mean_/(mean*mean)*dev + dev_ / mean );
// Metadata for data point 1.
298 report_vector_funcs.graphs.put(
"performance.vector_layout(1).funcs.nele",NADD);
299 report_vector_funcs.graphs.put(
"performance.vector_layout(1).funcs.name",
"read_write_inte");
// ---- Data point 1: N_STAT + 1 repetitions ----
301 for (
size_t i = 0 ; i < N_STAT+1 ; i++)
// Template element `pa`: vector and tensor properties set to 1.0.
315 pa.
get<1>()[0] = 1.0;
316 pa.
get<1>()[1] = 1.0;
318 pa.
get<2>()[0][0] = 1.0;
319 pa.
get<2>()[0][1] = 1.0;
320 pa.
get<2>()[1][0] = 1.0;
321 pa.
get<2>()[1][1] = 1.0;
// Two passes over NADD elements (bodies not visible here).
325 for (
size_t j = 0 ; j < NADD ; j++)
334 for (
size_t j = 0 ; j < NADD ; j++)
// Wall-clock time of the timer `tg` recorded for this repetition.
341 times2_g[i] = tg.
getwct();
// One raw pointer per property of v1, plus the position array of v2.
347 double * prp0 = (
double *)v1.getPointer<0>();
348 double * prp1 = (
double *)v1.getPointer<1>();
349 double * prp2 = (
double *)v1.getPointer<2>();
351 double * pos = (
double *)v2.getPointer<0>();
// Same workload through the raw pointers; `sz` (defined on a line not shown) is the stride.
353 for (
size_t j = 0 ; j < NADD ; j++)
355 read_write_inte(pos,prp0,prp1,prp2,j,sz);
// mean2/dev2 describe times2_g, mean2_/dev2_ describe times2.
365 standard_deviation(times2_g,mean2,dev2);
369 standard_deviation(times2,mean2_,dev2_);
// Reported value is the ratio mean2_ / mean2, with the same error propagation as above.
371 report_vector_funcs.graphs.put(
"performance.vector_layout(1).y.data.mean",mean2_/mean2);
375 report_vector_funcs.graphs.put(
"performance.vector_layout(1).y.data.dev",mean2_/(mean2*mean2)*dev2 + dev2_ / mean2 );
// GPU benchmark producing two data points, "read_write_lin" and "read_write_inte".
// For each repetition the container kernel (read_write_ker) is timed into times_g / times2_g
// and the raw-pointer kernel (read_write_lin_ker / read_write_inte_ker) into times / times2.
// Each stored value is the ratio raw-array time / container time, not an absolute time.
// Both halves populate NADD_GPU elements, so both report nele = NADD_GPU.
// NOTE(review): container construction and timer start/stop calls are not visible in this
// excerpt.
378BOOST_AUTO_TEST_CASE(vector_performance_gpu_layout_vs_plain_array)
// Timing series for the first data point (times, times_g) and the second (times2, times2_g).
380 std::vector<double> times(N_STAT + 1);
381 std::vector<double> times_g(N_STAT + 1);
383 std::vector<double> times2(N_STAT + 1);
384 std::vector<double> times2_g(N_STAT + 1);
// NOTE(review): no use of tot_accu is visible in this test case - possibly unused.
387 double tot_accu = 0.0;
// Metadata for data point 0.
389 report_vector_funcs.graphs.put(
"performance.vector_layout_gpu(0).funcs.nele",NADD_GPU);
390 report_vector_funcs.graphs.put(
"performance.vector_layout_gpu(0).funcs.name",
"read_write_lin");
// ---- Data point 0: N_STAT + 1 repetitions ----
392 for (
size_t i = 0 ; i < N_STAT+1 ; i++)
// Template element `pa`: vector and tensor properties set to 1.0.
406 pa.
get<1>()[0] = 1.0;
407 pa.
get<1>()[1] = 1.0;
409 pa.
get<2>()[0][0] = 1.0;
410 pa.
get<2>()[0][1] = 1.0;
411 pa.
get<2>()[1][0] = 1.0;
412 pa.
get<2>()[1][1] = 1.0;
// Pass over NADD_GPU elements (body not visible here).
416 for (
size_t j = 0 ; j < NADD_GPU ; j++)
// Launch configuration derived from v1; 1536 is presumably a threads-per-block hint - confirm.
422 auto ite = v1.getGPUIterator(1536);
// Container path: kernel operating on the containers' kernel-side views.
428 CUDA_LAUNCH(read_write_ker,ite,v1.toKernel(),v2.toKernel());
431 times_g[i] = tga.getwctGPU();
434 std::cout <<
"OpenFPM: " << times_g[i] << std::endl;
// Raw-pointer path: property 0 of each container viewed as plain arrays.
439 double * prp = (
double *)v1.toKernel().getPointer<0>();
440 double * pos = (
double *)v2.toKernel().getPointer<0>();
442 CUDA_LAUNCH(read_write_lin_ker,ite,pos,(
struct ele *)prp);
446 times[i] = tga2.getwctGPU();
447 std::cout <<
"Array: " << times[i] << std::endl;
// mean/dev describe the container timings, mean_/dev_ the raw-array timings.
452 standard_deviation(times_g,mean,dev);
456 standard_deviation(times,mean_,dev_);
// Reported value is the ratio mean_ / mean.
458 report_vector_funcs.graphs.put(
"performance.vector_layout_gpu(0).y.data.mean",mean_/mean);
// First-order error propagation for a ratio a/b: (a / b^2) * db + da / b.
462 report_vector_funcs.graphs.put(
"performance.vector_layout_gpu(0).y.data.dev",mean_/(mean*mean)*dev + dev_ / mean );
// Metadata for data point 1. The element count is NADD_GPU, matching the populate loop
// of this half and the nele reported for data point 0 (was NADD).
464 report_vector_funcs.graphs.put(
"performance.vector_layout_gpu(1).funcs.nele",NADD_GPU);
465 report_vector_funcs.graphs.put(
"performance.vector_layout_gpu(1).funcs.name",
"read_write_inte");
// ---- Data point 1: N_STAT + 1 repetitions ----
467 for (
size_t i = 0 ; i < N_STAT+1 ; i++)
// Template element `pa`: vector and tensor properties set to 1.0.
481 pa.
get<1>()[0] = 1.0;
482 pa.
get<1>()[1] = 1.0;
484 pa.
get<2>()[0][0] = 1.0;
485 pa.
get<2>()[0][1] = 1.0;
486 pa.
get<2>()[1][0] = 1.0;
487 pa.
get<2>()[1][1] = 1.0;
// Pass over NADD_GPU elements (body not visible here).
491 for (
size_t j = 0 ; j < NADD_GPU ; j++)
// Launch configuration derived from v1; 1536 is presumably a threads-per-block hint - confirm.
500 auto ite = v1.getGPUIterator(1536);
// Container path.
502 CUDA_LAUNCH(read_write_ker,ite,v1.toKernel(),v2.toKernel());
506 times2_g[i] = tg.getwctGPU();
507 std::cout <<
"OpenFPM inte: " << times2_g[i] << std::endl;
// Raw-pointer path: one pointer per property of v1, plus the position array of v2.
514 double * prp0 = (
double *)v1.toKernel().getPointer<0>();
515 double * prp1 = (
double *)v1.toKernel().getPointer<1>();
516 double * prp2 = (
double *)v1.toKernel().getPointer<2>();
518 double * pos = (
double *)v2.toKernel().getPointer<0>();
// `sz` (defined on a line not shown) is forwarded as the component stride n_pos.
520 CUDA_LAUNCH(read_write_inte_ker,ite,pos,prp0,prp1,prp2,sz);
524 times2[i] = tga.getwctGPU();
526 std::cout <<
"Array inte: " << times2[i] << std::endl;
// mean2/dev2 describe the container timings, mean2_/dev2_ the raw-array timings.
531 standard_deviation(times2_g,mean2,dev2);
535 standard_deviation(times2,mean2_,dev2_);
// Reported value is the ratio mean2_ / mean2, with the same error propagation as above.
537 report_vector_funcs.graphs.put(
"performance.vector_layout_gpu(1).y.data.mean",mean2_/mean2);
541 report_vector_funcs.graphs.put(
"performance.vector_layout_gpu(1).y.data.dev",mean2_/(mean2*mean2)*dev2 + dev2_ / mean2 );
// Final test case of the suite: describes three line graphs over the data collected in
// report_vector_funcs.graphs, writes the property tree to "vector_performance_funcs.xml",
// and produces "vector_performance_funcs.html" through the chart object `cg`
// (declared on a line not shown in this excerpt).
// In each graph description, "(#)" in a source path appears to be a placeholder for the
// data-point index - confirm against StandardXMLPerformanceGraph.
// The first key of each graph is written with put(), the remaining keys with add().
544BOOST_AUTO_TEST_CASE(vector_performance_write_report)
// ---- Graph 0: absolute timings of the "add" / "get" data points ----
548 report_vector_funcs.graphs.put(
"graphs.graph(0).type",
"line");
549 report_vector_funcs.graphs.add(
"graphs.graph(0).title",
"Vector add and get");
550 report_vector_funcs.graphs.add(
"graphs.graph(0).x.title",
"Tests");
551 report_vector_funcs.graphs.add(
"graphs.graph(0).y.title",
"Time seconds");
552 report_vector_funcs.graphs.add(
"graphs.graph(0).y.data(0).source",
"performance.vector(#).y.data.mean");
553 report_vector_funcs.graphs.add(
"graphs.graph(0).x.data(0).source",
"performance.vector(#).funcs.name");
554 report_vector_funcs.graphs.add(
"graphs.graph(0).y.data(0).title",
"Actual");
555 report_vector_funcs.graphs.add(
"graphs.graph(0).interpolation",
"lines");
// ---- Graph 1: CPU layout comparison ----
// NOTE(review): the y axis is titled "Time seconds", but the values stored under
// performance.vector_layout(#).y.data.mean in this file are ratios of two mean timings.
557 report_vector_funcs.graphs.put(
"graphs.graph(1).type",
"line");
558 report_vector_funcs.graphs.add(
"graphs.graph(1).title",
"Vector read write");
559 report_vector_funcs.graphs.add(
"graphs.graph(1).x.title",
"Layout");
560 report_vector_funcs.graphs.add(
"graphs.graph(1).y.title",
"Time seconds");
561 report_vector_funcs.graphs.add(
"graphs.graph(1).y.data(0).source",
"performance.vector_layout(#).y.data.mean");
562 report_vector_funcs.graphs.add(
"graphs.graph(1).x.data(0).source",
"performance.vector_layout(#).funcs.name");
563 report_vector_funcs.graphs.add(
"graphs.graph(1).y.data(0).title",
"Actual");
564 report_vector_funcs.graphs.add(
"graphs.graph(1).interpolation",
"lines");
// ---- Graph 2: GPU layout comparison ----
// NOTE(review): same axis-title mismatch as graph 1; the plotted values are ratios.
566 report_vector_funcs.graphs.put(
"graphs.graph(2).type",
"line");
567 report_vector_funcs.graphs.add(
"graphs.graph(2).title",
"Vector GPU read write");
568 report_vector_funcs.graphs.add(
"graphs.graph(2).x.title",
"Layout");
569 report_vector_funcs.graphs.add(
"graphs.graph(2).y.title",
"Time seconds");
570 report_vector_funcs.graphs.add(
"graphs.graph(2).y.data(0).source",
"performance.vector_layout_gpu(#).y.data.mean");
571 report_vector_funcs.graphs.add(
"graphs.graph(2).x.data(0).source",
"performance.vector_layout_gpu(#).funcs.name");
572 report_vector_funcs.graphs.add(
"graphs.graph(2).y.data(0).title",
"Actual");
573 report_vector_funcs.graphs.add(
"graphs.graph(2).interpolation",
"lines");
// Serialize the whole tree as XML, indented with 4 spaces per level.
575 boost::property_tree::xml_writer_settings<std::string> settings(
' ', 4);
576 boost::property_tree::write_xml(
"vector_performance_funcs.xml", report_vector_funcs.graphs,std::locale(),settings);
// Path of the reference XML, located under the externally defined test_dir.
580 std::string file_xml_ref(test_dir);
581 file_xml_ref += std::string(
"/openfpm_data/vector_performance_funcs_ref.xml");
// Build the charts from the freshly written XML and the reference XML.
583 StandardXMLPerformanceGraph(
"vector_performance_funcs.xml",file_xml_ref,cg);
585 addUpdateTime(cg,1,
"data",
"vector_performance_funcs");
// Emit the HTML report.
587 cg.
write(
"vector_performance_funcs.html");
590BOOST_AUTO_TEST_SUITE_END()
592BOOST_AUTO_TEST_SUITE_END()
Small class to produce graphs with Google Charts in HTML.
void write(std::string file)
It writes the graphs to a file in HTML format using Google Charts.
This class allocates and destroys CPU memory.
Test structure used for several tests.
void sety(T y_)
set the y property
void setz(T z_)
set the z property
auto get() -> decltype(boost::fusion::at_c< i >(data))
getter method for a general property i
static const unsigned int v
v property is at position 4 in the boost::fusion::vector
void sets(T s_)
set the s property
void setx(T x_)
set the x property
static const unsigned int t
t property is at position 5 in the boost::fusion::vector
Implementation of 1-D std::vector like structure.
Class for cpu time benchmarking.
void stop()
Stop the timer.
void start()
Start the timer.
double getwct()
Return the elapsed real time.
Aggregate of properties: from a list of objects it creates a struct that follows the OpenFPM native stru...
__device__ __host__ boost::mpl::at< type, boost::mpl::int_< i > >::type & get()
get the property i
Transform the boost::fusion::vector into memory specification (memory_traits)