1 #define BOOST_TEST_DYN_LINK
2 #include <boost/test/unit_test.hpp>
3 #include "Plot/GoogleChart.hpp"
5 #include <boost/property_tree/ptree.hpp>
6 #include <boost/property_tree/xml_parser.hpp>
7 #include "util/performance/performance_util.hpp"
8 #include "Point_test.hpp"
9 #include "util/stat/common_statistics.hpp"
11 extern const char * test_dir;
/// Base number of timed repetitions per benchmark; the sample vectors and the
/// repetition loops in this file are sized N_STAT + 1.
constexpr int N_STAT{32};
17 BOOST_AUTO_TEST_SUITE( performance )
// Element count used by the CPU benchmarks (128^3).
// The product is parenthesized so the macro expands as a single value inside
// larger expressions such as `x / NADD` or `NADD_GPU / NADD`.
#define NADD (128*128*128)
// Element count used by the GPU benchmarks (256^3), parenthesized for the same reason.
#define NADD_GPU (256*256*256)
25 boost::property_tree::ptree graphs;
30 BOOST_AUTO_TEST_SUITE( vector_performance )
// Benchmark of two vector operations, reported as series "add" and "get".
// For each series the mean and the deviation of the collected timings are
// stored in the report tree under performance.vector(0) and performance.vector(1).
// NOTE(review): this listing omits several lines of the body (braces, the
// declarations of p, v1 and the timers); comments below only describe what is visible.
32 BOOST_AUTO_TEST_CASE(vector_performance)
// Series 0 metadata: NADD elements, labelled "add".
34 report_vector_funcs.graphs.put(
"performance.vector(0).funcs.nele",NADD);
35 report_vector_funcs.graphs.put(
"performance.vector(0).funcs.name",
"add");
// Series 1 metadata: NADD elements, labelled "get".
37 report_vector_funcs.graphs.put(
"performance.vector(1).funcs.nele",NADD);
38 report_vector_funcs.graphs.put(
"performance.vector(1).funcs.name",
"get");
// One timing sample per repetition (N_STAT + 1 repetitions) for each series.
40 std::vector<double> times(N_STAT + 1);
41 std::vector<double> times_g(N_STAT + 1);
// Running sum of every value read back in the "get" loop below.
// Presumably kept so the reads cannot be optimized away -- confirm.
44 double tot_accu = 0.0;
// Repetition loop: N_STAT + 1 measurements.
46 for (
size_t i = 0 ; i < N_STAT+1 ; i++)
// Fill the 3-component property v of the template element p.
61 p.get<
P::v>()[0] = 1.0;
62 p.get<
P::v>()[1] = 2.0;
63 p.get<
P::v>()[2] = 7.0;
// Fill the 3x3 property t of the template element p.
65 p.get<
P::t>()[0][0] = 10.0;
66 p.get<
P::t>()[0][1] = 13.0;
67 p.get<
P::t>()[0][2] = 8.0;
68 p.get<
P::t>()[1][0] = 19.0;
69 p.get<
P::t>()[1][1] = 23.0;
70 p.get<
P::t>()[1][2] = 5.0;
71 p.get<
P::t>()[2][0] = 4.0;
72 p.get<
P::t>()[2][1] = 3.0;
73 p.get<
P::t>()[2][2] = 11.0;
// NADD iterations; the loop body is not visible in this listing
// (presumably it appends p to v1 -- confirm).
77 for (
size_t j = 0 ; j < NADD ; j++)
// Store the elapsed real time reported by timer t as sample i of the "add" series.
83 times[i] = t.getwct();
// Read every property of element j of v1 and fold the values into tot_accu.
88 for (
size_t j = 0 ; j < NADD ; j++)
90 double accu1 = v1.template get<P::x>(j);
91 double accu2 = v1.template get<P::y>(j);
92 double accu3 = v1.template get<P::z>(j);
93 double accu4 = v1.template get<P::s>(j);
95 double accu5 = v1.template get<P::v>(j)[0];
96 double accu6 = v1.template get<P::v>(j)[1];
97 double accu7 = v1.template get<P::v>(j)[2];
99 double accu8 = v1.template get<P::t>(j)[0][0];
100 double accu9 = v1.template get<P::t>(j)[0][1];
101 double accu10 = v1.template get<P::t>(j)[0][2];
102 double accu11 = v1.template get<P::t>(j)[1][0];
103 double accu12 = v1.template get<P::t>(j)[1][1];
104 double accu13 = v1.template get<P::t>(j)[1][2];
105 double accu14 = v1.template get<P::t>(j)[2][0];
106 double accu15 = v1.template get<P::t>(j)[2][1];
107 double accu16 = v1.template get<P::t>(j)[2][2];
109 tot_accu += accu1 + accu2 + accu3 + accu4 + accu5 + accu6 + accu7 + accu8 + accu9 + accu10 + accu11 + accu12 +
110 accu13 + accu14 + accu15 + accu16;
// NOTE(review): the assignment of times_g[i] is not visible in this listing;
// presumably it is taken after the read loop above -- confirm.
// Statistics of the "add" samples; mean and dev are stored for series 0.
120 standard_deviation(times,mean,dev);
122 report_vector_funcs.graphs.put(
"performance.vector(0).y.data.mean",mean);
123 report_vector_funcs.graphs.put(
"performance.vector(0).y.data.dev",dev);
// Statistics of the times_g samples; mean and dev are stored for series 1 ("get").
125 standard_deviation(times_g,mean,dev);
127 report_vector_funcs.graphs.put(
"performance.vector(1).y.data.mean",mean);
128 report_vector_funcs.graphs.put(
"performance.vector(1).y.data.dev",dev);
131 template<
typename vector_prop_type,
typename vector_pos_type>
132 __device__ __host__
void read_write(vector_prop_type & vd_prop, vector_pos_type & vd_pos,
unsigned int p)
134 vd_prop.template get<0>(p) = vd_pos.template get<0>(p)[0] + vd_pos.template get<0>(p)[1];
136 vd_prop.template get<1>(p)[0] = vd_pos.template get<0>(p)[0];
137 vd_prop.template get<1>(p)[1] = vd_pos.template get<0>(p)[1];
139 vd_prop.template get<2>(p)[0][0] = vd_pos.template get<0>(p)[0];
140 vd_prop.template get<2>(p)[0][1] = vd_pos.template get<0>(p)[1];
141 vd_prop.template get<2>(p)[1][0] = vd_pos.template get<0>(p)[0] +
142 vd_pos.template get<0>(p)[1];
143 vd_prop.template get<2>(p)[1][1] = vd_pos.template get<0>(p)[1] -
144 vd_pos.template get<0>(p)[0];
146 vd_pos.template get<0>(p)[0] += 0.01f;
147 vd_pos.template get<0>(p)[1] += 0.01f;
// CUDA kernel operating on two container arguments passed by value
// (the GPU test launches it with v1.toKernel() and v2.toKernel()).
150 template<
typename vector_type1,
typename vector_type2>
151 __global__
void read_write_ker(vector_type1 v1,
vector_type2 v2)
// Flat element index of this thread: block offset plus thread offset.
// The leading unary '+' has no effect on the value.
153 unsigned int p = + blockIdx.x * blockDim.x + threadIdx.x;
// NOTE(review): the remainder of the kernel body is not visible in this listing;
// presumably it forwards to read_write(v1,v2,p) -- confirm against the full source.
166 __device__ __host__
void read_write_lin(
double * pos,
ele * prp,
unsigned int p)
168 prp[p].s = pos[2*p] + pos[2*p+1];
170 prp[p].v[0] = pos[2*p];
171 prp[p].v[1] = pos[2*p+1];
173 prp[p].t[0][0] = pos[2*p];
174 prp[p].t[0][1] = pos[2*p+1];
175 prp[p].t[1][0] = pos[2*p] + pos[2*p+1];
176 prp[p].t[1][1] = pos[2*p+1] - pos[2*p];
183 __global__
void read_write_lin_ker(
double * pos,
ele * prp)
185 unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;
187 read_write_lin(pos,prp,p);
190 __device__ __host__
void read_write_inte(
double * pos,
double * prp0,
double * prp1,
double * prp2,
unsigned int p,
unsigned int n_pos)
192 prp0[0*n_pos + p] = pos[0*n_pos + p] + pos[1*n_pos+p];
194 prp1[0*n_pos + p] = pos[0*n_pos + p];
195 prp1[1*n_pos + p] = pos[1*n_pos + p];
197 prp2[0*n_pos*2+0*n_pos + p] = pos[0*n_pos + p];
198 prp2[0*n_pos*2+1*n_pos + p] = pos[1*n_pos + p];
199 prp2[1*n_pos*2+0*n_pos + p] = pos[0*n_pos + p] +
201 prp2[1*n_pos*2+1*n_pos + p] = pos[1*n_pos + p] -
204 pos[0*n_pos + p] += 0.01f;
205 pos[1*n_pos + p] += 0.01f;
208 __global__
void read_write_inte_ker(
double * pos,
double * prp0,
double * prp1,
double * prp2,
unsigned int n_pos)
210 unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;
212 read_write_inte(pos,prp0,prp1,prp2,p,n_pos);
// CPU comparison between the library vector and hand-indexed plain arrays,
// reported as two series: "read_write_lin" and "read_write_inte".
// For each series the report stores the ratio of two mean timings and a
// deviation for that ratio under performance.vector_layout(0) / (1).
// NOTE(review): this listing omits several lines of the body (braces, the
// declarations of v1, v2, pa, sz, the timers and the mean/dev variables);
// comments below only describe what is visible.
215 BOOST_AUTO_TEST_CASE(vector_performance_layout_vs_plain_array)
// Timing samples for the first series (N_STAT + 1 repetitions each).
217 std::vector<double> times(N_STAT + 1);
218 std::vector<double> times_g(N_STAT + 1);
// Timing samples for the second series.
220 std::vector<double> times2(N_STAT + 1);
221 std::vector<double> times2_g(N_STAT + 1);
// Series 0 metadata: NADD elements, labelled "read_write_lin".
223 report_vector_funcs.graphs.put(
"performance.vector_layout(0).funcs.nele",NADD);
224 report_vector_funcs.graphs.put(
"performance.vector_layout(0).funcs.name",
"read_write_lin");
// Repetition loop of the first series.
226 for (
size_t i = 0 ; i < N_STAT+1 ; i++)
// Set both components of property 1 of the template element pa to 1.0.
240 pa.
get<1>()[0] = 1.0;
241 pa.
get<1>()[1] = 1.0;
// Set all four entries of property 2 of pa to 1.0.
243 pa.
get<2>()[0][0] = 1.0;
244 pa.
get<2>()[0][1] = 1.0;
245 pa.
get<2>()[1][0] = 1.0;
246 pa.
get<2>()[1][1] = 1.0;
// Two NADD-iteration loops whose bodies are not visible in this listing
// (presumably filling the vectors and running read_write on them -- confirm).
250 for (
size_t j = 0 ; j < NADD ; j++)
259 for (
size_t j = 0 ; j < NADD ; j++)
// Raw pointers to property 0 of v1 and v2, used as plain arrays below.
271 double * prp = (
double *)v1.getPointer<0>();
272 double * pos = (
double *)v2.getPointer<0>();
// Plain-array pass: the property buffer is reinterpreted as an array of ele records.
274 for (
size_t j = 0 ; j < NADD ; j++)
276 read_write_lin(pos,(
struct ele *)prp,j);
// mean/dev come from times_g, mean_/dev_ from times.
286 standard_deviation(times_g,mean,dev);
290 standard_deviation(times,mean_,dev_);
// Reported value is the ratio of the two means.
292 report_vector_funcs.graphs.put(
"performance.vector_layout(0).y.data.mean",mean_/mean);
// Deviation of the ratio a/b by first-order error propagation: a/b^2 * dev_b + dev_a / b.
296 report_vector_funcs.graphs.put(
"performance.vector_layout(0).y.data.dev",mean_/(mean*mean)*dev + dev_ / mean );
// Series 1 metadata: NADD elements, labelled "read_write_inte".
298 report_vector_funcs.graphs.put(
"performance.vector_layout(1).funcs.nele",NADD);
299 report_vector_funcs.graphs.put(
"performance.vector_layout(1).funcs.name",
"read_write_inte");
// Repetition loop of the second series.
301 for (
size_t i = 0 ; i < N_STAT+1 ; i++)
// Same template element setup as in the first series.
315 pa.
get<1>()[0] = 1.0;
316 pa.
get<1>()[1] = 1.0;
318 pa.
get<2>()[0][0] = 1.0;
319 pa.
get<2>()[0][1] = 1.0;
320 pa.
get<2>()[1][0] = 1.0;
321 pa.
get<2>()[1][1] = 1.0;
// Two NADD-iteration loops whose bodies are not visible in this listing.
325 for (
size_t j = 0 ; j < NADD ; j++)
334 for (
size_t j = 0 ; j < NADD ; j++)
// Elapsed real time of timer tg becomes sample i of times2_g.
341 times2_g[i] = tg.
getwct();
// One raw pointer per property of v1, plus the coordinate buffer of v2.
347 double * prp0 = (
double *)v1.getPointer<0>();
348 double * prp1 = (
double *)v1.getPointer<1>();
349 double * prp2 = (
double *)v1.getPointer<2>();
351 double * pos = (
double *)v2.getPointer<0>();
// Plain-array pass; sz (declared outside the visible lines) is passed as the
// per-component element count n_pos.
353 for (
size_t j = 0 ; j < NADD ; j++)
355 read_write_inte(pos,prp0,prp1,prp2,j,sz);
// mean2/dev2 come from times2_g, mean2_/dev2_ from times2.
365 standard_deviation(times2_g,mean2,dev2);
369 standard_deviation(times2,mean2_,dev2_);
// Ratio of the two means and its first-order propagated deviation.
371 report_vector_funcs.graphs.put(
"performance.vector_layout(1).y.data.mean",mean2_/mean2);
375 report_vector_funcs.graphs.put(
"performance.vector_layout(1).y.data.dev",mean2_/(mean2*mean2)*dev2 + dev2_ / mean2 );
// GPU comparison between kernels working on the library vector and kernels
// working on raw device pointers, reported as two series: "read_write_lin"
// and "read_write_inte". For each series the report stores the ratio of two
// mean timings and a deviation for that ratio under
// performance.vector_layout_gpu(0) / (1).
// Both series process NADD_GPU elements, and both report that count as nele.
// NOTE(review): this listing omits several lines of the body (braces, the
// declarations of v1, v2, pa, sz, the timers and the mean/dev variables);
// comments below only describe what is visible.
378 BOOST_AUTO_TEST_CASE(vector_performance_gpu_layout_vs_plain_array)
// Timing samples for the first series (N_STAT + 1 repetitions each).
380 std::vector<double> times(N_STAT + 1);
381 std::vector<double> times_g(N_STAT + 1);
// Timing samples for the second series.
383 std::vector<double> times2(N_STAT + 1);
384 std::vector<double> times2_g(N_STAT + 1);
// NOTE(review): no use of tot_accu is visible in this listing.
387 double tot_accu = 0.0;
// Series 0 metadata: NADD_GPU elements, labelled "read_write_lin".
389 report_vector_funcs.graphs.put(
"performance.vector_layout_gpu(0).funcs.nele",NADD_GPU);
390 report_vector_funcs.graphs.put(
"performance.vector_layout_gpu(0).funcs.name",
"read_write_lin");
// Repetition loop of the first series.
392 for (
size_t i = 0 ; i < N_STAT+1 ; i++)
// Set both components of property 1 of the template element pa to 1.0.
406 pa.
get<1>()[0] = 1.0;
407 pa.
get<1>()[1] = 1.0;
// Set all four entries of property 2 of pa to 1.0.
409 pa.
get<2>()[0][0] = 1.0;
410 pa.
get<2>()[0][1] = 1.0;
411 pa.
get<2>()[1][0] = 1.0;
412 pa.
get<2>()[1][1] = 1.0;
// NADD_GPU iterations; the loop body is not visible in this listing.
416 for (
size_t j = 0 ; j < NADD_GPU ; j++)
// Launch configuration obtained from v1, requested with the value 1536.
422 auto ite = v1.getGPUIterator(1536);
// Library path: kernel working on the kernel-side views of v1 and v2.
428 CUDA_LAUNCH(read_write_ker,ite,v1.toKernel(),v2.toKernel());
// Sample i of the library path, printed with the label "OpenFPM".
431 times_g[i] = tga.getwctGPU();
434 std::cout <<
"OpenFPM: " << times_g[i] << std::endl;
// Raw pointers to property 0 of the kernel-side views, used as plain arrays.
439 double * prp = (
double *)v1.toKernel().getPointer<0>();
440 double * pos = (
double *)v2.toKernel().getPointer<0>();
// Plain-array path: the property buffer is reinterpreted as an array of ele records.
442 CUDA_LAUNCH(read_write_lin_ker,ite,pos,(
struct ele *)prp);
// Sample i of the plain-array path, printed with the label "Array".
446 times[i] = tga2.getwctGPU();
447 std::cout <<
"Array: " << times[i] << std::endl;
// mean/dev come from times_g (library path), mean_/dev_ from times (plain-array path).
452 standard_deviation(times_g,mean,dev);
456 standard_deviation(times,mean_,dev_);
// Reported value is the ratio plain-array mean / library mean.
458 report_vector_funcs.graphs.put(
"performance.vector_layout_gpu(0).y.data.mean",mean_/mean);
// Deviation of the ratio a/b by first-order error propagation: a/b^2 * dev_b + dev_a / b.
462 report_vector_funcs.graphs.put(
"performance.vector_layout_gpu(0).y.data.dev",mean_/(mean*mean)*dev + dev_ / mean );
// Series 1 metadata: labelled "read_write_inte". The element count is NADD_GPU,
// matching the fill loop of this series and the count reported for series 0.
464 report_vector_funcs.graphs.put(
"performance.vector_layout_gpu(1).funcs.nele",NADD_GPU);
465 report_vector_funcs.graphs.put(
"performance.vector_layout_gpu(1).funcs.name",
"read_write_inte");
// Repetition loop of the second series.
467 for (
size_t i = 0 ; i < N_STAT+1 ; i++)
// Same template element setup as in the first series.
481 pa.
get<1>()[0] = 1.0;
482 pa.
get<1>()[1] = 1.0;
484 pa.
get<2>()[0][0] = 1.0;
485 pa.
get<2>()[0][1] = 1.0;
486 pa.
get<2>()[1][0] = 1.0;
487 pa.
get<2>()[1][1] = 1.0;
// NADD_GPU iterations; the loop body is not visible in this listing.
491 for (
size_t j = 0 ; j < NADD_GPU ; j++)
500 auto ite = v1.getGPUIterator(1536);
// Library path for the second series.
502 CUDA_LAUNCH(read_write_ker,ite,v1.toKernel(),v2.toKernel());
// Sample i of the library path, printed with the label "OpenFPM inte".
506 times2_g[i] = tg.getwctGPU();
507 std::cout <<
"OpenFPM inte: " << times2_g[i] << std::endl;
// One raw pointer per property of v1, plus the coordinate buffer of v2.
514 double * prp0 = (
double *)v1.toKernel().getPointer<0>();
515 double * prp1 = (
double *)v1.toKernel().getPointer<1>();
516 double * prp2 = (
double *)v1.toKernel().getPointer<2>();
518 double * pos = (
double *)v2.toKernel().getPointer<0>();
// Plain-array path; sz (declared outside the visible lines) is passed as the
// per-component element count n_pos.
520 CUDA_LAUNCH(read_write_inte_ker,ite,pos,prp0,prp1,prp2,sz);
// Sample i of the plain-array path, printed with the label "Array inte".
524 times2[i] = tga.getwctGPU();
526 std::cout <<
"Array inte: " << times2[i] << std::endl;
// mean2/dev2 come from times2_g (library path), mean2_/dev2_ from times2 (plain-array path).
531 standard_deviation(times2_g,mean2,dev2);
535 standard_deviation(times2,mean2_,dev2_);
// Ratio of the two means and its first-order propagated deviation.
537 report_vector_funcs.graphs.put(
"performance.vector_layout_gpu(1).y.data.mean",mean2_/mean2);
541 report_vector_funcs.graphs.put(
"performance.vector_layout_gpu(1).y.data.dev",mean2_/(mean2*mean2)*dev2 + dev2_ / mean2 );
// Final step of the suite: describe three graphs in the report tree, write the
// tree to "vector_performance_funcs.xml", and produce
// "vector_performance_funcs.html" through the chart object cg.
// NOTE(review): this listing omits several lines of the body (braces and the
// declaration of cg); comments below only describe what is visible.
544 BOOST_AUTO_TEST_CASE(vector_performance_write_report)
// Graph 0: "Vector add and get", fed by the performance.vector(#) series.
548 report_vector_funcs.graphs.put(
"graphs.graph(0).type",
"line");
549 report_vector_funcs.graphs.add(
"graphs.graph(0).title",
"Vector add and get");
550 report_vector_funcs.graphs.add(
"graphs.graph(0).x.title",
"Tests");
551 report_vector_funcs.graphs.add(
"graphs.graph(0).y.title",
"Time seconds");
552 report_vector_funcs.graphs.add(
"graphs.graph(0).y.data(0).source",
"performance.vector(#).y.data.mean");
553 report_vector_funcs.graphs.add(
"graphs.graph(0).x.data(0).source",
"performance.vector(#).funcs.name");
554 report_vector_funcs.graphs.add(
"graphs.graph(0).y.data(0).title",
"Actual");
555 report_vector_funcs.graphs.add(
"graphs.graph(0).interpolation",
"lines");
// Graph 1: "Vector read write", fed by the performance.vector_layout(#) series.
// NOTE(review): the y axis is titled "Time seconds", but the values stored
// under performance.vector_layout(#).y.data.mean by
// vector_performance_layout_vs_plain_array are ratios of two mean times.
557 report_vector_funcs.graphs.put(
"graphs.graph(1).type",
"line");
558 report_vector_funcs.graphs.add(
"graphs.graph(1).title",
"Vector read write");
559 report_vector_funcs.graphs.add(
"graphs.graph(1).x.title",
"Layout");
560 report_vector_funcs.graphs.add(
"graphs.graph(1).y.title",
"Time seconds");
561 report_vector_funcs.graphs.add(
"graphs.graph(1).y.data(0).source",
"performance.vector_layout(#).y.data.mean");
562 report_vector_funcs.graphs.add(
"graphs.graph(1).x.data(0).source",
"performance.vector_layout(#).funcs.name");
563 report_vector_funcs.graphs.add(
"graphs.graph(1).y.data(0).title",
"Actual");
564 report_vector_funcs.graphs.add(
"graphs.graph(1).interpolation",
"lines");
// Graph 2: "Vector GPU read write", fed by the performance.vector_layout_gpu(#) series.
// NOTE(review): same remark as for graph 1 -- the plotted values are ratios of
// mean times, while the y axis is titled "Time seconds".
566 report_vector_funcs.graphs.put(
"graphs.graph(2).type",
"line");
567 report_vector_funcs.graphs.add(
"graphs.graph(2).title",
"Vector GPU read write");
568 report_vector_funcs.graphs.add(
"graphs.graph(2).x.title",
"Layout");
569 report_vector_funcs.graphs.add(
"graphs.graph(2).y.title",
"Time seconds");
570 report_vector_funcs.graphs.add(
"graphs.graph(2).y.data(0).source",
"performance.vector_layout_gpu(#).y.data.mean");
571 report_vector_funcs.graphs.add(
"graphs.graph(2).x.data(0).source",
"performance.vector_layout_gpu(#).funcs.name");
572 report_vector_funcs.graphs.add(
"graphs.graph(2).y.data(0).title",
"Actual");
573 report_vector_funcs.graphs.add(
"graphs.graph(2).interpolation",
"lines");
// Write the whole report tree as XML, indented with 4 spaces per level.
575 boost::property_tree::xml_writer_settings<std::string> settings(
' ', 4);
576 boost::property_tree::write_xml(
"vector_performance_funcs.xml", report_vector_funcs.graphs,std::locale(),settings);
// Path of the reference XML, located under the external test directory test_dir.
580 std::string file_xml_ref(test_dir);
581 file_xml_ref += std::string(
"/openfpm_data/vector_performance_funcs_ref.xml");
// Hand the fresh XML, the reference XML and the chart object cg to the graph builder
// (presumably it fills cg from both files -- confirm).
583 StandardXMLPerformanceGraph(
"vector_performance_funcs.xml",file_xml_ref,cg);
585 addUpdateTime(cg,1,
"data",
"vector_performance_funcs");
// Emit the charts as an HTML file.
587 cg.
write(
"vector_performance_funcs.html");
590 BOOST_AUTO_TEST_SUITE_END()
592 BOOST_AUTO_TEST_SUITE_END()
Small class that produces graphs in HTML using Google Charts.
void write(std::string file)
It writes the graphs to a file in HTML format using Google Charts.
This class allocates and destroys CPU memory.
Test structure used for several tests.
static const unsigned int v
v property is at position 4 in the boost::fusion::vector
static const unsigned int t
t property is at position 5 in the boost::fusion::vector
Implementation of 1-D std::vector like structure.
Class for cpu time benchmarking.
void stop()
Stop the timer.
void start()
Start the timer.
double getwct()
Return the elapsed real time.
Aggregate of properties: from a list of objects it creates a struct that follows the OPENFPM native stru...
__device__ __host__ boost::mpl::at< type, boost::mpl::int_< i > >::type & get()
Get property i.
Transform the boost::fusion::vector into memory specification (memory_traits)