OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
vector_performance_test.cu
1 #define BOOST_TEST_DYN_LINK
2 #include <boost/test/unit_test.hpp>
3 #include "Plot/GoogleChart.hpp"
4 #include "timer.hpp"
5 #include <boost/property_tree/ptree.hpp>
6 #include <boost/property_tree/xml_parser.hpp>
7 #include "util/performance/performance_util.hpp"
8 #include "Point_test.hpp"
9 #include "util/stat/common_statistics.hpp"
10 
11 extern const char * test_dir;
12 
13 typedef Point_test<float> P;
14 
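// Number of repetitions per benchmark: every timing loop below runs N_STAT+1 times and the
// mean/deviation of the collected wall-clock times is computed with standard_deviation().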
15 constexpr int N_STAT = 32;
16 
17 BOOST_AUTO_TEST_SUITE( performance )
18 
19 #define NADD 128*128*128
20 #define NADD_GPU 256*256*256
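// NADD     = 128^3 =  2,097,152 elements (CPU benchmarks)
// NADD_GPU = 256^3 = 16,777,216 elements (GPU benchmarks)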
21 
22 // Property tree
23 struct report_vector_func_tests
24 {
25  boost::property_tree::ptree graphs;
26 };
27 
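// Global report: every test case below stores its timings in this property tree, and the
// vector_performance_write_report test case serializes it to XML/HTML at the end.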
28 report_vector_func_tests report_vector_funcs;
29 
30 BOOST_AUTO_TEST_SUITE( vector_performance )
31 
32 BOOST_AUTO_TEST_CASE(vector_performance)
33 {
34  report_vector_funcs.graphs.put("performance.vector(0).funcs.nele",NADD);
35  report_vector_funcs.graphs.put("performance.vector(0).funcs.name","add");
36 
37  report_vector_funcs.graphs.put("performance.vector(1).funcs.nele",NADD);
38  report_vector_funcs.graphs.put("performance.vector(1).funcs.name","get");
39 
40  std::vector<double> times(N_STAT + 1);
41  std::vector<double> times_g(N_STAT + 1);
42 
43  // get test
44  double tot_accu = 0.0;
45 
46  for (size_t i = 0 ; i < N_STAT+1 ; i++)
47  {
48  timer t;
49  t.start();
50 
51  // create a vector
52  openfpm::vector<Point_test<float>> v1;
53 
54  // Point
55  Point_test<float> p;
56  p.setx(1.0);
57  p.sety(2.0);
58  p.setz(3.0);
59  p.sets(4.0);
60 
61  p.get<P::v>()[0] = 1.0;
62  p.get<P::v>()[1] = 2.0;
63  p.get<P::v>()[2] = 7.0;
64 
65  p.get<P::t>()[0][0] = 10.0;
66  p.get<P::t>()[0][1] = 13.0;
67  p.get<P::t>()[0][2] = 8.0;
68  p.get<P::t>()[1][0] = 19.0;
69  p.get<P::t>()[1][1] = 23.0;
70  p.get<P::t>()[1][2] = 5.0;
71  p.get<P::t>()[2][0] = 4.0;
72  p.get<P::t>()[2][1] = 3.0;
73  p.get<P::t>()[2][2] = 11.0;
74 
75  // Add test
76 
77  for (size_t j = 0 ; j < NADD ; j++)
78  {
79  v1.add(p);
80  }
81 
82  t.stop();
83  times[i] = t.getwct();
84 
85  timer tg;
86  tg.start();
87 
88  for (size_t j = 0 ; j < NADD ; j++)
89  {
90  double accu1 = v1.template get<P::x>(j);
91  double accu2 = v1.template get<P::y>(j);
92  double accu3 = v1.template get<P::z>(j);
93  double accu4 = v1.template get<P::s>(j);
94 
95  double accu5 = v1.template get<P::v>(j)[0];
96  double accu6 = v1.template get<P::v>(j)[1];
97  double accu7 = v1.template get<P::v>(j)[2];
98 
99  double accu8 = v1.template get<P::t>(j)[0][0];
100  double accu9 = v1.template get<P::t>(j)[0][1];
101  double accu10 = v1.template get<P::t>(j)[0][2];
102  double accu11 = v1.template get<P::t>(j)[1][0];
103  double accu12 = v1.template get<P::t>(j)[1][1];
104  double accu13 = v1.template get<P::t>(j)[1][2];
105  double accu14 = v1.template get<P::t>(j)[2][0];
106  double accu15 = v1.template get<P::t>(j)[2][1];
107  double accu16 = v1.template get<P::t>(j)[2][2];
108 
109  tot_accu += accu1 + accu2 + accu3 + accu4 + accu5 + accu6 + accu7 + accu8 + accu9 + accu10 + accu11 + accu12 +
110  accu13 + accu14 + accu15 + accu16;
111  }
112 
113  tg.stop();
114 
115  times_g[i] = tg.getwct();
116  }
117 
118  double mean;
119  double dev;
120  standard_deviation(times,mean,dev);
121 
122  report_vector_funcs.graphs.put("performance.vector(0).y.data.mean",mean);
123  report_vector_funcs.graphs.put("performance.vector(0).y.data.dev",dev);
124 
125  standard_deviation(times_g,mean,dev);
126 
127  report_vector_funcs.graphs.put("performance.vector(1).y.data.mean",mean);
128  report_vector_funcs.graphs.put("performance.vector(1).y.data.dev",dev);
129 }
130 
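// Access pattern used for the layout comparison: read the 2D position of element p, derive a
// scalar (property 0), a 2-vector (property 1) and a 2x2 tensor (property 2) from it, then
// perturb the position. The same body runs on the host and, through read_write_ker, inside a
// CUDA kernel; the plain-array versions below repeat the same operations by hand.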
131 template<typename vector_prop_type, typename vector_pos_type>
132 __device__ __host__ void read_write(vector_prop_type & vd_prop, vector_pos_type & vd_pos, unsigned int p)
133 {
134  vd_prop.template get<0>(p) = vd_pos.template get<0>(p)[0] + vd_pos.template get<0>(p)[1];
135 
136  vd_prop.template get<1>(p)[0] = vd_pos.template get<0>(p)[0];
137  vd_prop.template get<1>(p)[1] = vd_pos.template get<0>(p)[1];
138 
139  vd_prop.template get<2>(p)[0][0] = vd_pos.template get<0>(p)[0];
140  vd_prop.template get<2>(p)[0][1] = vd_pos.template get<0>(p)[1];
141  vd_prop.template get<2>(p)[1][0] = vd_pos.template get<0>(p)[0] +
142  vd_pos.template get<0>(p)[1];
143  vd_prop.template get<2>(p)[1][1] = vd_pos.template get<0>(p)[1] -
144  vd_pos.template get<0>(p)[0];
145 
146  vd_pos.template get<0>(p)[0] += 0.01f;
147  vd_pos.template get<0>(p)[1] += 0.01f;
148 }
149 
150 template<typename vector_type1, typename vector_type2>
151 __global__ void read_write_ker(vector_type1 v1, vector_type2 v2)
152 {
153  unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;
154 
155  read_write(v1,v2,p);
156 }
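// Note: read_write_ker computes a global index without a range guard, so it relies on the
// launch configuration covering exactly the number of elements. A guarded variant (sketch
// only, not used by the benchmarks below; it assumes the kernel view returned by toKernel()
// exposes size() like the host-side vector does) could look like this:
template<typename vector_type1, typename vector_type2>
__global__ void read_write_ker_guarded(vector_type1 v1, vector_type2 v2)
{
    unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;

    // threads mapped past the last element do nothing
    if (p >= v1.size()) return;

    read_write(v1,v2,p);
}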
157 
158 
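// Plain-array (AoS) baseline for the lin layout: `ele` mirrors the property set used by
// read_write (scalar, 2-vector, 2x2 tensor), positions are stored as interleaved (x,y)
// pairs, and read_write_lin performs the same reads and writes by hand.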
159 struct ele
160 {
161  double s;
162  double v[2];
163  double t[2][2];
164 };
165 
166 __device__ __host__ void read_write_lin(double * pos, ele * prp, unsigned int p)
167 {
168  prp[p].s = pos[2*p] + pos[2*p+1];
169 
170  prp[p].v[0] = pos[2*p];
171  prp[p].v[1] = pos[2*p+1];
172 
173  prp[p].t[0][0] = pos[2*p];
174  prp[p].t[0][1] = pos[2*p+1];
175  prp[p].t[1][0] = pos[2*p] + pos[2*p+1];
176  prp[p].t[1][1] = pos[2*p+1] - pos[2*p];
177 
178  pos[2*p] += 0.01f;
179  pos[2*p+1] += 0.01f;
180 }
181 
182 
183 __global__ void read_write_lin_ker(double * pos, ele * prp)
184 {
185  unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;
186 
187  read_write_lin(pos,prp,p);
188 }
189 
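// Plain-array (SoA) baseline for the inte layout: each property lives in its own contiguous
// buffer and multi-component properties keep their components n_pos elements apart, which is
// the indexing scheme read_write_inte reproduces by hand.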
190 __device__ __host__ void read_write_inte(double * pos, double * prp0, double * prp1, double * prp2, unsigned int p, unsigned int n_pos)
191 {
192  prp0[0*n_pos + p] = pos[0*n_pos + p] + pos[1*n_pos+p];
193 
194  prp1[0*n_pos + p] = pos[0*n_pos + p];
195  prp1[1*n_pos + p] = pos[1*n_pos + p];
196 
197  prp2[0*n_pos*2+0*n_pos + p] = pos[0*n_pos + p];
198  prp2[0*n_pos*2+1*n_pos + p] = pos[1*n_pos + p];
199  prp2[1*n_pos*2+0*n_pos + p] = pos[0*n_pos + p] +
200  pos[1*n_pos + p];
201  prp2[1*n_pos*2+1*n_pos + p] = pos[1*n_pos + p] -
202  pos[0*n_pos + p];
203 
204  pos[0*n_pos + p] += 0.01f;
205  pos[1*n_pos + p] += 0.01f;
206 }
207 
208 __global__ void read_write_inte_ker(double * pos, double * prp0, double * prp1, double * prp2, unsigned int n_pos)
209 {
210  unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;
211 
212  read_write_inte(pos,prp0,prp1,prp2,p,n_pos);
213 }
214 
215 BOOST_AUTO_TEST_CASE(vector_performance_layout_vs_plain_array)
216 {
217  std::vector<double> times(N_STAT + 1);
218  std::vector<double> times_g(N_STAT + 1);
219 
220  std::vector<double> times2(N_STAT + 1);
221  std::vector<double> times2_g(N_STAT + 1);
222 
223  report_vector_funcs.graphs.put("performance.vector_layout(0).funcs.nele",NADD);
224  report_vector_funcs.graphs.put("performance.vector_layout(0).funcs.name","read_write_lin");
225 
226  for (size_t i = 0 ; i < N_STAT+1 ; i++)
227  {
228  // create a vector
229  openfpm::vector<aggregate<double,double[2],double[2][2]>> v1;
230  openfpm::vector<aggregate<double[2]>> v2;
231 
232  // Point
233  aggregate<double[2]> p;
234  p.get<0>()[0] = 1.0;
235  p.get<0>()[1] = 2.0;
236 
237  aggregate<double,double[2],double[2][2]> pa;
238  pa.get<0>() = 1.0;
239 
240  pa.get<1>()[0] = 1.0;
241  pa.get<1>()[1] = 1.0;
242 
243  pa.get<2>()[0][0] = 1.0;
244  pa.get<2>()[0][1] = 1.0;
245  pa.get<2>()[1][0] = 1.0;
246  pa.get<2>()[1][1] = 1.0;
247 
248  // Add test
249 
250  for (size_t j = 0 ; j < NADD ; j++)
251  {
252  v1.add(pa);
253  v2.add(p);
254  }
255 
256  timer tg;
257  tg.start();
258 
259  for (size_t j = 0 ; j < NADD ; j++)
260  {
261  read_write(v1,v2,j);
262  }
263 
264  tg.stop();
265 
266  times_g[i] = tg.getwct();
267 
268  timer tga;
269  tga.start();
270 
271  double * prp = (double *)v1.getPointer<0>();
272  double * pos = (double *)v2.getPointer<0>();
273 
274  for (size_t j = 0 ; j < NADD ; j++)
275  {
276  read_write_lin(pos,(struct ele *)prp,j);
277  }
278 
279  tga.stop();
280 
281  times[i] = tga.getwct();
282  }
283 
284  double mean;
285  double dev;
286  standard_deviation(times_g,mean,dev);
287 
288  double mean_;
289  double dev_;
290  standard_deviation(times,mean_,dev_);
291 
292  report_vector_funcs.graphs.put("performance.vector_layout(0).y.data.mean",mean_/mean);
293 
294  // Deviation of x/y = x/y^2 dy + 1/y dx
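 // (first-order propagation: for r = x/y, dr ≈ (1/y) dx + (x/y^2) dy,
 //  with x = mean_ (plain array) and y = mean (OpenFPM layout))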
295 
296  report_vector_funcs.graphs.put("performance.vector_layout(0).y.data.dev",mean_/(mean*mean)*dev + dev_ / mean );
297 
298  report_vector_funcs.graphs.put("performance.vector_layout(1).funcs.nele",NADD);
299  report_vector_funcs.graphs.put("performance.vector_layout(1).funcs.name","read_write_inte");
300 
301  for (size_t i = 0 ; i < N_STAT+1 ; i++)
302  {
303  // create a vector
304  openfpm::vector<aggregate<double,double[2],double[2][2]>,HeapMemory,memory_traits_inte> v1;
305  openfpm::vector<aggregate<double[2]>,HeapMemory,memory_traits_inte> v2;
306 
307  // Point
308  aggregate<double[2]> p;
309  p.get<0>()[0] = 1.0;
310  p.get<0>()[1] = 2.0;
311 
312  aggregate<double,double[2],double[2][2]> pa;
313  pa.get<0>() = 1.0;
314 
315  pa.get<1>()[0] = 1.0;
316  pa.get<1>()[1] = 1.0;
317 
318  pa.get<2>()[0][0] = 1.0;
319  pa.get<2>()[0][1] = 1.0;
320  pa.get<2>()[1][0] = 1.0;
321  pa.get<2>()[1][1] = 1.0;
322 
323  // Add test
324 
325  for (size_t j = 0 ; j < NADD ; j++)
326  {
327  v1.add(pa);
328  v2.add(p);
329  }
330 
331  timer tg;
332  tg.start();
333 
334  for (size_t j = 0 ; j < NADD ; j++)
335  {
336  read_write(v1,v2,j);
337  }
338 
339  tg.stop();
340 
341  times2_g[i] = tg.getwct();
342  int sz = v1.size();
343 
344  timer tga;
345  tga.start();
346 
347  double * prp0 = (double *)v1.getPointer<0>();
348  double * prp1 = (double *)v1.getPointer<1>();
349  double * prp2 = (double *)v1.getPointer<2>();
350 
351  double * pos = (double *)v2.getPointer<0>();
352 
353  for (size_t j = 0 ; j < NADD ; j++)
354  {
355  read_write_inte(pos,prp0,prp1,prp2,j,sz);
356  }
357 
358  tga.stop();
359 
360  times2[i] = tga.getwct();
361  }
362 
363  double mean2;
364  double dev2;
365  standard_deviation(times2_g,mean2,dev2);
366 
367  double mean2_;
368  double dev2_;
369  standard_deviation(times2,mean2_,dev2_);
370 
371  report_vector_funcs.graphs.put("performance.vector_layout(1).y.data.mean",mean2_/mean2);
372 
373  // Deviation of x/y = x/y^2 dy + 1/y dx
374 
375  report_vector_funcs.graphs.put("performance.vector_layout(1).y.data.dev",mean2_/(mean2*mean2)*dev2 + dev2_ / mean2 );
376 }
377 
378 BOOST_AUTO_TEST_CASE(vector_performance_gpu_layout_vs_plain_array)
379 {
380  std::vector<double> times(N_STAT + 1);
381  std::vector<double> times_g(N_STAT + 1);
382 
383  std::vector<double> times2(N_STAT + 1);
384  std::vector<double> times2_g(N_STAT + 1);
385 
386  // get test
387  double tot_accu = 0.0;
388 
389  report_vector_funcs.graphs.put("performance.vector_layout_gpu(0).funcs.nele",NADD_GPU);
390  report_vector_funcs.graphs.put("performance.vector_layout_gpu(0).funcs.name","read_write_lin");
391 
392  for (size_t i = 0 ; i < N_STAT+1 ; i++)
393  {
394  // create a vector
395  openfpm::vector<aggregate<double,double[2],double[2][2]>,CudaMemory,memory_traits_lin> v1;
396  openfpm::vector<aggregate<double[2]>,CudaMemory,memory_traits_lin> v2;
397 
398  // Point
399  aggregate<double[2]> p;
400  p.get<0>()[0] = 1.0;
401  p.get<0>()[1] = 2.0;
402 
403  aggregate<double,double[2],double[2][2]> pa;
404  pa.get<0>() = 1.0;
405 
406  pa.get<1>()[0] = 1.0;
407  pa.get<1>()[1] = 1.0;
408 
409  pa.get<2>()[0][0] = 1.0;
410  pa.get<2>()[0][1] = 1.0;
411  pa.get<2>()[1][0] = 1.0;
412  pa.get<2>()[1][1] = 1.0;
413 
414  // Add test
415 
416  for (size_t j = 0 ; j < NADD_GPU ; j++)
417  {
418  v1.add(pa);
419  v2.add(p);
420  }
421 
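 // Build a launch configuration that spans all elements of v1.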
422  auto ite = v1.getGPUIterator(1536);
423 
424  {
425 
426  timer tga;
427  tga.startGPU();
428  CUDA_LAUNCH(read_write_ker,ite,v1.toKernel(),v2.toKernel());
429 
430  tga.stopGPU();
431  times_g[i] = tga.getwctGPU();
432  }
433 
434  std::cout << "OpenFPM: " << times_g[i] << std::endl;
435 
436  timer tga2;
437  tga2.startGPU();
438 
439  double * prp = (double *)v1.toKernel().getPointer<0>();
440  double * pos = (double *)v2.toKernel().getPointer<0>();
441 
442  CUDA_LAUNCH(read_write_lin_ker,ite,pos,(struct ele *)prp);
443 
444  tga2.stopGPU();
445 
446  times[i] = tga2.getwctGPU();
447  std::cout << "Array: " << times[i] << std::endl;
448  }
449 
450  double mean;
451  double dev;
452  standard_deviation(times_g,mean,dev);
453 
454  double mean_;
455  double dev_;
456  standard_deviation(times,mean_,dev_);
457 
458  report_vector_funcs.graphs.put("performance.vector_layout_gpu(0).y.data.mean",mean_/mean);
459 
460  // Deviation of x/y = x/y^2 dy + 1/y dx
461 
462  report_vector_funcs.graphs.put("performance.vector_layout_gpu(0).y.data.dev",mean_/(mean*mean)*dev + dev_ / mean );
463 
464  report_vector_funcs.graphs.put("performance.vector_layout_gpu(1).funcs.nele",NADD_GPU);
465  report_vector_funcs.graphs.put("performance.vector_layout_gpu(1).funcs.name","read_write_inte");
466 
467  for (size_t i = 0 ; i < N_STAT+1 ; i++)
468  {
469  // create a vector
470  openfpm::vector<aggregate<double,double[2],double[2][2]>,CudaMemory,memory_traits_inte> v1;
471  openfpm::vector<aggregate<double[2]>,CudaMemory,memory_traits_inte> v2;
472 
473  // Point
474  aggregate<double[2]> p;
475  p.get<0>()[0] = 1.0;
476  p.get<0>()[1] = 2.0;
477 
478  aggregate<double,double[2],double[2][2]> pa;
479  pa.get<0>() = 1.0;
480 
481  pa.get<1>()[0] = 1.0;
482  pa.get<1>()[1] = 1.0;
483 
484  pa.get<2>()[0][0] = 1.0;
485  pa.get<2>()[0][1] = 1.0;
486  pa.get<2>()[1][0] = 1.0;
487  pa.get<2>()[1][1] = 1.0;
488 
489  // Add test
490 
491  for (size_t j = 0 ; j < NADD_GPU ; j++)
492  {
493  v1.add(pa);
494  v2.add(p);
495  }
496 
497  timer tg;
498  tg.startGPU();
499 
500  auto ite = v1.getGPUIterator(1536);
501 
502  CUDA_LAUNCH(read_write_ker,ite,v1.toKernel(),v2.toKernel());
503 
504  tg.stopGPU();
505 
506  times2_g[i] = tg.getwctGPU();
507  std::cout << "OpenFPM inte: " << times2_g[i] << std::endl;
508 
509  int sz = v1.size();
510 
511  timer tga;
512  tga.startGPU();
513 
514  double * prp0 = (double *)v1.toKernel().getPointer<0>();
515  double * prp1 = (double *)v1.toKernel().getPointer<1>();
516  double * prp2 = (double *)v1.toKernel().getPointer<2>();
517 
518  double * pos = (double *)v2.toKernel().getPointer<0>();
519 
520  CUDA_LAUNCH(read_write_inte_ker,ite,pos,prp0,prp1,prp2,sz);
521 
522  tga.stopGPU();
523 
524  times2[i] = tga.getwctGPU();
525 
526  std::cout << "Array inte: " << times2[i] << std::endl;
527  }
528 
529  double mean2;
530  double dev2;
531  standard_deviation(times2_g,mean2,dev2);
532 
533  double mean2_;
534  double dev2_;
535  standard_deviation(times2,mean2_,dev2_);
536 
537  report_vector_funcs.graphs.put("performance.vector_layout_gpu(1).y.data.mean",mean2_/mean2);
538 
539  // Deviation of x/y = x/y^2 dy + 1/y dx
540 
541  report_vector_funcs.graphs.put("performance.vector_layout_gpu(1).y.data.dev",mean2_/(mean2*mean2)*dev2 + dev2_ / mean2 );
542 }
543 
544 BOOST_AUTO_TEST_CASE(vector_performance_write_report)
545 {
546  // Create the graphs
547 
548  report_vector_funcs.graphs.put("graphs.graph(0).type","line");
549  report_vector_funcs.graphs.add("graphs.graph(0).title","Vector add and get");
550  report_vector_funcs.graphs.add("graphs.graph(0).x.title","Tests");
551  report_vector_funcs.graphs.add("graphs.graph(0).y.title","Time seconds");
552  report_vector_funcs.graphs.add("graphs.graph(0).y.data(0).source","performance.vector(#).y.data.mean");
553  report_vector_funcs.graphs.add("graphs.graph(0).x.data(0).source","performance.vector(#).funcs.name");
554  report_vector_funcs.graphs.add("graphs.graph(0).y.data(0).title","Actual");
555  report_vector_funcs.graphs.add("graphs.graph(0).interpolation","lines");
556 
557  report_vector_funcs.graphs.put("graphs.graph(1).type","line");
558  report_vector_funcs.graphs.add("graphs.graph(1).title","Vector read write");
559  report_vector_funcs.graphs.add("graphs.graph(1).x.title","Layout");
560  report_vector_funcs.graphs.add("graphs.graph(1).y.title","Time seconds");
561  report_vector_funcs.graphs.add("graphs.graph(1).y.data(0).source","performance.vector_layout(#).y.data.mean");
562  report_vector_funcs.graphs.add("graphs.graph(1).x.data(0).source","performance.vector_layout(#).funcs.name");
563  report_vector_funcs.graphs.add("graphs.graph(1).y.data(0).title","Actual");
564  report_vector_funcs.graphs.add("graphs.graph(1).interpolation","lines");
565 
566  report_vector_funcs.graphs.put("graphs.graph(2).type","line");
567  report_vector_funcs.graphs.add("graphs.graph(2).title","Vector GPU read write");
568  report_vector_funcs.graphs.add("graphs.graph(2).x.title","Layout");
569  report_vector_funcs.graphs.add("graphs.graph(2).y.title","Time seconds");
570  report_vector_funcs.graphs.add("graphs.graph(2).y.data(0).source","performance.vector_layout_gpu(#).y.data.mean");
571  report_vector_funcs.graphs.add("graphs.graph(2).x.data(0).source","performance.vector_layout_gpu(#).funcs.name");
572  report_vector_funcs.graphs.add("graphs.graph(2).y.data(0).title","Actual");
573  report_vector_funcs.graphs.add("graphs.graph(2).interpolation","lines");
574 
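 // Write the collected measurements to XML, then render them together with the reference
 // results (vector_performance_funcs_ref.xml from openfpm_data) as a Google-Chart HTML page.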
575  boost::property_tree::xml_writer_settings<std::string> settings(' ', 4);
576  boost::property_tree::write_xml("vector_performance_funcs.xml", report_vector_funcs.graphs,std::locale(),settings);
577 
578  GoogleChart cg;
579 
580  std::string file_xml_ref(test_dir);
581  file_xml_ref += std::string("/openfpm_data/vector_performance_funcs_ref.xml");
582 
583  StandardXMLPerformanceGraph("vector_performance_funcs.xml",file_xml_ref,cg);
584 
585  addUpdateTime(cg,1,"data","vector_performance_funcs");
586 
587  cg.write("vector_performance_funcs.html");
588 }
589 
590 BOOST_AUTO_TEST_SUITE_END()
591 
592 BOOST_AUTO_TEST_SUITE_END()