OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
 
Loading...
Searching...
No Matches
vector_performance_test.cu
1#define BOOST_TEST_DYN_LINK
2#include <boost/test/unit_test.hpp>
3#include "Plot/GoogleChart.hpp"
4#include "timer.hpp"
5#include <boost/property_tree/ptree.hpp>
6#include <boost/property_tree/xml_parser.hpp>
7#include "util/performance/performance_util.hpp"
8#include "Point_test.hpp"
9#include "util/stat/common_statistics.hpp"
10
11extern const char * test_dir;
12
13typedef Point_test<float> P;
14
15constexpr int N_STAT = 32;
16
17BOOST_AUTO_TEST_SUITE( performance )
18
19#define NADD 128*128*128
20#define NADD_GPU 256*256*256
21
22// Property tree
24{
25 boost::property_tree::ptree graphs;
26};
27
28report_vector_func_tests report_vector_funcs;
29
30BOOST_AUTO_TEST_SUITE( vector_performance )
31
32BOOST_AUTO_TEST_CASE(vector_performance)
33{
34 report_vector_funcs.graphs.put("performance.vector(0).funcs.nele",NADD);
35 report_vector_funcs.graphs.put("performance.vector(0).funcs.name","add");
36
37 report_vector_funcs.graphs.put("performance.vector(1).funcs.nele",NADD);
38 report_vector_funcs.graphs.put("performance.vector(1).funcs.name","get");
39
40 std::vector<double> times(N_STAT + 1);
41 std::vector<double> times_g(N_STAT + 1);
42
43 // get test
44 double tot_accu = 0.0;
45
46 for (size_t i = 0 ; i < N_STAT+1 ; i++)
47 {
48 timer t;
49 t.start();
50
51 // create a vector
53
54 // Point
56 p.setx(1.0);
57 p.sety(2.0);
58 p.setz(3.0);
59 p.sets(4.0);
60
61 p.get<P::v>()[0] = 1.0;
62 p.get<P::v>()[1] = 2.0;
63 p.get<P::v>()[2] = 7.0;
64
65 p.get<P::t>()[0][0] = 10.0;
66 p.get<P::t>()[0][1] = 13.0;
67 p.get<P::t>()[0][2] = 8.0;
68 p.get<P::t>()[1][0] = 19.0;
69 p.get<P::t>()[1][1] = 23.0;
70 p.get<P::t>()[1][2] = 5.0;
71 p.get<P::t>()[2][0] = 4.0;
72 p.get<P::t>()[2][1] = 3.0;
73 p.get<P::t>()[2][2] = 11.0;
74
75 // Add test
76
77 for (size_t j = 0 ; j < NADD ; j++)
78 {
79 v1.add(p);
80 }
81
82 t.stop();
83 times[i] = t.getwct();
84
85 timer tg;
86 tg.start();
87
88 for (size_t j = 0 ; j < NADD ; j++)
89 {
90 double accu1 = v1.template get<P::x>(j);
91 double accu2 = v1.template get<P::y>(j);
92 double accu3 = v1.template get<P::z>(j);
93 double accu4 = v1.template get<P::s>(j);
94
95 double accu5 = v1.template get<P::v>(j)[0];
96 double accu6 = v1.template get<P::v>(j)[1];
97 double accu7 = v1.template get<P::v>(j)[2];
98
99 double accu8 = v1.template get<P::t>(j)[0][0];
100 double accu9 = v1.template get<P::t>(j)[0][1];
101 double accu10 = v1.template get<P::t>(j)[0][2];
102 double accu11 = v1.template get<P::t>(j)[1][0];
103 double accu12 = v1.template get<P::t>(j)[1][1];
104 double accu13 = v1.template get<P::t>(j)[1][2];
105 double accu14 = v1.template get<P::t>(j)[2][0];
106 double accu15 = v1.template get<P::t>(j)[2][1];
107 double accu16 = v1.template get<P::t>(j)[2][2];
108
109 tot_accu += accu1 + accu2 + accu3 + accu4 + accu5 + accu6 + accu7 + accu8 + accu9 + accu10 + accu11 + accu12 +
110 accu13 + accu14 + accu15 + accu16;
111 }
112
113 tg.stop();
114
115 times_g[i] = tg.getwct();
116 }
117
118 double mean;
119 double dev;
120 standard_deviation(times,mean,dev);
121
122 report_vector_funcs.graphs.put("performance.vector(0).y.data.mean",mean);
123 report_vector_funcs.graphs.put("performance.vector(0).y.data.dev",dev);
124
125 standard_deviation(times_g,mean,dev);
126
127 report_vector_funcs.graphs.put("performance.vector(1).y.data.mean",mean);
128 report_vector_funcs.graphs.put("performance.vector(1).y.data.dev",dev);
129}
130
131template<typename vector_prop_type, typename vector_pos_type>
132__device__ __host__ void read_write(vector_prop_type & vd_prop, vector_pos_type & vd_pos, unsigned int p)
133{
134 vd_prop.template get<0>(p) = vd_pos.template get<0>(p)[0] + vd_pos.template get<0>(p)[1];
135
136 vd_prop.template get<1>(p)[0] = vd_pos.template get<0>(p)[0];
137 vd_prop.template get<1>(p)[1] = vd_pos.template get<0>(p)[1];
138
139 vd_prop.template get<2>(p)[0][0] = vd_pos.template get<0>(p)[0];
140 vd_prop.template get<2>(p)[0][1] = vd_pos.template get<0>(p)[1];
141 vd_prop.template get<2>(p)[1][0] = vd_pos.template get<0>(p)[0] +
142 vd_pos.template get<0>(p)[1];
143 vd_prop.template get<2>(p)[1][1] = vd_pos.template get<0>(p)[1] -
144 vd_pos.template get<0>(p)[0];
145
146 vd_pos.template get<0>(p)[0] += 0.01f;
147 vd_pos.template get<0>(p)[1] += 0.01f;
148}
149
150template<typename vector_type1, typename vector_type2>
151__global__ void read_write_ker(vector_type1 v1, vector_type2 v2)
152{
153 unsigned int p = + blockIdx.x * blockDim.x + threadIdx.x;
154
155 read_write(v1,v2,p);
156}
157
158
159struct ele
160{
161 double s;
162 double v[2];
163 double t[2][2];
164};
165
166__device__ __host__ void read_write_lin(double * pos, ele * prp, unsigned int p)
167{
168 prp[p].s = pos[2*p] + pos[2*p+1];
169
170 prp[p].v[0] = pos[2*p];
171 prp[p].v[1] = pos[2*p+1];
172
173 prp[p].t[0][0] = pos[2*p];
174 prp[p].t[0][1] = pos[2*p+1];
175 prp[p].t[1][0] = pos[2*p] + pos[2*p+1];
176 prp[p].t[1][1] = pos[2*p+1] - pos[2*p];
177
178 pos[2*p] += 0.01f;
179 pos[2*p+1] += 0.01f;
180}
181
182
183__global__ void read_write_lin_ker(double * pos, ele * prp)
184{
185 unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;
186
187 read_write_lin(pos,prp,p);
188}
189
190__device__ __host__ void read_write_inte(double * pos, double * prp0, double * prp1, double * prp2, unsigned int p, unsigned int n_pos)
191{
192 prp0[0*n_pos + p] = pos[0*n_pos + p] + pos[1*n_pos+p];
193
194 prp1[0*n_pos + p] = pos[0*n_pos + p];
195 prp1[1*n_pos + p] = pos[1*n_pos + p];
196
197 prp2[0*n_pos*2+0*n_pos + p] = pos[0*n_pos + p];
198 prp2[0*n_pos*2+1*n_pos + p] = pos[1*n_pos + p];
199 prp2[1*n_pos*2+0*n_pos + p] = pos[0*n_pos + p] +
200 pos[1*n_pos + p];
201 prp2[1*n_pos*2+1*n_pos + p] = pos[1*n_pos + p] -
202 pos[0*n_pos + p];
203
204 pos[0*n_pos + p] += 0.01f;
205 pos[1*n_pos + p] += 0.01f;
206}
207
208__global__ void read_write_inte_ker(double * pos, double * prp0, double * prp1, double * prp2, unsigned int n_pos)
209{
210 unsigned int p = blockIdx.x * blockDim.x + threadIdx.x;
211
212 read_write_inte(pos,prp0,prp1,prp2,p,n_pos);
213}
214
215BOOST_AUTO_TEST_CASE(vector_performance_layout_vs_plain_array)
216{
217 std::vector<double> times(N_STAT + 1);
218 std::vector<double> times_g(N_STAT + 1);
219
220 std::vector<double> times2(N_STAT + 1);
221 std::vector<double> times2_g(N_STAT + 1);
222
223 report_vector_funcs.graphs.put("performance.vector_layout(0).funcs.nele",NADD);
224 report_vector_funcs.graphs.put("performance.vector_layout(0).funcs.name","read_write_lin");
225
226 for (size_t i = 0 ; i < N_STAT+1 ; i++)
227 {
228 // create a vector
231
232 // Point
234 p.get<0>()[0] = 1.0;
235 p.get<0>()[1] = 2.0;
236
238 pa.get<0>() = 1.0;
239
240 pa.get<1>()[0] = 1.0;
241 pa.get<1>()[1] = 1.0;
242
243 pa.get<2>()[0][0] = 1.0;
244 pa.get<2>()[0][1] = 1.0;
245 pa.get<2>()[1][0] = 1.0;
246 pa.get<2>()[1][1] = 1.0;
247
248 // Add test
249
250 for (size_t j = 0 ; j < NADD ; j++)
251 {
252 v1.add(pa);
253 v2.add(p);
254 }
255
256 timer tg;
257 tg.start();
258
259 for (size_t j = 0 ; j < NADD ; j++)
260 {
261 read_write(v1,v2,j);
262 }
263
264 tg.stop();
265
266 times_g[i] = tg.getwct();
267
268 timer tga;
269 tga.start();
270
271 double * prp = (double *)v1.getPointer<0>();
272 double * pos = (double *)v2.getPointer<0>();
273
274 for (size_t j = 0 ; j < NADD ; j++)
275 {
276 read_write_lin(pos,(struct ele *)prp,j);
277 }
278
279 tga.stop();
280
281 times[i] = tga.getwct();
282 }
283
284 double mean;
285 double dev;
286 standard_deviation(times_g,mean,dev);
287
288 double mean_;
289 double dev_;
290 standard_deviation(times,mean_,dev_);
291
292 report_vector_funcs.graphs.put("performance.vector_layout(0).y.data.mean",mean_/mean);
293
294 // Deviation od x/y = x/y^2 dy + 1/y dx
295
296 report_vector_funcs.graphs.put("performance.vector_layout(0).y.data.dev",mean_/(mean*mean)*dev + dev_ / mean );
297
298 report_vector_funcs.graphs.put("performance.vector_layout(1).funcs.nele",NADD);
299 report_vector_funcs.graphs.put("performance.vector_layout(1).funcs.name","read_write_inte");
300
301 for (size_t i = 0 ; i < N_STAT+1 ; i++)
302 {
303 // create a vector
306
307 // Point
309 p.get<0>()[0] = 1.0;
310 p.get<0>()[1] = 2.0;
311
313 pa.get<0>() = 1.0;
314
315 pa.get<1>()[0] = 1.0;
316 pa.get<1>()[1] = 1.0;
317
318 pa.get<2>()[0][0] = 1.0;
319 pa.get<2>()[0][1] = 1.0;
320 pa.get<2>()[1][0] = 1.0;
321 pa.get<2>()[1][1] = 1.0;
322
323 // Add test
324
325 for (size_t j = 0 ; j < NADD ; j++)
326 {
327 v1.add(pa);
328 v2.add(p);
329 }
330
331 timer tg;
332 tg.start();
333
334 for (size_t j = 0 ; j < NADD ; j++)
335 {
336 read_write(v1,v2,j);
337 }
338
339 tg.stop();
340
341 times2_g[i] = tg.getwct();
342 int sz = v1.size();
343
344 timer tga;
345 tga.start();
346
347 double * prp0 = (double *)v1.getPointer<0>();
348 double * prp1 = (double *)v1.getPointer<1>();
349 double * prp2 = (double *)v1.getPointer<2>();
350
351 double * pos = (double *)v2.getPointer<0>();
352
353 for (size_t j = 0 ; j < NADD ; j++)
354 {
355 read_write_inte(pos,prp0,prp1,prp2,j,sz);
356 }
357
358 tga.stop();
359
360 times2[i] = tga.getwct();
361 }
362
363 double mean2;
364 double dev2;
365 standard_deviation(times2_g,mean2,dev2);
366
367 double mean2_;
368 double dev2_;
369 standard_deviation(times2,mean2_,dev2_);
370
371 report_vector_funcs.graphs.put("performance.vector_layout(1).y.data.mean",mean2_/mean2);
372
373 // Deviation od x/y = x/y^2 dy + 1/y dx
374
375 report_vector_funcs.graphs.put("performance.vector_layout(1).y.data.dev",mean2_/(mean2*mean2)*dev2 + dev2_ / mean2 );
376}
377
378BOOST_AUTO_TEST_CASE(vector_performance_gpu_layout_vs_plain_array)
379{
380 std::vector<double> times(N_STAT + 1);
381 std::vector<double> times_g(N_STAT + 1);
382
383 std::vector<double> times2(N_STAT + 1);
384 std::vector<double> times2_g(N_STAT + 1);
385
386 // get test
387 double tot_accu = 0.0;
388
389 report_vector_funcs.graphs.put("performance.vector_layout_gpu(0).funcs.nele",NADD_GPU);
390 report_vector_funcs.graphs.put("performance.vector_layout_gpu(0).funcs.name","read_write_lin");
391
392 for (size_t i = 0 ; i < N_STAT+1 ; i++)
393 {
394 // create a vector
397
398 // Point
400 p.get<0>()[0] = 1.0;
401 p.get<0>()[1] = 2.0;
402
404 pa.get<0>() = 1.0;
405
406 pa.get<1>()[0] = 1.0;
407 pa.get<1>()[1] = 1.0;
408
409 pa.get<2>()[0][0] = 1.0;
410 pa.get<2>()[0][1] = 1.0;
411 pa.get<2>()[1][0] = 1.0;
412 pa.get<2>()[1][1] = 1.0;
413
414 // Add test
415
416 for (size_t j = 0 ; j < NADD_GPU ; j++)
417 {
418 v1.add(pa);
419 v2.add(p);
420 }
421
422 auto ite = v1.getGPUIterator(1536);
423
424 {
425
426 timer tga;
427 tga.startGPU();
428 CUDA_LAUNCH(read_write_ker,ite,v1.toKernel(),v2.toKernel());
429
430 tga.stopGPU();
431 times_g[i] = tga.getwctGPU();
432 }
433
434 std::cout << "OpenFPM: " << times_g[i] << std::endl;
435
436 timer tga2;
437 tga2.startGPU();
438
439 double * prp = (double *)v1.toKernel().getPointer<0>();
440 double * pos = (double *)v2.toKernel().getPointer<0>();
441
442 CUDA_LAUNCH(read_write_lin_ker,ite,pos,(struct ele *)prp);
443
444 tga2.stopGPU();
445
446 times[i] = tga2.getwctGPU();
447 std::cout << "Array: " << times[i] << std::endl;
448 }
449
450 double mean;
451 double dev;
452 standard_deviation(times_g,mean,dev);
453
454 double mean_;
455 double dev_;
456 standard_deviation(times,mean_,dev_);
457
458 report_vector_funcs.graphs.put("performance.vector_layout_gpu(0).y.data.mean",mean_/mean);
459
460 // Deviation od x/y = x/y^2 dy + 1/y dx
461
462 report_vector_funcs.graphs.put("performance.vector_layout_gpu(0).y.data.dev",mean_/(mean*mean)*dev + dev_ / mean );
463
464 report_vector_funcs.graphs.put("performance.vector_layout_gpu(1).funcs.nele",NADD);
465 report_vector_funcs.graphs.put("performance.vector_layout_gpu(1).funcs.name","read_write_inte");
466
467 for (size_t i = 0 ; i < N_STAT+1 ; i++)
468 {
469 // create a vector
472
473 // Point
475 p.get<0>()[0] = 1.0;
476 p.get<0>()[1] = 2.0;
477
479 pa.get<0>() = 1.0;
480
481 pa.get<1>()[0] = 1.0;
482 pa.get<1>()[1] = 1.0;
483
484 pa.get<2>()[0][0] = 1.0;
485 pa.get<2>()[0][1] = 1.0;
486 pa.get<2>()[1][0] = 1.0;
487 pa.get<2>()[1][1] = 1.0;
488
489 // Add test
490
491 for (size_t j = 0 ; j < NADD_GPU ; j++)
492 {
493 v1.add(pa);
494 v2.add(p);
495 }
496
497 timer tg;
498 tg.startGPU();
499
500 auto ite = v1.getGPUIterator(1536);
501
502 CUDA_LAUNCH(read_write_ker,ite,v1.toKernel(),v2.toKernel());
503
504 tg.stopGPU();
505
506 times2_g[i] = tg.getwctGPU();
507 std::cout << "OpenFPM inte: " << times2_g[i] << std::endl;
508
509 int sz = v1.size();
510
511 timer tga;
512 tga.startGPU();
513
514 double * prp0 = (double *)v1.toKernel().getPointer<0>();
515 double * prp1 = (double *)v1.toKernel().getPointer<1>();
516 double * prp2 = (double *)v1.toKernel().getPointer<2>();
517
518 double * pos = (double *)v2.toKernel().getPointer<0>();
519
520 CUDA_LAUNCH(read_write_inte_ker,ite,pos,prp0,prp1,prp2,sz);
521
522 tga.stopGPU();
523
524 times2[i] = tga.getwctGPU();
525
526 std::cout << "Array inte: " << times2[i] << std::endl;
527 }
528
529 double mean2;
530 double dev2;
531 standard_deviation(times2_g,mean2,dev2);
532
533 double mean2_;
534 double dev2_;
535 standard_deviation(times2,mean2_,dev2_);
536
537 report_vector_funcs.graphs.put("performance.vector_layout_gpu(1).y.data.mean",mean2_/mean2);
538
539 // Deviation od x/y = x/y^2 dy + 1/y dx
540
541 report_vector_funcs.graphs.put("performance.vector_layout_gpu(1).y.data.dev",mean2_/(mean2*mean2)*dev2 + dev2_ / mean2 );
542}
543
544BOOST_AUTO_TEST_CASE(vector_performance_write_report)
545{
546 // Create a graphs
547
548 report_vector_funcs.graphs.put("graphs.graph(0).type","line");
549 report_vector_funcs.graphs.add("graphs.graph(0).title","Vector add and get");
550 report_vector_funcs.graphs.add("graphs.graph(0).x.title","Tests");
551 report_vector_funcs.graphs.add("graphs.graph(0).y.title","Time seconds");
552 report_vector_funcs.graphs.add("graphs.graph(0).y.data(0).source","performance.vector(#).y.data.mean");
553 report_vector_funcs.graphs.add("graphs.graph(0).x.data(0).source","performance.vector(#).funcs.name");
554 report_vector_funcs.graphs.add("graphs.graph(0).y.data(0).title","Actual");
555 report_vector_funcs.graphs.add("graphs.graph(0).interpolation","lines");
556
557 report_vector_funcs.graphs.put("graphs.graph(1).type","line");
558 report_vector_funcs.graphs.add("graphs.graph(1).title","Vector read write");
559 report_vector_funcs.graphs.add("graphs.graph(1).x.title","Layout");
560 report_vector_funcs.graphs.add("graphs.graph(1).y.title","Time seconds");
561 report_vector_funcs.graphs.add("graphs.graph(1).y.data(0).source","performance.vector_layout(#).y.data.mean");
562 report_vector_funcs.graphs.add("graphs.graph(1).x.data(0).source","performance.vector_layout(#).funcs.name");
563 report_vector_funcs.graphs.add("graphs.graph(1).y.data(0).title","Actual");
564 report_vector_funcs.graphs.add("graphs.graph(1).interpolation","lines");
565
566 report_vector_funcs.graphs.put("graphs.graph(2).type","line");
567 report_vector_funcs.graphs.add("graphs.graph(2).title","Vector GPU read write");
568 report_vector_funcs.graphs.add("graphs.graph(2).x.title","Layout");
569 report_vector_funcs.graphs.add("graphs.graph(2).y.title","Time seconds");
570 report_vector_funcs.graphs.add("graphs.graph(2).y.data(0).source","performance.vector_layout_gpu(#).y.data.mean");
571 report_vector_funcs.graphs.add("graphs.graph(2).x.data(0).source","performance.vector_layout_gpu(#).funcs.name");
572 report_vector_funcs.graphs.add("graphs.graph(2).y.data(0).title","Actual");
573 report_vector_funcs.graphs.add("graphs.graph(2).interpolation","lines");
574
575 boost::property_tree::xml_writer_settings<std::string> settings(' ', 4);
576 boost::property_tree::write_xml("vector_performance_funcs.xml", report_vector_funcs.graphs,std::locale(),settings);
577
578 GoogleChart cg;
579
580 std::string file_xml_ref(test_dir);
581 file_xml_ref += std::string("/openfpm_data/vector_performance_funcs_ref.xml");
582
583 StandardXMLPerformanceGraph("vector_performance_funcs.xml",file_xml_ref,cg);
584
585 addUpdateTime(cg,1,"data","vector_performance_funcs");
586
587 cg.write("vector_performance_funcs.html");
588}
589
590BOOST_AUTO_TEST_SUITE_END()
591
592BOOST_AUTO_TEST_SUITE_END()
Small class to produce graph with Google chart in HTML.
void write(std::string file)
It write the graphs on file in html format using Google charts.
This class allocate, and destroy CPU memory.
Test structure used for several test.
void sety(T y_)
set the y property
void setz(T z_)
set the z property
auto get() -> decltype(boost::fusion::at_c< i >(data))
getter method for a general property i
static const unsigned int v
v property is at position 4 in the boost::fusion::vector
void sets(T s_)
set the s property
void setx(T x_)
set the x property
static const unsigned int t
t property is at position 5 in the boost::fusion::vector
Implementation of 1-D std::vector like structure.
size_t size()
Stub size.
Class for cpu time benchmarking.
Definition timer.hpp:28
void stop()
Stop the timer.
Definition timer.hpp:119
void start()
Start the timer.
Definition timer.hpp:90
double getwct()
Return the elapsed real time.
Definition timer.hpp:130
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
__device__ __host__ boost::mpl::at< type, boost::mpl::int_< i > >::type & get()
get the properties i
Transform the boost::fusion::vector into memory specification (memory_traits)