OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
main.cu
1
2#ifdef __NVCC__
3
4#define PRINT_STACKTRACE
5//#define STOP_ON_ERROR
6#define OPENMPI
7//#define SE_CLASS1
8
9//#define USE_LOW_REGISTER_ITERATOR
10
11#include "Vector/vector_dist.hpp"
12#include <math.h>
13#include "Draw/DrawParticles.hpp"
14#include "util/stat/common_statistics.hpp"
15
16
// Issues one group of six back-to-back block-wide barriers.
// Forced inline so the calling kernel keeps a straight-line barrier sequence.
static __device__ __forceinline__ void test1_sync_group6()
{
	__syncthreads(); __syncthreads(); __syncthreads();
	__syncthreads(); __syncthreads(); __syncthreads();
}

// Benchmark kernel: every thread executes 24 consecutive __syncthreads()
// (four groups of six) and does nothing else. It takes no arguments and
// neither reads nor writes any memory.
__global__ void test1_syncthreads()
{
	test1_sync_group6();   // barriers  1 ..  6

	test1_sync_group6();   // barriers  7 .. 12

	test1_sync_group6();   // barriers 13 .. 18

	test1_sync_group6();   // barriers 19 .. 24
}
47
48
// Launch configuration holder: a pair of dim3 extents plus helpers that
// return the total element count of each.
struct ite_g
{
	// extents whose product nblocks() reports (the number of blocks)
	dim3 wthr;
	// extents whose product nthrs() reports (the number of threads)
	dim3 thr;

	// Total number of blocks: wthr.x * wthr.y * wthr.z.
	// The first factor is widened to size_t before multiplying: the dim3
	// members are 32-bit unsigned, so a plain product would wrap around for
	// large grids (e.g. 65536 x 65536 x 1) before being converted.
	size_t nblocks() const
	{
		return static_cast<size_t>(wthr.x) * wthr.y * wthr.z;
	}

	// Total number of threads: thr.x * thr.y * thr.z, computed in size_t
	// for the same reason as nblocks().
	size_t nthrs() const
	{
		return static_cast<size_t>(thr.x) * thr.y * thr.z;
	}
};
64
// Benchmark driver: launches test1_syncthreads ten times, records the
// wall-clock time of every launch and prints the mean time per
// __syncthreads() call (in ns) together with its standard deviation.
// Returns 0 on success, 1 if the device reports an error after a launch.
int main(int argc, char* argv[])
{
	// initialize the library
	openfpm_init(&argc,&argv);

	// Wall-clock time of each timed launch as returned by timer::getwct().
	// This container is filled with add() and passed to standard_deviation()
	// below, so it has to be declared before the loop.
	openfpm::vector<double> tele_ker;

	ite_g g;

	g.wthr = dim3(512*512,1,1);
	g.thr = dim3(8,1,1);

	// Number of __syncthreads() executed by each thread of
	// test1_syncthreads; must be kept in sync with the kernel body.
	const int syncs_per_thread = 24;

	for (int i = 0; i < 10; i++)
	{
		timer t_ker;
		t_ker.start();

		CUDA_LAUNCH(test1_syncthreads,g);

		// Kernel launches are asynchronous with respect to the host, so wait
		// for the kernel to finish before stopping the timer; otherwise only
		// the launch overhead would be measured.
		// NOTE(review): CUDA_LAUNCH is defined outside this file; if it
		// already synchronizes, this call is redundant but harmless.
		cudaError_t err = cudaDeviceSynchronize();

		t_ker.stop();

		if (err != cudaSuccess)
		{
			std::cerr << "CUDA error after test1_syncthreads: " << cudaGetErrorString(err) << std::endl;
			openfpm_finalize();
			return 1;
		}

		std::cout << "TKERNEL: " << t_ker.getwct() << std::endl;

		tele_ker.add(t_ker.getwct());
	}

	double tele_ker_mean;
	double tele_ker_dev;
	standard_deviation(tele_ker,tele_ker_mean,tele_ker_dev);

	// Divisor used to normalize the timings:
	// blocks * barriers per thread * threads along x.
	const double n_barriers = static_cast<double>(g.nblocks()) * syncs_per_thread * g.thr.x;

	std::cout << g.nblocks() << " " << g.thr.x << std::endl;
	// Mean and deviation are scaled by the same factor so that both are
	// reported in the same unit (ns per barrier).
	std::cout << "SYNCTHREAD LATENCY: " << tele_ker_mean / n_barriers * 1e9 << " ns " << " error: " << tele_ker_dev / n_barriers * 1e9 << " ns" << std::endl;

	openfpm_finalize();

	return 0;
}
110
111#else
112
// Fallback entry point for builds where __NVCC__ is not defined: there is
// no kernel to benchmark, so the program does nothing and reports success.
int main(int argc, char* argv[])
{
	// the command line is intentionally ignored
	(void)argc;
	(void)argv;

	return 0;
}
117
118#endif
Implementation of 1-D std::vector like structure.
Class for CPU time benchmarking.
Definition timer.hpp:28
void stop()
Stop the timer.
Definition timer.hpp:119
void start()
Start the timer.
Definition timer.hpp:90
double getwct()
Return the elapsed real time.
Definition timer.hpp:130