OpenFPM  5.2.0
Project that contains the implementation of distributed structures
main.cu
1 
2 #ifdef __NVCC__
3 
4 #define PRINT_STACKTRACE
5 //#define STOP_ON_ERROR
6 #define OPENMPI
7 //#define SE_CLASS1
8 
9 //#define USE_LOW_REGISTER_ITERATOR
10 
11 #include "Vector/vector_dist.hpp"
12 #include <math.h>
13 #include "Draw/DrawParticles.hpp"
14 #include "util/stat/common_statistics.hpp"
15 
16 
/*! \brief Kernel that executes 24 block-wide barriers and nothing else
 *
 * The kernel takes no arguments and touches no memory: its whole body is a
 * sequence of __syncthreads() calls. The control flow is uniform, so every
 * thread of a block reaches every barrier.
 *
 * The trip count is a compile-time constant and the loop is marked for
 * unrolling, so that the barriers are issued back to back. The host code in
 * this file divides the measured time by 24, so the count must stay at 24.
 */
__global__ void test1_syncthreads()
{
	// number of barriers executed by every thread
	const int n_barriers = 24;

	#pragma unroll
	for (int b = 0; b < n_barriers; ++b)
	{
		__syncthreads();
	}
}
47 
48 
/*! \brief Launch configuration: grid size and block size of a kernel launch
 *
 * wthr is the grid dimension (number of blocks per axis) and thr is the block
 * dimension (number of threads per axis).
 */
struct ite_g
{
	//! number of blocks along x, y and z
	dim3 wthr;

	//! number of threads per block along x, y and z
	dim3 thr;

	/*! \brief Total number of blocks in the grid
	 *
	 * The components of dim3 are 32-bit unsigned integers, so the first factor
	 * is widened to size_t before multiplying; otherwise the product would wrap
	 * around for large grids before being converted to the return type.
	 *
	 * \return wthr.x * wthr.y * wthr.z
	 */
	size_t nblocks() const
	{
		return static_cast<size_t>(wthr.x) * wthr.y * wthr.z;
	}

	/*! \brief Total number of threads in one block
	 *
	 * Computed in size_t for the same reason as nblocks().
	 *
	 * \return thr.x * thr.y * thr.z
	 */
	size_t nthrs() const
	{
		return static_cast<size_t>(thr.x) * thr.y * thr.z;
	}
};
64 
/*! \brief Benchmark of the cost of __syncthreads()
 *
 * Launches test1_syncthreads 10 times on a grid of 512*512 blocks of 8 threads,
 * measures the wall-clock time of every launch (up to its completion on the
 * device) and prints mean and standard deviation, both normalized by
 * (number of blocks) * (24 barriers per thread) * (threads per block) and
 * scaled by 1e9.
 *
 * \param argc number of command line arguments, forwarded to openfpm_init
 * \param argv command line arguments, forwarded to openfpm_init
 *
 * \return 0
 */
int main(int argc, char* argv[])
{
	// initialize the library
	openfpm_init(&argc,&argv);

	// elapsed time of every timed launch, as returned by timer::getwct()
	openfpm::vector<double> tele_ker;

	ite_g g;

	g.wthr = dim3(512*512,1,1);
	g.thr = dim3(8,1,1);

	for (int i = 0; i < 10; i++)
	{
		timer t_ker;
		t_ker.start();

		CUDA_LAUNCH(test1_syncthreads,g);

		// A kernel launch returns to the host before the kernel has finished.
		// Wait for the device so that the timer covers the execution of the
		// kernel and not only the cost of enqueuing it.
		cudaDeviceSynchronize();

		t_ker.stop();

		const double elapsed = t_ker.getwct();

		std::cout << "TKERNEL: " << elapsed << std::endl;

		tele_ker.add(elapsed);
	}

	double tele_ker_mean = 0.0;
	double tele_ker_dev = 0.0;
	standard_deviation(tele_ker,tele_ker_mean,tele_ker_dev);

	// Total number of barrier executions of one launch: every thread of every
	// block executes the 24 __syncthreads() of test1_syncthreads. Computed in
	// double so that the product cannot wrap around in 32-bit arithmetic.
	const double n_sync = static_cast<double>(g.nblocks()) * 24.0 * static_cast<double>(g.nthrs());

	std::cout << g.nblocks() << " " << g.nthrs() << std::endl;

	// The deviation is normalized exactly like the mean, so that both numbers
	// are reported in the same unit.
	// NOTE(review): the 1e9 factor assumes timer::getwct() returns seconds - confirm
	std::cout << "SYNCTHREAD LATENCY: " << tele_ker_mean / n_sync * 1e9 << " ns " << " error: " << tele_ker_dev / n_sync * 1e9 << " ns" << std::endl;

	openfpm_finalize();

	return 0;
}
110 
111 #else
112 
/*! \brief Fallback entry point used when the file is not compiled with NVCC
 *
 * There is no kernel to benchmark in this configuration, so the program does
 * nothing and reports success.
 */
int main(int argc, char* argv[])
{
	// the arguments are intentionally unused
	(void)argc;
	(void)argv;

	return 0;
}
117 
118 #endif
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:204
Class for cpu time benchmarking.
Definition: timer.hpp:28
void stop()
Stop the timer.
Definition: timer.hpp:119
void start()
Start the timer.
Definition: timer.hpp:90
double getwct()
Return the elapsed real time.
Definition: timer.hpp:130