OpenFPM  5.2.0
Project that contains the implementation of distributed structures
ofp_context.hpp
1 /*
2  * ofp_context.hpp
3  *
4  * Created on: Nov 15, 2018
5  * Author: i-bird
6  */
7 
8 #ifndef OFP_CONTEXT_HXX_
9 #define OFP_CONTEXT_HXX_
10 
#include <cstddef>
#include <iostream>
#include <string>
12 
13 #ifdef CUDA_ON_CPU
14 
namespace gpu
{
	/*! \brief Options accepted by the ofp_context_t constructor
	 *
	 * In this CUDA_ON_CPU build the constructor ignores the option it is given
	 *
	 */
	enum gpu_context_opt
	{
		no_print_props,
		print_props,
		dummy
	};

	//! Empty base type of ofp_context_t in the CUDA_ON_CPU build
	struct context_t {};

	/*! \brief Host-only stand-in for the GPU context, used when CUDA_ON_CPU is defined
	 *
	 * No device is touched. Except for props() and ptx_version(), every member
	 * prints a "Not implemented" notice on std::cout and returns a neutral
	 * value (0, 0.0 or a null pointer)
	 *
	 */
	class ofp_context_t : public context_t
	{
		private:

			/*! \brief Print the "Not implemented" notice for the calling member
			 *
			 * \param line source line of the caller (pass __LINE__)
			 *
			 */
			static void not_implemented(int line)
			{
				std::cout << __FILE__ << ":" << line << " Not implemented" << std::endl;
			}

		protected:

			//! Properties string; nothing in this class ever writes to it, so it stays empty
			std::string _props;

			/*! \brief Initialization hook, intentionally empty in this build
			 *
			 * \param dev_num ignored
			 * \param opt ignored
			 *
			 */
			template<int no_arg = 0>
			void init(int dev_num, gpu_context_opt opt)
			{}

		public:

			/*! \brief Construct the context; all the arguments are ignored
			 *
			 * \param opt ignored
			 * \param dev_num ignored
			 * \param stream_ ignored
			 *
			 */
			ofp_context_t(gpu_context_opt opt = gpu_context_opt::no_print_props , int dev_num = 0, int stream_ = 0)
			{}

			//! Virtual because the class is polymorphic: destroying a derived
			//! context through an ofp_context_t pointer must run the derived destructor
			virtual ~ofp_context_t()
			{}

			/*! \brief Return the properties string
			 *
			 * \return a reference to the (empty) properties string
			 *
			 */
			virtual const std::string& props() const
			{
				return _props;
			}

			/*! \brief Return the PTX version
			 *
			 * \return always 0 in this build
			 *
			 */
			virtual int ptx_version() const
			{
				return 0;
			}

			/*! \brief Not implemented: prints a notice
			 *
			 * \return always 0
			 *
			 */
			virtual int stream()
			{
				not_implemented(__LINE__);
				return 0;
			}

			/*! \brief Not implemented: prints a notice, nothing is allocated
			 *
			 * \param size ignored
			 * \param space ignored
			 *
			 * \return always a null pointer
			 *
			 */
			virtual void* alloc(size_t size, int space)
			{
				not_implemented(__LINE__);
				return nullptr;
			}

			/*! \brief Not implemented: prints a notice, nothing is released
			 *
			 * \param p ignored
			 * \param space ignored
			 *
			 */
			virtual void free(void* p, int space)
			{
				not_implemented(__LINE__);
			}

			//! Not implemented: prints a notice
			virtual void synchronize()
			{
				not_implemented(__LINE__);
			}

			/*! \brief Not implemented: prints a notice
			 *
			 * \return always 0
			 *
			 */
			virtual int event()
			{
				not_implemented(__LINE__);
				return 0;
			}

			//! Not implemented: prints a notice
			virtual void timer_begin()
			{
				not_implemented(__LINE__);
			}

			/*! \brief Not implemented: prints a notice
			 *
			 * \return always 0.0
			 *
			 */
			virtual double timer_end()
			{
				not_implemented(__LINE__);
				return 0.0;
			}

			/*! \brief Not implemented: prints a notice
			 *
			 * \return always 0
			 *
			 */
			virtual int getDevice()
			{
				not_implemented(__LINE__);
				return 0;
			}
	};

}
109 
110 #else
111  #ifdef CUDA_GPU
112 
113  #include "util/gpu_context.hpp"
114 
115  namespace gpu
116  {
//! Options accepted by the ofp_context_t constructor
enum gpu_context_opt
{
	//! do not print the device properties
	no_print_props = 0,
	//! the constructor prints the device properties
	print_props = 1,
	//! init() does not call cudaSetDevice for this option
	dummy = 2
};
123 
125  // ofp_context_t is a trivial implementation of context_t. Users can
126  // derive this type to provide a custom allocator.
127 
128  class ofp_context_t : public context_t
129  {
130  protected:
131  cudaDeviceProp _props;
132  int _ptx_version;
133  cudaStream_t _stream;
134 
135  cudaEvent_t _timer[2];
136  cudaEvent_t _event;
137 
141 
142  // Making this a template argument means we won't generate an instance
143  // of empty_f for each translation unit.
144  template<int no_arg = 0>
145  void init(int dev_num, gpu_context_opt opt)
146  {
147  cudaFuncAttributes attr;
148  #ifdef __NVCC__
149  cudaError_t result = cudaFuncGetAttributes(&attr, (void *)empty_f<0>);
150  if(cudaSuccess != result) throw cuda_exception_t(result);
151  _ptx_version = attr.ptxVersion;
152  #else
153  _ptx_version = 60;
154  //std::cout << __FILE__ << ":" << __LINE__ << " Warning initialization of GPU context has been done from a standard Cpp file, rather than a CUDA or HIP file" << std::endl;
155  #endif
156 
157  int num_dev;
158  cudaGetDeviceCount(&num_dev);
159 
160  if (num_dev == 0) {return;}
161 
162  if (opt != gpu_context_opt::dummy)
163  {
164  cudaSetDevice(dev_num % num_dev);
165  }
166 
167  int ord;
168  cudaGetDevice(&ord);
169  cudaGetDeviceProperties(&_props, ord);
170 
171  cudaEventCreate(&_timer[0]);
172  cudaEventCreate(&_timer[1]);
173  cudaEventCreate(&_event);
174  }
175 
176  public:
177 
178 
184  ofp_context_t(gpu_context_opt opt = gpu_context_opt::no_print_props , int dev_num = 0, cudaStream_t stream_ = 0)
185  :context_t(), _stream(stream_)
186  {
187  init(dev_num,opt);
188  if(opt == gpu_context_opt::print_props)
189  {
190  printf("%s\n", device_prop_string(_props).c_str());
191  }
192  }
193 
194  ~ofp_context_t()
195  {
196  cudaEventDestroy(_timer[0]);
197  cudaEventDestroy(_timer[1]);
198  cudaEventDestroy(_event);
199  }
200 
201  virtual const cudaDeviceProp& props() const { return _props; }
202  virtual int ptx_version() const { return _ptx_version; }
203  virtual cudaStream_t stream() { return _stream; }
204 
205  // Alloc GPU memory.
206  virtual void* alloc(size_t size, memory_space_t space)
207  {
208  void* p = nullptr;
209  if(size)
210  {
211  cudaError_t result = (memory_space_device == space) ?cudaMalloc(&p, size) : cudaMallocHost(&p, size);
212  if(cudaSuccess != result) throw cuda_exception_t(result);
213  }
214  return p;
215  }
216 
217  virtual void free(void* p, memory_space_t space)
218  {
219  if(p)
220  {
221  cudaError_t result = (memory_space_device == space) ? cudaFree(p) : cudaFreeHost(p);
222  if(cudaSuccess != result) throw cuda_exception_t(result);
223  }
224  }
225 
226  virtual void synchronize()
227  {
228  cudaError_t result = _stream ?
229  cudaStreamSynchronize(_stream) :
230  cudaDeviceSynchronize();
231  if(cudaSuccess != result) throw cuda_exception_t(result);
232  }
233 
234  virtual cudaEvent_t event()
235  {
236  return _event;
237  }
238 
239  virtual void timer_begin()
240  {
241  cudaEventRecord(_timer[0], _stream);
242  }
243 
244  virtual double timer_end()
245  {
246  cudaEventRecord(_timer[1], _stream);
247  cudaEventSynchronize(_timer[1]);
248  float ms;
249  cudaEventElapsedTime(&ms, _timer[0], _timer[1]);
250  return ms / 1.0e3;
251  }
252 
253  virtual int getDevice()
254  {
255  int dev = 0;
256 
257  cudaGetDevice(&dev);
258 
259  return dev;
260  }
261 
262  virtual int getNDevice()
263  {
264  int num_dev;
265  cudaGetDeviceCount(&num_dev);
266 
267  return num_dev;
268  }
269 
271  {
272  return tmem;
273  }
274 
276  {
277  return tmem2;
278  }
279 
281  {
282  return tmem3;
283  }
284  };
285 
286  }
287 
288  #else
289 
290  namespace gpu
291  {
292 
//! Options accepted by the ofp_context_t constructor; the stub in this build ignores them
enum gpu_context_opt
{
	no_print_props = 0,
	print_props = 1,
	dummy = 2
};
299 
300  // Stub class for modern gpu
301 
303  {
304  ofp_context_t(gpu_context_opt opt = gpu_context_opt::no_print_props , int dev_num = 0)
305  {}
306  };
307  }
308 
309  #endif
310 
311 #endif
312 
313 
314 #endif /* OFP_CONTEXT_HXX_ */
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:204
OutputIteratorT OffsetT ReductionOpT OuputT init
< [in] The initial value of the reduction
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
Definition: aggregate.hpp:221