OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
ofp_context.hpp
1/*
2 * ofp_context.hpp
3 *
4 * Created on: Nov 15, 2018
5 * Author: i-bird
6 */
7
8#ifndef OFP_CONTEXT_HXX_
9#define OFP_CONTEXT_HXX_
10
11#include <iostream>
12
13#ifdef CUDA_ON_CPU
14
namespace gpu
{
	// Options accepted by the ofp_context_t constructor. In this CPU-only
	// build the constructor body is empty, so the value is accepted only for
	// interface compatibility and has no effect.
	enum gpu_context_opt
	{
		no_print_props,
		print_props,
		dummy
	};

	// Empty base type of the CPU-only context.
	struct context_t {};

	// Stand-in context used when the code is built with CUDA_ON_CPU.
	//
	// No device is touched: props() returns an empty string, ptx_version()
	// returns 0, and every other operation prints a "Not implemented"
	// message to std::cout and returns a zero/null value.
	class ofp_context_t : public context_t
	{
		protected:

		// Property string; never filled in by this implementation.
		std::string _props;

		// No-op initialization hook; both arguments are ignored.
		template<int no_arg = 0>
		void init(int dev_num, gpu_context_opt opt)
		{}

		public:

		// Construct the context. All three arguments are ignored.
		ofp_context_t(gpu_context_opt opt = gpu_context_opt::no_print_props , int dev_num = 0, int stream_ = 0)
		{}

		// Virtual because the class exposes virtual methods and is meant to be
		// derived from: destroying a derived context through an
		// ofp_context_t pointer must run the derived destructor.
		virtual ~ofp_context_t()
		{}

		// Return the (empty) property string.
		virtual const std::string& props() const
		{
			return _props;
		}

		// Always 0 in this build.
		virtual int ptx_version() const
		{
			return 0;
		}

		// Not implemented: reports it on std::cout and returns 0.
		virtual int stream()
		{
			std::cout << __FILE__ << ":" << __LINE__ << " Not implemented" << std::endl;
			return 0;
		}

		// Alloc GPU memory.
		// Not implemented: reports it on std::cout and returns a null pointer.
		virtual void* alloc(size_t size, int space)
		{
			std::cout << __FILE__ << ":" << __LINE__ << " Not implemented" << std::endl;
			return nullptr;
		}

		// Not implemented: reports it on std::cout; p is left untouched.
		virtual void free(void* p, int space)
		{
			std::cout << __FILE__ << ":" << __LINE__ << " Not implemented" << std::endl;
		}

		// Not implemented: reports it on std::cout.
		virtual void synchronize()
		{
			std::cout << __FILE__ << ":" << __LINE__ << " Not implemented" << std::endl;
		}

		// Not implemented: reports it on std::cout and returns 0.
		virtual int event()
		{
			std::cout << __FILE__ << ":" << __LINE__ << " Not implemented" << std::endl;
			return 0;
		}

		// Not implemented: reports it on std::cout.
		virtual void timer_begin()
		{
			std::cout << __FILE__ << ":" << __LINE__ << " Not implemented" << std::endl;
		}

		// Not implemented: reports it on std::cout and returns 0.0.
		virtual double timer_end()
		{
			std::cout << __FILE__ << ":" << __LINE__ << " Not implemented" << std::endl;
			return 0.0;
		}

		// Not implemented: reports it on std::cout and returns 0.
		virtual int getDevice()
		{
			std::cout << __FILE__ << ":" << __LINE__ << " Not implemented" << std::endl;
			return 0;
		}
	};

}
109
110#else
111 #ifdef CUDA_GPU
112
113 #include "util/gpu_context.hpp"
114
115 namespace gpu
116 {
// Options accepted by the ofp_context_t constructor.
enum gpu_context_opt
{
	// Default: construct the context without printing anything.
	no_print_props = 0,
	// The constructor prints the device property string after initialization.
	print_props = 1,
	// init() does not call cudaSetDevice; the current device is kept.
	dummy = 2
};
123
125 // ofp_context_t is a trivial implementation of context_t. Users can
126 // derive this type to provide a custom allocator.
127
128 class ofp_context_t : public context_t
129 {
130 protected:
131 cudaDeviceProp _props;
132 int _ptx_version;
133 cudaStream_t _stream;
134
135 cudaEvent_t _timer[2];
136 cudaEvent_t _event;
137
141
142 // Making this a template argument means we won't generate an instance
143 // of empty_f for each translation unit.
144 template<int no_arg = 0>
145 void init(int dev_num, gpu_context_opt opt)
146 {
147 cudaFuncAttributes attr;
148 #ifdef __NVCC__
149 cudaError_t result = cudaFuncGetAttributes(&attr, (void *)empty_f<0>);
150 if(cudaSuccess != result) throw cuda_exception_t(result);
151 _ptx_version = attr.ptxVersion;
152 #else
153 _ptx_version = 60;
154 //std::cout << __FILE__ << ":" << __LINE__ << " Warning initialization of GPU context has been done from a standard Cpp file, rather than a CUDA or HIP file" << std::endl;
155 #endif
156
157 int num_dev;
158 cudaGetDeviceCount(&num_dev);
159
160 if (num_dev == 0) {return;}
161
162 if (opt != gpu_context_opt::dummy)
163 {
164 cudaSetDevice(dev_num % num_dev);
165 }
166
167 int ord;
168 cudaGetDevice(&ord);
169 cudaGetDeviceProperties(&_props, ord);
170
171 cudaEventCreate(&_timer[0]);
172 cudaEventCreate(&_timer[1]);
173 cudaEventCreate(&_event);
174 }
175
176 public:
177
178
184 ofp_context_t(gpu_context_opt opt = gpu_context_opt::no_print_props , int dev_num = 0, cudaStream_t stream_ = 0)
185 :context_t(), _stream(stream_)
186 {
187 init(dev_num,opt);
188 if(opt == gpu_context_opt::print_props)
189 {
190 printf("%s\n", device_prop_string(_props).c_str());
191 }
192 }
193
194 ~ofp_context_t()
195 {
196 cudaEventDestroy(_timer[0]);
197 cudaEventDestroy(_timer[1]);
198 cudaEventDestroy(_event);
199 }
200
201 virtual const cudaDeviceProp& props() const { return _props; }
202 virtual int ptx_version() const { return _ptx_version; }
203 virtual cudaStream_t stream() { return _stream; }
204
205 // Alloc GPU memory.
206 virtual void* alloc(size_t size, memory_space_t space)
207 {
208 void* p = nullptr;
209 if(size)
210 {
211 cudaError_t result = (memory_space_device == space) ?cudaMalloc(&p, size) : cudaMallocHost(&p, size);
212 if(cudaSuccess != result) throw cuda_exception_t(result);
213 }
214 return p;
215 }
216
217 virtual void free(void* p, memory_space_t space)
218 {
219 if(p)
220 {
221 cudaError_t result = (memory_space_device == space) ? cudaFree(p) : cudaFreeHost(p);
222 if(cudaSuccess != result) throw cuda_exception_t(result);
223 }
224 }
225
226 virtual void synchronize()
227 {
228 cudaError_t result = _stream ?
229 cudaStreamSynchronize(_stream) :
230 cudaDeviceSynchronize();
231 if(cudaSuccess != result) throw cuda_exception_t(result);
232 }
233
234 virtual cudaEvent_t event()
235 {
236 return _event;
237 }
238
239 virtual void timer_begin()
240 {
241 cudaEventRecord(_timer[0], _stream);
242 }
243
244 virtual double timer_end()
245 {
246 cudaEventRecord(_timer[1], _stream);
247 cudaEventSynchronize(_timer[1]);
248 float ms;
249 cudaEventElapsedTime(&ms, _timer[0], _timer[1]);
250 return ms / 1.0e3;
251 }
252
253 virtual int getDevice()
254 {
255 int dev = 0;
256
257 cudaGetDevice(&dev);
258
259 return dev;
260 }
261
262 virtual int getNDevice()
263 {
264 int num_dev;
265 cudaGetDeviceCount(&num_dev);
266
267 return num_dev;
268 }
269
271 {
272 return tmem;
273 }
274
276 {
277 return tmem2;
278 }
279
281 {
282 return tmem3;
283 }
284 };
285
286 }
287
288 #else
289
290 namespace gpu
291 {
292
// Options accepted by the ofp_context_t constructor. The stub constructor in
// this branch has an empty body, so the value has no effect here.
enum gpu_context_opt
{
	no_print_props = 0,
	print_props = 1,
	dummy = 2
};
299
300 // Stub class for modern gpu
301
303 {
304 ofp_context_t(gpu_context_opt opt = gpu_context_opt::no_print_props , int dev_num = 0)
305 {}
306 };
307 }
308
309 #endif
310
311#endif
312
313
314#endif /* OFP_CONTEXT_HXX_ */
Implementation of 1-D std::vector like structure.
OutputIteratorT OffsetT ReductionOpT OuputT init
< [in] The initial value of the reduction