8#ifndef OFP_CONTEXT_HXX_
9#define OFP_CONTEXT_HXX_
26 class ofp_context_t :
public context_t
34 template<
int no_arg = 0>
35 void init(
int dev_num, gpu_context_opt opt)
45 ofp_context_t(gpu_context_opt opt = gpu_context_opt::no_print_props ,
int dev_num = 0,
int stream_ = 0)
51 virtual const std::string& props()
const
56 virtual int ptx_version()
const
63 std::cout << __FILE__ <<
":" << __LINE__ <<
" Not implemented" << std::endl;
68 virtual void* alloc(
size_t size,
int space)
70 std::cout << __FILE__ <<
":" << __LINE__ <<
" Not implemented" << std::endl;
74 virtual void free(
void* p,
int space)
76 std::cout << __FILE__ <<
":" << __LINE__ <<
" Not implemented" << std::endl;
79 virtual void synchronize()
81 std::cout << __FILE__ <<
":" << __LINE__ <<
" Not implemented" << std::endl;
86 std::cout << __FILE__ <<
":" << __LINE__ <<
" Not implemented" << std::endl;
90 virtual void timer_begin()
92 std::cout << __FILE__ <<
":" << __LINE__ <<
" Not implemented" << std::endl;
95 virtual double timer_end()
97 std::cout << __FILE__ <<
":" << __LINE__ <<
" Not implemented" << std::endl;
101 virtual int getDevice()
103 std::cout << __FILE__ <<
":" << __LINE__ <<
" Not implemented" << std::endl;
113 #include "util/gpu_context.hpp"
128 class ofp_context_t :
public context_t
131 cudaDeviceProp _props;
133 cudaStream_t _stream;
135 cudaEvent_t _timer[2];
144 template<
int no_arg = 0>
145 void init(
int dev_num, gpu_context_opt opt)
147 cudaFuncAttributes attr;
149 cudaError_t result = cudaFuncGetAttributes(&attr, (
void *)empty_f<0>);
150 if(cudaSuccess != result)
throw cuda_exception_t(result);
151 _ptx_version = attr.ptxVersion;
158 cudaGetDeviceCount(&num_dev);
160 if (num_dev == 0) {
return;}
162 if (opt != gpu_context_opt::dummy)
164 cudaSetDevice(dev_num % num_dev);
169 cudaGetDeviceProperties(&_props, ord);
171 cudaEventCreate(&_timer[0]);
172 cudaEventCreate(&_timer[1]);
173 cudaEventCreate(&_event);
184 ofp_context_t(gpu_context_opt opt = gpu_context_opt::no_print_props ,
int dev_num = 0, cudaStream_t stream_ = 0)
185 :context_t(), _stream(stream_)
188 if(opt == gpu_context_opt::print_props)
190 printf(
"%s\n", device_prop_string(_props).c_str());
196 cudaEventDestroy(_timer[0]);
197 cudaEventDestroy(_timer[1]);
198 cudaEventDestroy(_event);
201 virtual const cudaDeviceProp& props()
const {
return _props; }
202 virtual int ptx_version()
const {
return _ptx_version; }
203 virtual cudaStream_t stream() {
return _stream; }
206 virtual void* alloc(
size_t size, memory_space_t space)
211 cudaError_t result = (memory_space_device == space) ?cudaMalloc(&p, size) : cudaMallocHost(&p, size);
212 if(cudaSuccess != result)
throw cuda_exception_t(result);
217 virtual void free(
void* p, memory_space_t space)
221 cudaError_t result = (memory_space_device == space) ? cudaFree(p) : cudaFreeHost(p);
222 if(cudaSuccess != result)
throw cuda_exception_t(result);
226 virtual void synchronize()
228 cudaError_t result = _stream ?
229 cudaStreamSynchronize(_stream) :
230 cudaDeviceSynchronize();
231 if(cudaSuccess != result)
throw cuda_exception_t(result);
234 virtual cudaEvent_t event()
239 virtual void timer_begin()
241 cudaEventRecord(_timer[0], _stream);
244 virtual double timer_end()
246 cudaEventRecord(_timer[1], _stream);
247 cudaEventSynchronize(_timer[1]);
249 cudaEventElapsedTime(&ms, _timer[0], _timer[1]);
253 virtual int getDevice()
262 virtual int getNDevice()
265 cudaGetDeviceCount(&num_dev);
304 ofp_context_t(gpu_context_opt opt = gpu_context_opt::no_print_props ,
int dev_num = 0)
Implementation of 1-D std::vector like structure.
OutputIteratorT OffsetT ReductionOpT OuputT init
< [in] The initial value of the reduction