OpenFPM  5.2.0
Project that contains the implementation of distributed structures
gpu_context.hpp
1 #pragma once
2 
3 #include <exception>
4 #include <cstdarg>
5 #include <string>
6 #include "gpu_types.hpp"
7 
8 
9 namespace gpu {
10 
// Formats the variadic arguments according to the printf-style `format`
// string and returns the result as a std::string.
//
// The work is done in two vsnprintf passes: the first one only measures the
// length of the formatted text, the second one writes it into a string that
// has been resized to exactly that length.
//
// Returns an empty string when the formatted text is empty or when vsnprintf
// reports an error (negative return value).
inline std::string stringprintf(const char* format, ...) {
  va_list args;

  // Pass 1: a null buffer of size 0 makes vsnprintf return the number of
  // characters that would be written, excluding the terminating null.
  va_start(args, format);
  int len = vsnprintf(nullptr, 0, format, args);
  va_end(args);

  // A negative length signals a formatting/encoding error. Passing it to
  // std::string::resize would convert it to a huge size_t and throw
  // std::length_error, so bail out with an empty string instead.
  if(len <= 0) return std::string();

  // Allocate space for exactly `len` characters.
  std::string text;
  text.resize(static_cast<size_t>(len));

  // Pass 2: the va_list must be restarted because the first pass consumed it.
  // The buffer size is len + 1 so vsnprintf can place its terminating null on
  // the string's own terminator slot.
  va_start(args, format);
  vsnprintf(&text[0], static_cast<size_t>(len) + 1, format, args);
  va_end(args);

  return text;
}
27 
// Tag passed as the `space` argument of context_t::alloc / context_t::free.
// Enumerators are numbered implicitly: memory_space_device is 0 and
// memory_space_host is 1.
enum memory_space_t {
  memory_space_device,
  memory_space_host
};
32 
33 
34 inline std::string device_prop_string(cudaDeviceProp prop) {
35  int ordinal;
36  cudaGetDevice(&ordinal);
37 
38  size_t freeMem, totalMem;
39  cudaError_t result = cudaMemGetInfo(&freeMem, &totalMem);
40  if(cudaSuccess != result) throw cuda_exception_t(result);
41 
42  double memBandwidth = (prop.memoryClockRate * 1000.0) *
43  (prop.memoryBusWidth / 8 * 2) / 1.0e9;
44 
45  std::string s = stringprintf(
46  "%s : %8.3lf Mhz (Ordinal %d)\n"
47  "%d SMs enabled. Compute Capability sm_%d%d\n"
48  "FreeMem: %6dMB TotalMem: %6dMB %2d-bit pointers.\n"
49  "Mem Clock: %8.3lf Mhz x %d bits (%5.1lf GB/s)\n"
50  "ECC %s\n\n",
51  prop.name, prop.clockRate / 1000.0, ordinal,
52  prop.multiProcessorCount, prop.major, prop.minor,
53  (int)(freeMem / (1<< 20)), (int)(totalMem / (1<< 20)), 8 * sizeof(int*),
54  prop.memoryClockRate / 1000.0, prop.memoryBusWidth, memBandwidth,
55  prop.ECCEnabled ? "Enabled" : "Disabled");
56  return s;
57 }
58 
60 // context_t
61 // Derive context_t to add support for streams and a custom allocator.
62 
63 struct context_t {
64  context_t() = default;
65 
66  // Disable copy ctor and assignment operator. We don't want to let the
67  // user copy only a slice.
68  context_t(const context_t& rhs) = delete;
69  context_t& operator=(const context_t& rhs) = delete;
70 
71  virtual const cudaDeviceProp& props() const = 0;
72  virtual int ptx_version() const = 0;
73  virtual cudaStream_t stream() = 0;
74 
75  // Alloc GPU memory.
76  virtual void* alloc(size_t size, memory_space_t space) = 0;
77  virtual void free(void* p, memory_space_t space) = 0;
78 
79  // cudaStreamSynchronize or cudaDeviceSynchronize for stream 0.
80  virtual void synchronize() = 0;
81 
82  virtual cudaEvent_t event() = 0;
83  virtual void timer_begin() = 0;
84  virtual double timer_end() = 0;
85 };
86 
87 // Dummy kernel for retrieving PTX version.
88 template<int no_arg>
89 __global__ void empty_f() { }
90 
91 }
Aggregate of properties: from a list of objects, it creates a struct that follows the OPENFPM native stru...
Definition: aggregate.hpp:221