OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
 
Loading...
Searching...
No Matches
gpu_context.hpp
1#pragma once
2
3#include <exception>
4#include <cstdarg>
5#include <string>
6#include "gpu_types.hpp"
7
8
9namespace gpu {
10
// Formats the variadic arguments printf-style and returns the result as a
// std::string.
//
// format: printf-compatible format string; the variadic arguments must match
//         its conversion specifiers.
// Returns the formatted text. An empty string is returned when the format
// produces no characters or when vsnprintf reports an error (negative
// return value).
inline std::string stringprintf(const char* format, ...) {
  va_list args;
  va_start(args, format);

  // The measuring pass consumes `args`, so keep a copy for the writing pass.
  va_list args_copy;
  va_copy(args_copy, args);

  // Measuring pass: with a null buffer of size 0, vsnprintf only returns the
  // number of characters required (excluding the terminating null).
  int len = vsnprintf(nullptr, 0, format, args);
  va_end(args);

  std::string text;
  if(len > 0) {
    // Guarded on len > 0: a negative error return would otherwise be
    // converted to an enormous size_t by resize().
    text.resize(len);

    // The buffer size is len + 1 so vsnprintf can place its terminating null
    // in the slot just past the last character of the string.
    vsnprintf(&text[0], (size_t)len + 1, format, args_copy);
  }
  va_end(args_copy);

  return text;
}
27
// Tag passed to context_t::alloc / context_t::free to name the memory space
// a request refers to. The numeric values are fixed at 0 and 1.
// NOTE(review): which allocator backs each space is decided by the derived
// context and is not visible in this header.
enum memory_space_t {
  memory_space_device = 0,  // device memory space
  memory_space_host         // host memory space (implicitly 1)
};
32
33
34inline std::string device_prop_string(cudaDeviceProp prop) {
35 int ordinal;
36 cudaGetDevice(&ordinal);
37
38 size_t freeMem, totalMem;
39 cudaError_t result = cudaMemGetInfo(&freeMem, &totalMem);
40 if(cudaSuccess != result) throw cuda_exception_t(result);
41
42 double memBandwidth = (prop.memoryClockRate * 1000.0) *
43 (prop.memoryBusWidth / 8 * 2) / 1.0e9;
44
45 std::string s = stringprintf(
46 "%s : %8.3lf Mhz (Ordinal %d)\n"
47 "%d SMs enabled. Compute Capability sm_%d%d\n"
48 "FreeMem: %6dMB TotalMem: %6dMB %2d-bit pointers.\n"
49 "Mem Clock: %8.3lf Mhz x %d bits (%5.1lf GB/s)\n"
50 "ECC %s\n\n",
51 prop.name, prop.clockRate / 1000.0, ordinal,
52 prop.multiProcessorCount, prop.major, prop.minor,
53 (int)(freeMem / (1<< 20)), (int)(totalMem / (1<< 20)), 8 * sizeof(int*),
54 prop.memoryClockRate / 1000.0, prop.memoryBusWidth, memBandwidth,
55 prop.ECCEnabled ? "Enabled" : "Disabled");
56 return s;
57}
58
60// context_t
61// Derive context_t to add support for streams and a custom allocator.
62
63struct context_t {
64 context_t() = default;
65
66 // Disable copy ctor and assignment operator. We don't want to let the
67 // user copy only a slice.
68 context_t(const context_t& rhs) = delete;
69 context_t& operator=(const context_t& rhs) = delete;
70
71 virtual const cudaDeviceProp& props() const = 0;
72 virtual int ptx_version() const = 0;
73 virtual cudaStream_t stream() = 0;
74
75 // Alloc GPU memory.
76 virtual void* alloc(size_t size, memory_space_t space) = 0;
77 virtual void free(void* p, memory_space_t space) = 0;
78
79 // cudaStreamSynchronize or cudaDeviceSynchronize for stream 0.
80 virtual void synchronize() = 0;
81
82 virtual cudaEvent_t event() = 0;
83 virtual void timer_begin() = 0;
84 virtual double timer_end() = 0;
85};
86
87// Dummy kernel for retrieving PTX version.
88template<int no_arg>
89__global__ void empty_f() { }
90
91}
Aggregate of properties: from a list of objects it creates a struct that follows the OPENFPM native stru...