doxygen/openfpm/gpu__context_8hpp_source.html

#pragma once


#include <exception>

#include <cstdarg>

#include <string>

#include "gpu_types.hpp"


namespace gpu {


inline std::string stringprintf(const char* format, ...) {

  va_list args;

  va_start(args, format);

  int len = vsnprintf(0, 0, format, args);

  va_end(args);


  // allocate space.

  std::string text;

  text.resize(len);


  va_start(args, format);

  vsnprintf(&text[0], len + 1, format, args);

  va_end(args);


  return text;

}


enum memory_space_t {

  memory_space_device = 0,

  memory_space_host = 1

};


inline std::string device_prop_string(cudaDeviceProp prop) {

  int ordinal;

  cudaGetDevice(&ordinal);


  size_t freeMem, totalMem;

  cudaError_t result = cudaMemGetInfo(&freeMem, &totalMem);

  if(cudaSuccess != result) throw cuda_exception_t(result);


  double memBandwidth = (prop.memoryClockRate * 1000.0) *

    (prop.memoryBusWidth / 8 * 2) / 1.0e9;


  std::string s = stringprintf(

    "%s : %8.3lf Mhz   (Ordinal %d)\n"

    "%d SMs enabled. Compute Capability sm_%d%d\n"

    "FreeMem: %6dMB   TotalMem: %6dMB   %2d-bit pointers.\n"

    "Mem Clock: %8.3lf Mhz x %d bits   (%5.1lf GB/s)\n"

    "ECC %s\n\n",

    prop.name, prop.clockRate / 1000.0, ordinal,

    prop.multiProcessorCount, prop.major, prop.minor,

    (int)(freeMem / (1<< 20)), (int)(totalMem / (1<< 20)), 8 * sizeof(int*),

    prop.memoryClockRate / 1000.0, prop.memoryBusWidth, memBandwidth,

    prop.ECCEnabled ? "Enabled" : "Disabled");

  return s;

}


// context_t

// Derive context_t to add support for streams and a custom allocator.


struct context_t {

  context_t() = default;


  // Disable copy ctor and assignment operator. We don't want to let the

  // user copy only a slice.

  context_t(const context_t& rhs) = delete;

  context_t& operator=(const context_t& rhs) = delete;


  virtual const cudaDeviceProp& props() const = 0;

  virtual int ptx_version() const = 0;

  virtual cudaStream_t stream() = 0;


  // Alloc GPU memory.

  virtual void* alloc(size_t size, memory_space_t space) = 0;

  virtual void free(void* p, memory_space_t space) = 0;


  // cudaStreamSynchronize or cudaDeviceSynchronize for stream 0.

  virtual void synchronize() = 0;


  virtual cudaEvent_t event() = 0;

  virtual void timer_begin() = 0;

  virtual double timer_end() = 0;

};


// Dummy kernel for retrieving PTX version.

template<int no_arg>

__global__ void empty_f() { }


}

aggregate
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
Definition aggregate.hpp:215

gpu::context_t
Definition gpu_context.hpp:63