doxygen/openfpm/CudaMemory_8cu_source.html

#include "config.h"

#include <cstddef>

#include "CudaMemory.cuh"

#include "cuda_macro.h"

#include "util/cudify/cudify.hpp"

#include <cstring>


// Numeric tag 0x1201. NOTE(review): not referenced anywhere in the visible part
// of this file; its consumer (if any) lives elsewhere — confirm before removing.
#define CUDA_EVENT 0x1201


/*! \brief Push the host buffer to the device buffer
 *
 * When both the host buffer (hm) and the device buffer (dm) exist, sz+32 bytes
 * are copied from hm to dm. If either one is missing nothing is copied.
 *
 * \return always true
 */
bool CudaMemory::flush()
{
    // without both sides of the mirror there is nothing to transfer
    if (hm == NULL || dm == NULL)
    {return true;}

    #ifdef __HIP__
    CUDA_SAFE_CALL(hipMemcpy(dm,hm,sz+32,hipMemcpyHostToDevice));
    #else
    CUDA_SAFE_CALL(cudaMemcpy(dm,hm,sz+32,cudaMemcpyHostToDevice));
    #endif

    return true;
}


/*! \brief Allocate the device buffer
 *
 * If no device buffer exists yet, a buffer of sz+32 bytes is created: hipMalloc
 * under __HIP__, cudaMalloc under CUDIFY_USE_CUDA, otherwise new[] (skipped in
 * that last case when sz is 0). If a device buffer already exists, the call
 * only succeeds when sz equals the size already recorded.
 *
 * \param sz requested size in bytes
 *
 * \return false if a buffer already exists with a different size, true otherwise
 */
bool CudaMemory::allocate(size_t sz)
{
    if (dm != NULL)
    {
        // an existing buffer can only be "re-allocated" with the very same size
        if (sz != this->sz)
        {
            std::cout << __FILE__ << ":" << __LINE__ << " error FATAL: using allocate to resize the memory, please use resize." << std::endl;
            return false;
        }
    }
    else
    {
        #ifdef __HIP__
        CUDA_SAFE_CALL(hipMalloc(&dm,sz+32));
        #elif defined(CUDIFY_USE_CUDA)
        CUDA_SAFE_CALL(cudaMalloc(&dm,sz+32));
        #else
        if (sz != 0)
        {
            dm = new unsigned char[sz+32];
            #ifdef GARBAGE_INJECTOR
            memset(dm,0xFF,sz+32);
            #endif
        }
        #endif
    }

    this->sz = sz;

#if defined(GARBAGE_INJECTOR) && !defined(CUDA_ON_CPU)
    // Fill the first sz bytes with byte value -1 (0xFF).
    // NOTE(review): this path calls cudaMemset even when __HIP__ is defined,
    // unlike resize() which uses hipMemset there — confirm this is intended.
    CUDA_SAFE_CALL(cudaMemset(dm,-1,sz))
#endif

    return true;
}


/*! \brief Release the device and the host buffer
 *
 * Each buffer that exists is freed with the routine matching the active
 * backend (hipFree/hipHostFree, cudaFree/cudaFreeHost or delete[]) and its
 * pointer is reset to NULL. The recorded size is set to 0 in every case.
 */
void CudaMemory::destroy()
{
    // device side
    if (dm != NULL)
    {
        #ifdef __HIP__
        CUDA_SAFE_CALL(hipFree(dm));
        #elif defined(CUDIFY_USE_CUDA)
        CUDA_SAFE_CALL(cudaFree(dm));
        #else
        delete [] static_cast<unsigned char *>(dm);
        #endif

        dm = NULL;
    }

    // host side
    if (hm != NULL)
    {
        #ifdef __HIP__
        CUDA_SAFE_CALL(hipHostFree(hm));
        #elif defined(CUDIFY_USE_CUDA)
        CUDA_SAFE_CALL(cudaFreeHost(hm));
        #else
        delete [] static_cast<unsigned char *>(hm);
        #endif

        hm = NULL;
    }

    sz = 0;
}


/*! \brief Copy a byte range from another device pointer into this device buffer
 *
 * Copies (stop-start) bytes from ptr+start to dm+offset with a
 * device-to-device memcpy. No bounds are checked here.
 *
 * \param ptr source pointer
 * \param start byte offset in ptr where the copy starts
 * \param stop byte offset in ptr where the copy ends (exclusive)
 * \param offset byte offset in this device buffer where the data is written
 */
void CudaMemory::deviceToDevice(void * ptr, size_t start, size_t stop, size_t offset)
{
    unsigned char * dst = static_cast<unsigned char *>(dm) + offset;
    unsigned char * src = static_cast<unsigned char *>(ptr) + start;
    const size_t n_bytes = stop - start;

    #ifdef __HIP__
    CUDA_SAFE_CALL(hipMemcpy(dst,src,n_bytes,hipMemcpyDeviceToDevice));
    #else
    CUDA_SAFE_CALL(cudaMemcpy(dst,src,n_bytes,cudaMemcpyDeviceToDevice));
    #endif
}


/*! \brief Create the host buffer if it does not exist yet
 *
 * When hm is NULL a buffer of sz+32 bytes is created: hipHostMalloc or
 * cudaHostAlloc with the "mapped" flag on the GPU backends, new[] otherwise.
 * When hm is already set the call does nothing (the size is not re-checked).
 *
 * \param sz requested size in bytes
 */
void CudaMemory::allocate_host(size_t sz) const
{
    // a host buffer is already there: keep it
    if (hm != NULL)
    {return;}

    #ifdef __HIP__
    CUDA_SAFE_CALL(hipHostMalloc(&hm,sz+32,hipHostMallocMapped))
    #elif defined(CUDIFY_USE_CUDA)
    CUDA_SAFE_CALL(cudaHostAlloc(&hm,sz+32,cudaHostAllocMapped))
    #else
    hm = new unsigned char[sz+32];
    #ifdef GARBAGE_INJECTOR
    memset(hm,0xFF,sz+32);
    #endif
    #endif
}


/*! \brief Copy sz+32 bytes from a raw pointer into the host buffer
 *
 * The host buffer is created on demand. The pointer returned by
 * hipHostGetDevicePointer/cudaHostGetDevicePointer for hm is then used as the
 * destination of a plain memcpy that reads sz+32 bytes from ptr.
 *
 * \param ptr source; sz+32 bytes are read from it
 *
 * \return always true
 */
bool CudaMemory::copyFromPointer(const void * ptr)
{
    // make sure there is a host buffer to write into
    allocate_host(sz);

    // pointer associated with the host buffer by the runtime
    void * dvp;

    #ifdef __HIP__
    CUDA_SAFE_CALL(hipHostGetDevicePointer(&dvp,hm,0));
    #else
    CUDA_SAFE_CALL(cudaHostGetDevicePointer(&dvp,hm,0));
    #endif

    // the copy itself is identical for every backend
    memcpy(dvp,ptr,sz+32);

    return true;
}


/*! \brief Copy the device buffer of m into the device buffer of this object
 *
 * The copy is refused when m is larger than this object. Otherwise, unless
 * this object has size 0, m.sz+32 bytes are copied device-to-device.
 *
 * \param m source object
 *
 * \return false if m does not fit into this object, true otherwise
 */
bool CudaMemory::copyDeviceToDevice(const CudaMemory & m)
{
    // the source has to fit into the destination
    if (sz < m.sz)
    {
        std::cerr << "Error " << __LINE__ << __FILE__ << ": source buffer is too big to copy";
        return false;
    }

    // empty destination: nothing to do
    if (sz == 0)
    {return true;}

    #ifdef __HIP__
    CUDA_SAFE_CALL(hipMemcpy(dm,m.dm,m.sz+32,hipMemcpyDeviceToDevice));
    #else
    CUDA_SAFE_CALL(cudaMemcpy(dm,m.dm,m.sz+32,cudaMemcpyDeviceToDevice));
    #endif

    return true;
}


bool CudaMemory::copy(const memory & m)

{

    const CudaMemory * ofpm = dynamic_cast<const CudaMemory *>(&m);


    if (ofpm == NULL)

    {

        // copy the memory from device to host and from host to device


        return copyFromPointer(m.getPointer());

    }

    else

    {

        // they are the same memory type, use cuda/thrust buffer copy


        return copyDeviceToDevice(*ofpm);

    }

}


size_t CudaMemory::size() const

{

    return sz;

}


bool CudaMemory::resize(size_t sz)

{

    // if the allocated memory is enough, do not resize

    if (sz <= CudaMemory::size())

    {return true;}


    if (CudaMemory::size() == 0)

    {return allocate(sz);}


    void * thm = NULL;


    void * tdm = NULL;


    if (dm != NULL)

    {

        if (this->sz < sz)

        {

            #ifdef __HIP__

            CUDA_SAFE_CALL(hipMalloc(&tdm,sz+32));

            #elif defined(CUDIFY_USE_CUDA)

            CUDA_SAFE_CALL(cudaMalloc(&tdm,sz+32));

            #else

            tdm = new unsigned char [sz+32];

            #ifdef GARBAGE_INJECTOR

            memset(tdm,0xFF,sz+32);

            #endif

            #endif


#ifdef GARBAGE_INJECTOR

            #ifdef __HIP__

            CUDA_SAFE_CALL(hipMemset(tdm,-1,sz+32));

            #elif defined(CUDIFY_USE_CUDA)

            CUDA_SAFE_CALL(cudaMemset(tdm,-1,sz+32));

            #endif

#endif

        }


        #ifdef __HIP__

        CUDA_SAFE_CALL(hipMemcpy(tdm,dm,CudaMemory::size(),hipMemcpyDeviceToDevice));

        #else

        CUDA_SAFE_CALL(cudaMemcpy(tdm,dm,CudaMemory::size(),cudaMemcpyDeviceToDevice));

        #endif

    }


    if (hm != NULL)

    {

        if (this->sz < sz)

        {

            #ifdef __HIP__

            CUDA_SAFE_CALL(hipHostMalloc(&thm,sz+32,hipHostMallocMapped));

            #elif defined(CUDIFY_USE_CUDA)

            CUDA_SAFE_CALL(cudaHostAlloc(&thm,sz+32,cudaHostAllocMapped));

            #else

            thm = new unsigned char [sz+32];

            #ifdef GARBAGE_INJECTOR

            memset(thm,0xFF,sz+32);

            #endif

            #endif

        }


        #ifdef __HIP__

        CUDA_SAFE_CALL(hipMemcpy(thm,hm,CudaMemory::size(),hipMemcpyHostToHost));

        #else

        CUDA_SAFE_CALL(cudaMemcpy(thm,hm,CudaMemory::size(),cudaMemcpyHostToHost));

        #endif

    }


    destroy();


    dm = tdm;

    hm = thm;


    this->sz = sz;


    return true;

}


/*! \brief Return the host buffer, creating it on first use
 *
 * \return the hm pointer
 */
void * CudaMemory::getPointer()
{
    // the host buffer is created lazily
    if (hm == NULL)
    {allocate_host(sz);}

    return hm;
}


/*! \brief Copy the whole device buffer into the host buffer
 *
 * The host buffer is created on demand; sz+32 bytes are then copied from dm
 * to hm.
 */
void CudaMemory::deviceToHost()
{
    // the host buffer is created lazily
    if (hm == NULL)
    {allocate_host(sz);}

    const size_t n_bytes = sz + 32;

    #ifdef __HIP__
    CUDA_SAFE_CALL(hipMemcpy(hm,dm,n_bytes,hipMemcpyDeviceToHost));
    #else
    CUDA_SAFE_CALL(cudaMemcpy(hm,dm,n_bytes,cudaMemcpyDeviceToHost));
    #endif
}


/*! \brief Copy the device buffer of this object into the host buffer of mem
 *
 * The host buffer of mem is created on demand. If mem is larger than this
 * object, this object is resized to mem.sz first. Unless this object has size
 * 0, mem.sz+32 bytes are then copied from dm to mem.hm.
 *
 * \param mem object whose host buffer receives the data
 */
void CudaMemory::deviceToHost(CudaMemory & mem)
{
    // The copy below writes mem.sz+32 bytes into mem.hm, so a missing host
    // buffer must be sized from mem.sz. Sizing it from this->sz would overflow
    // the buffer whenever mem.sz > this->sz.
    if (mem.hm == NULL)
    {mem.allocate_host(mem.sz);}

    // make sure the device buffer holds at least mem.sz+32 bytes
    if (mem.sz > sz)
    {resize(mem.sz);}

    if (sz != 0)
    {
        #ifdef __HIP__
        CUDA_SAFE_CALL(hipMemcpy(mem.hm,dm,mem.sz+32,hipMemcpyDeviceToHost));
        #else
        CUDA_SAFE_CALL(cudaMemcpy(mem.hm,dm,mem.sz+32,cudaMemcpyDeviceToHost));
        #endif
    }
}


/*! \brief Copy the host buffer of mem into the device buffer of this object
 *
 * The host buffer of mem is created on demand. If mem is larger than this
 * object, this object is resized to mem.sz first. mem.sz+32 bytes are then
 * copied from mem.hm to dm.
 *
 * \param mem object whose host buffer provides the data
 */
void CudaMemory::hostToDevice(CudaMemory & mem)
{
    // The copy below reads mem.sz+32 bytes from mem.hm, so a missing host
    // buffer must be sized from mem.sz. Sizing it from this->sz would read
    // past the end of the buffer whenever mem.sz > this->sz.
    if (mem.hm == NULL)
    {mem.allocate_host(mem.sz);}

    // make sure the device buffer holds at least mem.sz+32 bytes
    if (mem.sz > sz)
    {resize(mem.sz);}

    #ifdef __HIP__
    CUDA_SAFE_CALL(hipMemcpy(dm,mem.hm,mem.sz+32,hipMemcpyHostToDevice));
    #else
    CUDA_SAFE_CALL(cudaMemcpy(dm,mem.hm,mem.sz+32,cudaMemcpyHostToDevice));
    #endif
}


/*! \brief Copy a byte range of the host buffer to the same range on the device
 *
 * The host buffer is created on demand; (stop-start) bytes are then copied
 * from hm+start to dm+start. No bounds are checked here.
 *
 * \param start first byte of the range
 * \param stop end of the range (exclusive)
 */
void CudaMemory::hostToDevice(size_t start, size_t stop)
{
    // the host buffer is created lazily
    if (hm == NULL)
    {allocate_host(sz);}

    unsigned char * dev_ptr = static_cast<unsigned char *>(dm) + start;
    unsigned char * host_ptr = static_cast<unsigned char *>(hm) + start;
    const size_t n_bytes = stop - start;

    #ifdef __HIP__
    CUDA_SAFE_CALL(hipMemcpy(dev_ptr,host_ptr,n_bytes,hipMemcpyHostToDevice));
    #else
    CUDA_SAFE_CALL(cudaMemcpy(dev_ptr,host_ptr,n_bytes,cudaMemcpyHostToDevice));
    #endif
}


/*! \brief Copy a byte range of the device buffer to the same range on the host
 *
 * The host buffer is created on demand; (stop-start) bytes are then copied
 * from dm+start to hm+start. No bounds are checked here.
 *
 * \param start first byte of the range
 * \param stop end of the range (exclusive)
 */
void CudaMemory::deviceToHost(size_t start, size_t stop)
{
    // the host buffer is created lazily
    if (hm == NULL)
    {allocate_host(sz);}

    unsigned char * host_ptr = static_cast<unsigned char *>(hm) + start;
    unsigned char * dev_ptr = static_cast<unsigned char *>(dm) + start;
    const size_t n_bytes = stop - start;

    #ifdef __HIP__
    CUDA_SAFE_CALL(hipMemcpy(host_ptr,dev_ptr,n_bytes,hipMemcpyDeviceToHost));
    #else
    CUDA_SAFE_CALL(cudaMemcpy(host_ptr,dev_ptr,n_bytes,cudaMemcpyDeviceToHost));
    #endif
}


/*! \brief Return the host buffer (read-only), creating it on first use
 *
 * \return the hm pointer
 */
const void * CudaMemory::getPointer() const
{
    // the host buffer is created lazily, even through a const object
    if (hm == NULL)
    {allocate_host(sz);}

    return hm;
}


/*! \brief Set every byte of the memory to c
 *
 * The first size() bytes of the device buffer are set to c; when a host
 * buffer exists its first size() bytes are set to c as well.
 *
 * \param c byte value to write
 */
void CudaMemory::fill(unsigned char c)
{
    const size_t n_bytes = size();

    // device buffer
    #ifdef __HIP__
    CUDA_SAFE_CALL(hipMemset(dm,c,n_bytes));
    #elif defined(CUDIFY_USE_CUDA)
    CUDA_SAFE_CALL(cudaMemset(dm,c,n_bytes));
    #else
    memset(dm,c,n_bytes);
    #endif

    // host buffer, only if it has been created
    if (hm == NULL)
    {return;}

    memset(hm,c,n_bytes);
}


void * CudaMemory::getDevicePointer()

{

    return dm;

}


/*! \brief Copy the whole host buffer into the device buffer
 *
 * The host buffer is created on demand; sz+32 bytes are then copied from hm
 * to dm.
 */
void CudaMemory::hostToDevice()
{
    // the host buffer is created lazily
    if (hm == NULL)
    {allocate_host(sz);}

    const size_t n_bytes = sz + 32;

    #ifdef __HIP__
    CUDA_SAFE_CALL(hipMemcpy(dm,hm,n_bytes,hipMemcpyHostToDevice));
    #else
    CUDA_SAFE_CALL(cudaMemcpy(dm,hm,n_bytes,cudaMemcpyHostToDevice));
    #endif
}


/*! \brief Exchange the buffers of this object with those of mem
 *
 * hm, dm, sz and is_hm_sync are exchanged between the two objects.
 *
 * NOTE(review): ref_cnt is only copied from mem into this object; mem.ref_cnt
 * is left untouched, so the counter is not actually swapped. Confirm whether
 * this one-way copy is intended.
 *
 * \param mem object to exchange the buffers with
 */
void CudaMemory::swap(CudaMemory & mem)
{
    // remember the state of this object
    void * const hm_tmp = hm;
    const bool is_hm_sync_tmp = is_hm_sync;
    const size_t sz_tmp = sz;
    void * const dm_tmp = dm;

    // take over the state of mem
    hm = mem.hm;
    is_hm_sync = mem.is_hm_sync;
    sz = mem.sz;
    dm = mem.dm;
    ref_cnt = mem.ref_cnt;

    // hand the remembered state over to mem (ref_cnt excluded, see note above)
    mem.hm = hm_tmp;
    mem.is_hm_sync = is_hm_sync_tmp;
    mem.sz = sz_tmp;
    mem.dm = dm_tmp;
}

CudaMemory
Definition CudaMemory.cuh:59

CudaMemory::deviceToDevice
void deviceToDevice(void *ptr, size_t start, size_t stop, size_t offset)
copy memory from device to device
Definition CudaMemory.cu:119

CudaMemory::resize
virtual bool resize(size_t sz)
resize the memory allocated
Definition CudaMemory.cu:261

CudaMemory::ref_cnt
size_t ref_cnt
Reference counter.
Definition CudaMemory.cuh:73

CudaMemory::is_hm_sync
bool is_hm_sync
Is the host memory synchronized with the GPU memory.
Definition CudaMemory.cuh:61

CudaMemory::dm
void * dm
device memory
Definition CudaMemory.cuh:67

CudaMemory::hm
void * hm
host memory
Definition CudaMemory.cuh:70

CudaMemory::size
virtual size_t size() const
the size of the allocated memory
Definition CudaMemory.cu:245

CudaMemory::copyFromPointer
bool copyFromPointer(const void *ptr)
copy from Pointer to GPU
Definition CudaMemory.cu:157

CudaMemory::sz
size_t sz
Size of the memory.
Definition CudaMemory.cuh:64

CudaMemory::allocate_host
void allocate_host(size_t sz) const
Allocate a host buffer.
Definition CudaMemory.cu:133

CudaMemory::copyDeviceToDevice
bool copyDeviceToDevice(const CudaMemory &m)
copy from GPU to GPU buffer directly
Definition CudaMemory.cu:187

CudaMemory::destroy
virtual void destroy()
destroy memory
Definition CudaMemory.cu:80

HeapMemory::flush
virtual bool flush()
flush the memory
Definition HeapMemory.hpp:66

HeapMemory::allocate
virtual bool allocate(size_t sz)
allocate memory
Definition HeapMemory.cpp:33

HeapMemory::copyFromPointer
bool copyFromPointer(const void *ptr, size_t sz)
copy from Pointer to Heap
Definition HeapMemory.cpp:89

HeapMemory::fill
virtual void fill(unsigned char c)
fill host and device memory with the selected byte
Definition HeapMemory.cpp:22

HeapMemory::resize
virtual bool resize(size_t sz)
resize the memory allocated
Definition HeapMemory.cpp:167

HeapMemory::getPointer
virtual void * getPointer()
get a readable pointer with the data
Definition HeapMemory.cpp:228

HeapMemory::size
virtual size_t size() const
the size of the allocated memory
Definition HeapMemory.cpp:153

HeapMemory::deviceToHost
virtual void deviceToHost()
Do nothing.
Definition HeapMemory.hpp:96

HeapMemory::copy
virtual bool copy(const memory &m)
copy memory
Definition HeapMemory.cpp:123

HeapMemory::swap
void swap(HeapMemory &mem)
Swap the memory.
Definition HeapMemory.hpp:175

HeapMemory::hostToDevice
virtual void hostToDevice()
Do nothing.
Definition HeapMemory.hpp:93

HeapMemory::getDevicePointer
virtual void * getDevicePointer()
get a device pointer; for HeapMemory, getPointer and getDevicePointer are equivalent
Definition HeapMemory.cpp:218

HeapMemory::destroy
virtual void destroy()
destroy memory
Definition HeapMemory.cpp:73

HeapMemory::copyDeviceToDevice
bool copyDeviceToDevice(const HeapMemory &m)
copy from same Heap to Heap
Definition HeapMemory.cpp:103

memory
Definition memory.hpp:23

memory::getPointer
virtual void * getPointer()=0
return a data pointer

allocate
this class is a functor for "for_each" algorithm
Definition grid_base_impl_layout.hpp:28