OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
map_vector_sparse_cuda_ker.cuh
1 /*
2  * map_vector_sparse_cuda_ker.cuh
3  *
4  * Created on: Jan 23, 2019
5  * Author: i-bird
6  */
7 
8 #ifndef MAP_VECTOR_SPARSE_CUDA_KER_CUH_
9 #define MAP_VECTOR_SPARSE_CUDA_KER_CUH_
10 
11 #include "util/for_each_ref.hpp"
12 
// TODO: check whether this is a good place for the following helper method.
//! \brief Linearize a 3D coordinate (x,y,z) against a 3D extent into a flat index (x fastest)
//!
//! \param coord       coordinate with .x/.y/.z members (e.g. blockIdx)
//! \param dimensions  extent with .x/.y/.z members (e.g. gridDim)
//! \return coord.z*dim.y*dim.x + coord.y*dim.x + coord.x as int
template<typename dim3Ta, typename dim3Tb>
inline __device__ __host__ int dim3CoordToInt(const dim3Ta & coord, const dim3Tb & dimensions)
{
	int linear = coord.z;
	linear = linear * dimensions.y + coord.y;
	linear = linear * dimensions.x + coord.x;
	return linear;
}
24 // Specialization allowing transparency
//! \brief Scalar specialization allowing transparency: a 1D coordinate is already linear
inline __device__ __host__ int dim3CoordToInt(int coord, int dimension)
{
	(void)dimension; // intentionally unused: nothing to fold for a scalar coordinate
	return coord;
}
29 
30 namespace openfpm
31 {
	/*! \brief Type-safe wrapper for a position already resolved inside the
	 *         sparse index vector (as opposed to a raw, unresolved element index)
	 */
	template<typename index_type>
	struct sparse_index
	{
		//! resolved position inside the index/data vectors
		index_type id;
	};
37 
#if defined(__NVCC__) && !defined(CUDA_ON_CPU)
	// Block-shared counters used to stage insertions/removals; initialized by
	// init()/init_ins_inc()/init_rem_inc() and published by flush_block_*().
	// NOTE(review): "static __shared__" in a header gives every translation
	// unit its own copy — confirm this is intentional.
	static __shared__ int vct_atomic_add;
	static __shared__ int vct_atomic_rem;
#endif
42 
43  template<typename T,
44  typename Ti,
45  template<typename> class layout_base>
47  {
48  vector_gpu_ker<aggregate<Ti>,layout_base> vct_index;
49 
51 
52  vector_gpu_ker<aggregate<Ti>,layout_base> vct_add_index;
53 
54  vector_gpu_ker<aggregate<Ti>,layout_base> vct_rem_index;
55 
56  vector_gpu_ker<aggregate<Ti>,layout_base> vct_nadd_index;
57 
58  vector_gpu_ker<aggregate<Ti>,layout_base> vct_nrem_index;
59 
60  vector_gpu_ker<T,layout_base> vct_add_data;
61 
62  // the const is forced by the getter that only return const encap that should not allow the modification of bck
63  // this should possible avoid to define an object const_encap
64  //mutable vector_gpu_ker<T,layout_base> vct_data_bck;
65 
66  int nslot_add;
67  int nslot_rem;
68 
		/*! \brief Branchless binary search of x inside the sorted index vector
		 *
		 * \param x  element index to look for
		 * \param id out: position of x inside vct_index when found; otherwise
		 *           the last slot of vct_data (the background slot)
		 *
		 * NOTE(review): the search range comes from vct_data.size()-1, which
		 * assumes vct_data always holds exactly one element more than
		 * vct_index (the background value) — confirm against the host-side
		 * container that fills these buffers.
		 */
		inline __device__ void _branchfree_search(Ti x, Ti & id) const
		{
			// empty index vector: nothing to search
			if (vct_index.size() == 0) {id = 0; return;}
			const Ti *base = &vct_index.template get<0>(0);
			const Ti *end = (const Ti *)vct_index.template getPointer<0>() + vct_index.size();
			Ti n = vct_data.size()-1;
			while (n > 1)
			{
				Ti half = n / 2;
				// branch-free step: advance base past the low half when the probe is smaller
				base = (base[half] < x) ? base+half : base;
				n -= half;
			}

			int off = (*base < x);
			id = base - &vct_index.template get<0>(0) + off;
			// value at the candidate slot, or a sentinel when past the end
			Ti v = (base + off != end)?*(base + off):(Ti)-1;
			// not found: redirect to the background slot
			id = (x == v)?id:vct_data.size()-1;
		}
93 
94  public:
95 
96  typedef Ti index_type;
97 
100 
101  vector_sparse_gpu_ker(vector_gpu_ker<aggregate<Ti>,layout_base> vct_index,
103  vector_gpu_ker<aggregate<Ti>,layout_base> vct_add_index,
104  vector_gpu_ker<aggregate<Ti>,layout_base> vct_rem_index,
105  vector_gpu_ker<T,layout_base> vct_add_data,
106  vector_gpu_ker<aggregate<Ti>,layout_base> vct_nadd_index,
107  vector_gpu_ker<aggregate<Ti>,layout_base> vct_nrem_index,
108  int nslot_add,
109  int nslot_rem)
110  :vct_index(vct_index),vct_data(vct_data),
111  vct_add_index(vct_add_index),vct_rem_index(vct_rem_index),vct_add_data(vct_add_data),
112  vct_nadd_index(vct_nadd_index),vct_nrem_index(vct_nrem_index),
113  nslot_add(nslot_add),nslot_rem(nslot_rem)
114  {}
115 
		/*! \brief Number of elements currently indexed
		 *
		 * \return the size of the index vector
		 */
		__device__ inline int size()
		{
			return vct_index.size();
		}
125 
		/*! \brief Reset the block-shared insert/remove counters to zero
		 *
		 * Must be called by the whole block before staging insertions or
		 * removals: thread 0 clears both counters, then all threads sync.
		 */
		__device__ inline void init()
		{
#ifdef __NVCC__
			// a single thread resets the shared counters for the whole block
			if (threadIdx.x == 0)
			{
				vct_atomic_add = 0;
				vct_atomic_rem = 0;
			}

			// make the reset visible to every thread of the block
			__syncthreads();
#endif
		}
141 
		/*! \brief Reload the insert counter with this block's previously flushed count
		 *
		 * Lets a kernel keep staging insertions after an earlier flush:
		 * thread 0 reads the per-block counter back into the shared counter,
		 * then all threads sync.
		 */
		__device__ inline void init_ins_inc()
		{
#ifdef __NVCC__
			if (threadIdx.x == 0)
			{
				int blockId = dim3CoordToInt(blockIdx, gridDim);
				vct_atomic_add = vct_nadd_index.template get<0>(blockId);
			}

			// make the reloaded counter visible to the whole block
			__syncthreads();
#endif
		}
157 
		/*! \brief Reload the remove counter with this block's previously flushed count
		 *
		 * Counterpart of init_ins_inc() for removals: thread 0 reads the
		 * per-block counter back into the shared counter, then all threads sync.
		 */
		__device__ inline void init_rem_inc()
		{
#ifdef __NVCC__
			if (threadIdx.x == 0)
			{
				int blockId = dim3CoordToInt(blockIdx, gridDim);
				vct_atomic_rem = vct_nrem_index.template get<0>(blockId);
			}

			// make the reloaded counter visible to the whole block
			__syncthreads();
#endif
		}
173 
185  __device__ inline openfpm::sparse_index<Ti> get_sparse(Ti id) const
186  {
187  Ti di;
188  this->_branchfree_search(id,di);
190  sid.id = di;
191 
192  return sid;
193  }
194 
		/*! \brief Get property p of the background value
		 *
		 * The background is stored in the last slot of vct_data.
		 *
		 * \return const reference/encap to property p of the background element
		 */
		template <unsigned int p>
		__device__ inline auto getBackground() const -> decltype(vct_data.template get<p>(0)) &
		{
			return vct_data.template get<p>(vct_data.size()-1);
		}
202 
213  template <unsigned int p>
214  __device__ inline auto get(Ti id) const -> decltype(vct_data.template get<p>(id))
215  {
216  Ti di;
217  this->_branchfree_search(id,di);
218  return vct_data.template get<p>(di);
219  }
220 
221  __device__ inline auto get(Ti id) const -> decltype(vct_data.get(0))
222  {
223  Ti di;
224  Ti v = this->_branchfree_search(id,di);
225  return vct_data.get(static_cast<size_t>(di));
226  }
227 
		/*! \brief Get property p at an already-resolved sparse position (no search)
		 *
		 * \param id resolved position (e.g. obtained from get_sparse)
		 * \return reference/encap to property p (const context)
		 */
		template <unsigned int p>
		__device__ inline auto get(openfpm::sparse_index<Ti> id) const -> decltype(vct_data.template get<p>(id.id))
		{
			return vct_data.template get<p>(id.id);
		}
243 
		/*! \brief Get property p at an already-resolved sparse position (no search)
		 *
		 * \param id resolved position (e.g. obtained from get_sparse)
		 * \return reference/encap to property p (mutable context)
		 */
		template <unsigned int p>
		__device__ inline auto get(openfpm::sparse_index<Ti> id) -> decltype(vct_data.template get<p>(id.id))
		{
			return vct_data.template get<p>(id.id);
		}
259 
		/*! \brief Get the element index stored at a resolved sparse position
		 *
		 * \param id resolved position inside the index vector
		 * \return the element index stored there
		 */
		__device__ inline Ti get_index(openfpm::sparse_index<Ti> id) const
		{
			return vct_index.template get<0>(id.id);
		}
270 
		/*! \brief Get property p of element id, also returning the resolved position
		 *
		 * \param id element index
		 * \param di out: resolved position (background slot when absent)
		 * \return reference/encap to property p of the resolved element
		 */
		template <unsigned int p>
		__device__ inline auto get(Ti id, Ti & di) const -> decltype(vct_data.template get<p>(id))
		{
			this->_branchfree_search(id,di);
			return vct_data.template get<p>(di);
		}
287 
		/*! \brief Get property p at a raw data position (no search performed)
		 *
		 * \param di position inside the data vector
		 * \return reference/encap to property p at that position
		 */
		template <unsigned int p>
		__device__ inline auto get_ele(Ti di) const -> decltype(vct_data.template get<p>(di))
		{
			return vct_data.template get<p>(di);
		}
303 
308  template <unsigned int p>
309  __device__ auto insert(Ti ele) -> decltype(vct_data.template get<p>(0))
310  {
311 #ifdef __NVCC__
312 
313  int blockId = dim3CoordToInt(blockIdx, gridDim);
314  int slot_base = blockId;
315 
316  int pos = atomicAdd(&vct_atomic_add,1);
317  vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
318  return vct_add_data.template get<p>(slot_base*nslot_add+pos);
319 #else
320 
321  printf("vector_sparse_gpu_ker.insert[1]: Error, this function in order to work is supposed to be compiled with nvcc\n");
322 
323 #endif
324  }
325 
333  __device__ void remove(Ti ele)
334  {
335 #ifdef __NVCC__
336 
337  int blockId = dim3CoordToInt(blockIdx, gridDim);
338  int slot_base = blockId;
339 
340  int pos = atomicAdd(&vct_atomic_rem,1);
341  vct_rem_index.template get<0>(slot_base*nslot_rem+pos) = ele;
342 
343 #else
344  printf("vector_sparse_gpu_ker.remove: Error, this function in order to work is supposed to be compiled with nvcc\n");
345 #endif
346  }
347 
355  __device__ auto insert(Ti ele) -> decltype(vct_add_data.get(0))
356  {
357 #ifdef __NVCC__
358 
359  int blockId = dim3CoordToInt(blockIdx, gridDim);
360  int slot_base = blockId;
361 
362  int pos = atomicAdd(&vct_atomic_add,1);
363  vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
364 
365  return vct_add_data.get(slot_base*nslot_add+pos);
366 #else
367  printf("vector_sparse_gpu_ker.insert[2]: Error, this function in order to work is supposed to be compiled with nvcc\n");
368 #endif
369  }
370 
375  __device__ void remove_b(Ti ele,Ti slot_base)
376  {
377 #ifdef __NVCC__
378 
379  int pos = atomicAdd(&vct_atomic_rem,1);
380  vct_rem_index.template get<0>(slot_base*nslot_rem+pos) = ele;
381 
382 #else
383  printf("vector_sparse_gpu_ker.remove_b: Error, this function in order to work is supposed to be compiled with nvcc\n");
384 #endif
385  }
386 
391  template <unsigned int p>
392  __device__ auto insert_b(Ti ele,Ti slot_base) -> decltype(vct_data.template get<p>(0))
393  {
394 #ifdef __NVCC__
395 
396  int pos = atomicAdd(&vct_atomic_add,1);
397  vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
398  return vct_add_data.template get<p>(slot_base*nslot_add+pos);
399 #else
400  printf("vector_sparse_gpu_ker.insert_b: Error, this function in order to work is supposed to be compiled with nvcc\n");
401 #endif
402  }
403 
408  __device__ auto insert_b(Ti ele,Ti slot_base) -> decltype(vct_add_data.get(0))
409  {
410 #ifdef __NVCC__
411 
412  int pos = atomicAdd(&vct_atomic_add,1);
413  vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
414  return vct_add_data.get(slot_base*nslot_add+pos);
415 #else
416  printf("vector_sparse_gpu_ker.insert_b: Error, this function in order to work is supposed to be compiled with nvcc\n");
417 #endif
418  }
419 
424  __device__ void flush_block_insert()
425  {
426 #ifdef __NVCC__
427 
428  __syncthreads();
429 
430  if (threadIdx.x == 0)
431  {
432  int blockId = dim3CoordToInt(blockIdx, gridDim);
433  vct_nadd_index.template get<0>(blockId) = vct_atomic_add;
434  }
435 
436 #else
437  printf("vector_sparse_gpu_ker.flush_block_insert: Error, this function in order to work is supposed to be compiled with nvcc\n");
438 #endif
439  }
440 
445  __device__ void flush_block_remove()
446  {
447 #ifdef __NVCC__
448 
449  __syncthreads();
450 
451  if (threadIdx.x == 0)
452  {
453  int blockId = dim3CoordToInt(blockIdx, gridDim);
454  vct_nrem_index.template get<0>(blockId) = vct_atomic_rem;
455  }
456 
457 #else
458  printf("vector_sparse_gpu_ker.flush_block_remove: Error, this function in order to work is supposed to be compiled with nvcc\n");
459 #endif
460  }
461 
		//! Internal accessor: direct reference to the per-block insert-counter vector
		auto & private_get_vct_nadd_index()
		{
			return vct_nadd_index;
		}
466 
471  __device__ void flush_block_insert(Ti b, bool flusher)
472  {
473 #ifdef __NVCC__
474 
475  __syncthreads();
476 
477  if (flusher == true)
478  {vct_nadd_index.template get<0>(b) = vct_atomic_add;}
479 
480 
481 #else
482  printf("vector_sparse_gpu_ker.flush_block_insert: Error, this function in order to work is supposed to be compiled with nvcc\n");
483 #endif
484  }
485 
		//! Internal accessor: underlying layout data of the add-data staging buffer
		__device__ auto private_get_data() -> decltype(vct_add_data.getBase().get_data_())
		{
			return vct_add_data.getBase().get_data_();
		}
490 
495  __device__ void flush_block_remove(unsigned int b, bool flusher)
496  {
497 #ifdef __NVCC__
498 
499  __syncthreads();
500 
501  if (flusher == true)
502  {vct_nrem_index.template get<0>(b) = vct_atomic_rem;}
503 
504 #else
505  printf("vector_sparse_gpu_ker.flush_block_remove: Error, this function in order to work is supposed to be compiled with nvcc\n");
506 #endif
507  }
508 
		//! \brief Get the staged add-data buffer (mutable)
		__device__ auto getAddDataBuffer() -> decltype(vct_add_data)&
		{
			return vct_add_data;
		}
517 
		//! \brief Get the element-data buffer (mutable)
		__device__ auto getDataBuffer() -> decltype(vct_data)&
		{
			return vct_data;
		}
526 
		//! \brief Get the staged add-index buffer (read-only)
		__device__ auto getAddIndexBuffer() const -> const decltype(vct_add_index)&
		{
			return vct_add_index;
		}
535 
		//! \brief Get the index buffer (read-only)
		__device__ auto getIndexBuffer() const -> const decltype(vct_index)&
		{
			return vct_index;
		}
544 
		//! \brief Get the element-data buffer (read-only)
		__device__ auto getDataBuffer() const -> const decltype(vct_data)&
		{
			return vct_data;
		}
553 
554 #ifdef SE_CLASS1
555 
561  pointer_check check_device_pointer(void * ptr)
562  {
563  pointer_check pc;
564 
565  pc = vct_index.check_device_pointer(ptr);
566 
567  if (pc.match == true)
568  {
569  pc.match_str = std::string("Index vector overflow: ") + "\n" + pc.match_str;
570  return pc;
571  }
572 
573  pc = vct_data.check_device_pointer(ptr);
574 
575  if (pc.match == true)
576  {
577  pc.match_str = std::string("Data vector overflow: ") + "\n" + pc.match_str;
578  return pc;
579  }
580 
581  pc = vct_add_index.check_device_pointer(ptr);
582 
583  if (pc.match == true)
584  {
585  pc.match_str = std::string("Add index vector overflow: ") + "\n" + pc.match_str;
586  return pc;
587  }
588 
589  pc = vct_rem_index.check_device_pointer(ptr);
590 
591  if (pc.match == true)
592  {
593  pc.match_str = std::string("Remove index vector overflow: ") + "\n" + pc.match_str;
594  return pc;
595  }
596 
597  pc = vct_nadd_index.check_device_pointer(ptr);
598 
599  if (pc.match == true)
600  {
601  pc.match_str = std::string("Add index counter vector overflow: ") + "\n" + pc.match_str;
602  return pc;
603  }
604 
605  pc = vct_nrem_index.check_device_pointer(ptr);
606 
607  if (pc.match == true)
608  {
609  pc.match_str = std::string("Remove index counter vector overflow: ") + "\n" + pc.match_str;
610  return pc;
611  }
612 
613  pc = vct_add_data.check_device_pointer(ptr);
614 
615  if (pc.match == true)
616  {
617  pc.match_str = std::string("Add data vector overflow: ") + "\n" + pc.match_str;
618  return pc;
619  }
620 
621  return pc;
622  }
623 
624 #endif
625 
626  };
627 }
628 
629 #endif /* MAP_VECTOR_SPARSE_CUDA_KER_CUH_ */
__device__ auto insert_b(Ti ele, Ti slot_base) -> decltype(vct_add_data.get(0))
It inserts an element in the sparse vector.
__device__ auto getDataBuffer() -> decltype(vct_data)&
Get the data buffer.
convert a type into constant type
Definition: aggregate.hpp:292
__device__ auto get(Ti id) const -> decltype(vct_data.template get< p >(id))
Get an element of the vector.
bool match
Indicate if the pointer match.
grid interface available when on gpu
__device__ void remove(Ti ele)
It stages the removal of an element from the sparse vector.
__device__ auto getAddIndexBuffer() const -> const decltype(vct_add_index)&
Get the indices buffer.
__device__ void flush_block_insert(Ti b, bool flusher)
It flushes the number of staged insertions for the block.
__device__ auto getDataBuffer() const -> const decltype(vct_data)&
Get the data buffer.
__device__ void init_rem_inc()
This function must be called.
__device__ Ti get_index(openfpm::sparse_index< Ti > id) const
Get the index associated to the element id.
__device__ void init()
This function must be called.
__device__ auto insert(Ti ele) -> decltype(vct_add_data.get(0))
It inserts an element in the sparse vector.
__device__ auto getBackground() const -> decltype(vct_data.template get< p >(0)) &
Get the background value.
__device__ void remove_b(Ti ele, Ti slot_base)
It stages the removal of an element from the sparse vector.
__device__ auto getAddDataBuffer() -> decltype(vct_add_data)&
Get the data buffer.
__device__ void flush_block_insert()
It flushes the number of staged insertions for the block.
__device__ void flush_block_remove(unsigned int b, bool flusher)
It flushes the number of staged removals for the block.
__device__ auto insert_b(Ti ele, Ti slot_base) -> decltype(vct_data.template get< p >(0))
It inserts an element in the sparse vector.
__device__ void _branchfree_search(Ti x, Ti &id) const
Branchless binary search of an index inside the sparse index vector.
__device__ __host__ auto get(unsigned int id) const -> decltype(base.template get< p >(grid_key_dx< 1 >(0)))
Get an element of the vector.
__device__ void init_ins_inc()
This function must be called.
__device__ auto get(openfpm::sparse_index< Ti > id) const -> decltype(vct_data.template get< p >(id.id))
Get an element of the vector.
__device__ openfpm::sparse_index< Ti > get_sparse(Ti id) const
Get the sparse index.
__device__ grid_gpu_ker< 1, T_, layout_base, grid_sm< 1, void > > & getBase()
Return the base.
__device__ void flush_block_remove()
It flushes the number of staged removals for the block.
__device__ __host__ unsigned int size() const
Return the size of the vector.
__device__ auto get(openfpm::sparse_index< Ti > id) -> decltype(vct_data.template get< p >(id.id))
Get an element of the vector.
int yes_has_check_device_pointer
Indicate this structure has a function to check the device pointer.
std::string match_str
match string
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
Definition: aggregate.hpp:214
__device__ __host__ layout & get_data_()
Get the internal data_ structure.
__device__ auto getIndexBuffer() const -> const decltype(vct_index)&
Get the indices buffer.
__device__ int size()
Get the number of elements.
__device__ auto get(Ti id, Ti &di) const -> decltype(vct_data.template get< p >(id))
Get an element of the vector.
__device__ auto insert(Ti ele) -> decltype(vct_data.template get< p >(0))
It inserts an element in the sparse vector.
__device__ auto get_ele(Ti di) const -> decltype(vct_data.template get< p >(di))
Get an element of the vector.