OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
SparseGridGpu_ker.cuh
//
// Created by tommaso on 11/06/19.
//

#ifndef OPENFPM_PDATA_SPARSEGRIDGPU_KER_CUH
#define OPENFPM_PDATA_SPARSEGRIDGPU_KER_CUH

#include <Grid/Geometry/grid_smb.hpp>
#include "BlockMapGpu.hpp"
#include "SparseGridGpu_ker_util.hpp"

//! Position of a chunk (pos) and offset of a point inside the chunk (off)
template<typename indexT>
struct block_offset
{
    indexT pos;
    indexT off;
};
18 
19 //todo Remove template param GridSmT and just use BlockGeometry
20 template<unsigned int dim,
21  unsigned int blockEdgeSize,
22  typename AggregateBlockT,
23  typename ct_params,
24  typename indexT,
25  template<typename> class layout_base,
26  typename GridSmT,
27  typename linearizer,
28  typename BcT>
29 class SparseGridGpu_ker : public BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>
30 {
31 private:
32  linearizer grid;
33  GridSmT blockWithGhostGrid;
34 
37 
protected:
    const static unsigned char PADDING_BIT = 1;
    static constexpr unsigned int blockSize = BlockTypeOf<AggregateBlockT, 0>::size;
    unsigned int ghostLayerSize;
    openfpm::vector_gpu_ker<aggregate<short int, short int>, memory_traits_inte> ghostLayerToThreadsMapping;
    openfpm::vector_gpu_ker<aggregate<indexT>, memory_traits_inte> nn_blocks;
    openfpm::vector_gpu_ker<aggregate<indexT>, memory_traits_inte> buffPnt;

public:
    static constexpr unsigned int d = dim;
    static constexpr unsigned int dims = dim;
    static constexpr unsigned int blockEdgeSize_ = blockEdgeSize;
    unsigned int stencilSupportRadius;
    typedef AggregateBlockT AggregateBlockType;
    typedef indexT indexT_;

    //! background values
    BcT background;

public:

    //! constructor
    SparseGridGpu_ker(const openfpm::vector_sparse_gpu_ker<AggregateBlockT, indexT, layout_base> & blockMap,
                      linearizer & grid,
                      GridSmT extendedBlockGeometry,
                      unsigned int stencilSupportRadius,
                      openfpm::vector_gpu_ker<aggregate<short int, short int>, memory_traits_inte> ghostLayerToThreadsMapping,
                      openfpm::vector_gpu_ker<aggregate<indexT>, memory_traits_inte> nn_blocks,
                      openfpm::vector_gpu_ker<aggregate<indexT>, memory_traits_inte> buffPnt,
                      unsigned int ghostLayerSize,
                      BcT & bck)
        : BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>(blockMap),
          grid(grid),
          blockWithGhostGrid(extendedBlockGeometry),
          stencilSupportRadius(stencilSupportRadius),
          ghostLayerSize(ghostLayerSize),
          ghostLayerToThreadsMapping(ghostLayerToThreadsMapping),
          nn_blocks(nn_blocks),
          buffPnt(buffPnt),
          background(bck)
    {}

    /*! \brief Unpack the headers of a packed chunk stream
     *
     * \return the total number of bytes consumed by this pack
     */
    template<typename headers_type>
    __device__ static int unpack_headers(headers_type & headers, unsigned char * data, int ih, int sz_pack)
    {
        size_t n_cnk;
        if (sizeof(indexT) == 8)
        {n_cnk = ((size_t *)data)[0];}
        else
        {
            unsigned int dp1 = ((unsigned int *)data)[0];
            unsigned int dp2 = ((unsigned int *)&data[4])[0];
            n_cnk = (size_t)dp1 + ((size_t)dp2 << 32);
        }
        headers.template get<1>(ih) = n_cnk;

        size_t actual_offset = n_cnk*sizeof(indexT);

        unsigned int n_pnt = *(unsigned int *)&(data[sizeof(size_t) + 2*dim*sizeof(int) + actual_offset + n_cnk*sizeof(unsigned int)]);
        headers.template get<2>(ih) = n_pnt;

        return sizeof(size_t) + // bytes required to pack the number of chunks packed
               2*dim*sizeof(int) + // starting point + size of the indexing packing
               sizeof(indexT)*n_cnk + // bytes required to pack the chunk indexes
               align_number_device(sizeof(indexT),(n_cnk+1)*sizeof(unsigned int)) + // bytes required to pack the scan of the chunk points
               align_number_device(sizeof(indexT),n_pnt*sz_pack) + // bytes required to pack data
               align_number_device(sizeof(indexT),n_pnt*sizeof(short int)) + // bytes required to pack offsets
               align_number_device(sizeof(indexT),n_pnt*sizeof(unsigned char)); // bytes required to pack masks
    }
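
    // Worked example (illustrative, not from the original source): with dim = 3,
    // sizeof(indexT) = 8, n_cnk = 4 chunks, n_pnt = 100 points and sz_pack = 4
    // bytes per point, the returned size is
    //   8 (chunk count) + 24 (start + size) + 32 (chunk indexes)
    //   + 24 (scan: 20 bytes aligned to 8) + 400 (data) + 200 (offsets)
    //   + 104 (masks: 100 bytes aligned to 8) = 792 bytes.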

    /*! \brief Given the coordinate of the block and the offset id inside the block, it gives the global coordinate
     *
     * \param blockCoord block coordinate
     * \param offset offset inside the block
     *
     * \return the global coordinate
     */
    template<typename CoordT>
    __device__ __host__ inline grid_key_dx<dim,CoordT> getGlobalCoord(const grid_key_dx<dim, CoordT> & blockCoord, unsigned int offset)
    {
        return grid.getGlobalCoord(blockCoord,offset);
    }
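
    // Illustrative example (not from the original source): with dim = 2 and
    // blockEdgeSize = 8, blockCoord = (2,0) and offset = 43 (i.e. local point
    // (43 % 8, 43 / 8) = (3,5)) map to the global coordinate (2*8+3, 0*8+5) = (19,5).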

    /*! \brief Linearization of global coordinates
     *
     * \param coord point coordinates
     *
     * \return the linearized position
     */
    template<typename CoordT>
    inline __device__ size_t getLinId(const grid_key_dx<dim, CoordT> & coord) const
    {
        return grid.LinId(coord);
    }

    /*! \brief Size of the sparse grid in each direction
     *
     * \param i direction
     *
     * \return the size in the given direction
     */
    inline __device__ unsigned int size(unsigned int i)
    {
        return grid.getSize()[i];
    }

    /*! \brief The inversion of getLinId
     *
     * \param linId linearized position
     *
     * \return the point coordinates
     */
    inline __device__ grid_key_dx<dim, int> getCoord(size_t linId) const
    {
        return grid.InvLinId(linId);
    }

    /*! \brief The inversion of getLinId
     *
     * \param dataBlockId block id
     * \param offset offset inside the block
     *
     * \return the point coordinates
     */
    inline __device__ grid_key_dx<dim, int> getCoord(size_t dataBlockId, unsigned offset) const
    {
        return grid.InvLinId(dataBlockId * blockSize + offset);
    }

    /*! \brief Given a point to insert, return the block-id and offset of that point
     *
     * \param itd insert iterator
     * \param p point to insert
     * \param blk block id (output)
     * \param offset offset inside the block (output)
     *
     * \return true if the point is inside the insert region
     */
    template<typename ite_type>
    inline __device__ bool getInsertBlockOffset(const ite_type & itd, const grid_key_dx<dim, int> & p, grid_key_dx<dim, int> & blk, int & offset)
    {
        int accu = 1;
        offset = 0;

        bool active = true;

        for (int i = 0 ; i < dim ; i++)
        {
            blk.set_d(i,p.get(i) / getBlockEdgeSize());
            offset += (p.get(i) % getBlockEdgeSize()) * accu;
            accu *= getBlockEdgeSize();
            active = active && (p.get(i) >= (itd.start.get(i) + itd.start_base.get(i))) && (p.get(i) <= itd.stop.get(i));
        }

        return active;
    }
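
    // Worked example (illustrative, not from the original source): with dim = 2 and
    // blockEdgeSize = 8, the point p = (19,5) decomposes into blk = (19/8, 5/8) = (2,0)
    // and offset = (19%8)*1 + (5%8)*8 = 3 + 40 = 43, i.e. the inverse of the
    // getGlobalCoord example above.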

    /*! \brief Linearization of block coordinates
     *
     * \param blockCoord block coordinates
     *
     * \return the linearized block index
     */
    template<typename CoordT>
    inline __device__ size_t getBlockLinId(CoordT blockCoord) const
    {
        return grid.BlockLinId(blockCoord);
    }

    /*! \brief The inversion of getBlockLinId
     *
     * \param blockLinId linearized block index
     *
     * \return the block coordinates
     */
    inline __device__ grid_key_dx<dim, int> getBlockCoord(size_t blockLinId) const
    {
        return grid.BlockInvLinId(blockLinId);
    }

    /*! \brief Given a linearized block index, it returns the coordinates of the lower-left point in 2D (or its equivalent in higher dimensions)
     *
     * \param blockLinId linearized block index
     *
     * \return the coordinates of the block base point
     */
    inline __device__ grid_key_dx<dim, int> getBlockBaseCoord(size_t blockLinId) const
    {
        return grid.InvLinId(blockLinId * blockSize);
    }

    inline __device__ grid_key_dx<dim, int> getNeighbour(grid_key_dx<dim, int> base, unsigned int dimension, char offset) const
    {
        grid_key_dx<dim, int> res = base;
        auto i = base.get(dimension) + offset;
        res.set_d(dimension, i);
        return res;
    }
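
    // Illustrative example (not from the original source): getNeighbour((3,5), 1, +1)
    // moves base one step along dimension 1 and returns (3,6).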

    /*! \brief Return the block edge size
     *
     * \return the block edge size
     */
    constexpr static __device__ unsigned int getBlockEdgeSize()
    {
        return blockEdgeSize;
    }

    /*! \brief Return the size of the block
     *
     * \return the block size
     */
    constexpr __device__ unsigned int getBlockSize() const
    {
        return blockSize;
    }

    /*! \brief Return the size of the block + ghost needed to apply the stencil
     *
     * \return the enlarged block size
     */
    inline __device__ unsigned int getEnlargedBlockSize() const
    {
        return std::pow(blockEdgeSize + 2*stencilSupportRadius, dim);
    }
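
    // Illustrative example (not from the original source): with dim = 3,
    // blockEdgeSize = 8 and stencilSupportRadius = 1 this returns
    // (8 + 2*1)^3 = 1000 elements.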

    /*! \brief Get the neighborhood point in one direction
     *
     * \param pos data block position
     * \param offset offset inside the data block
     * \param mov neighborhood movement
     *
     * \return the position of the neighbor (chunk position and offset inside the chunk)
     */
    template<typename NN_type, typename indexT2>
    inline __device__ block_offset<indexT2> getNNPoint(openfpm::sparse_index<unsigned int> pos,
                                                       unsigned int offset,
                                                       const grid_key_dx<dim,indexT2> & mov)
    {
        block_offset<indexT2> bof;

        grid_key_dx<dim,int> coord;

        for (int i = 0 ; i < dim ; i++)
        {
            coord.set_d(i,mov.get(i) + offset % blockEdgeSize);
            offset /= blockEdgeSize;
        }

        unsigned int NN_index = 0;
        unsigned int offset_nn = 0;

        bool out = NN_type::template getNNindex_offset<blockEdgeSize>(coord,NN_index,offset_nn);

        // Calculate internal coordinates

        indexT nnb = pos.id;

        if (out == true)
        {nnb = nn_blocks.template get<0>(NN_index + NN_type::nNN*pos.id);}

        bof.pos = nnb;
        bof.off = offset_nn;

        return bof;
    }
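
    // Note (illustrative, not from the original source): when the moved point falls
    // outside the current chunk, getNNindex_offset reports out = true and the chunk
    // id is looked up in the precomputed nn_blocks table; otherwise the original
    // chunk id pos.id is kept and only the internal offset changes.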

    inline __device__ unsigned int posToEnlargedBlockPos(unsigned int pos) const
    {
        // Convert pos into a linear id accounting for the ghost offsets
        unsigned int coord[dim];
        linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
        const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
//        const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);

        return linId;
    }
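
    // Worked example (illustrative, not from the original source): with dim = 2,
    // blockEdgeSize = 8 and stencilSupportRadius = 1 the enlarged edge is 10, so
    // pos = 0 maps to coord (1,1) and linId = 1*10 + 1 = 11, i.e. the first
    // non-ghost element of the enlarged block.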

    inline __device__ grid_key_dx<dim,int>
    getCoordInEnlargedBlock(const unsigned int offset) const
    {
        unsigned int coord[dim];
        linToCoordWithOffset<blockEdgeSize>(offset, stencilSupportRadius, coord);
        return grid_key_dx<dim, int>(coord);
    }

    inline __device__ unsigned int
    getLinIdInEnlargedBlock(const unsigned int offset) const
    {
        unsigned int coord[dim];
        linToCoordWithOffset<blockEdgeSize>(offset, stencilSupportRadius, coord);
        return coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
//        return shift_position<dim,blockEdgeSize>::shift(offset,stencilSupportRadius);
    }

    template<typename Coordtype>
    inline __device__ unsigned int
    getNeighbourLinIdInEnlargedBlock(const grid_key_dx<dim, Coordtype> & base, grid_key_dx<dim, Coordtype> & offsets) const
    {
        grid_key_dx<dim, int> res = base + offsets;
        return coordToLin<blockEdgeSize>(res, stencilSupportRadius);
    }

    template<typename Coordtype>
    inline __device__ unsigned int
    getNeighbourLinIdInEnlargedBlock(const grid_key_dx<dim,Coordtype> & base, unsigned int dimension, char offset) const
    {
        grid_key_dx<dim, int> res = getNeighbour(base, dimension, offset);
        return coordToLin<blockEdgeSize>(res, stencilSupportRadius);
    }

    inline __device__ bool
    getIfBoundaryElementInEnlargedBlock(const grid_key_dx<dim, int> coordInEnlargedBlock, char (&boundaryDirection)[dim])
    {
        bool isBoundary = false;
        for (int d=0; d<dim; ++d)
        {
            const auto v = coordInEnlargedBlock.get(d);
            if (v==stencilSupportRadius)
            {
                boundaryDirection[d] = -1;
                isBoundary = true;
            }
            else if (v==stencilSupportRadius+blockEdgeSize-1)
            {
                boundaryDirection[d] = 1;
                isBoundary = true;
            }
            else
            {
                boundaryDirection[d] = 0;
            }
        }
        return isBoundary;
    }
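
    // Worked example (illustrative, not from the original source): with dim = 2,
    // blockEdgeSize = 8 and stencilSupportRadius = 1, the enlarged-block coordinate
    // (1,4) lies on the low boundary of dimension 0 (1 == stencilSupportRadius), so
    // boundaryDirection = {-1, 0} and the function returns true.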

    // Data management methods

    /*! \brief Access the grid point
     *
     * \param coord point coordinates
     *
     * \return the value of the grid point
     */
    template<unsigned int p, typename CoordT>
    inline __device__ auto
    get(const grid_key_dx<dim, CoordT> & coord) const -> ScalarTypeOf<AggregateBlockT, p>
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::template get<p>(grid.LinId(coord));
    }

    /*! \brief Get the chunk position and the offset inside the chunk of the given point
     *
     * \param coord point coordinates
     * \param dataBlockPos chunk position (output)
     * \param offset offset inside the chunk (output)
     */
    template<typename CoordT>
    inline __device__ void
    get_sparse(const grid_key_dx<dim, CoordT> & coord, unsigned int & dataBlockPos, unsigned int & offset) const
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::get_sparse(grid.LinId(coord),dataBlockPos,offset);
    }

    /*! \brief Access the grid point
     *
     * \param coord chunk position and offset inside the chunk
     *
     * \return a reference to the value of the grid point
     */
    template<unsigned int p, typename CoordT>
    inline __device__ auto
    get(const block_offset<CoordT> & coord) const -> decltype(std::declval<BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>>().getblockMap().template get_ele<p>(coord.pos)[coord.off])
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::blockMap.template get_ele<p>(coord.pos)[coord.off];
    }

    /*! \brief Access the grid point
     *
     * \param coord chunk position and offset inside the chunk
     *
     * \return a reference to the value of the grid point
     */
    template<unsigned int p, typename CoordT>
    inline __device__ auto
    get(const block_offset<CoordT> & coord) -> decltype(std::declval<BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>>().getblockMap().template get_ele<p>(coord.pos)[coord.off])
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::blockMap.template get_ele<p>(coord.pos)[coord.off];
    }

    template<unsigned int p, typename CoordT>
    inline __device__ auto
    insert(const grid_key_dx<dim, CoordT> & coord) -> ScalarTypeOf<AggregateBlockT, p>& // should be decltype(BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::template insert<p>(0)) but LLVM complains
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::template insert<p>(grid.LinId(coord));
    }

    template<typename CoordT>
    inline __device__ unsigned int getBlockId(const grid_key_dx<dim, CoordT> & coord)
    {
        // todo: check this because it may be bugged
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getBlockId(grid.LinId(coord));
    }

    template<typename CoordT>
    inline __device__ unsigned int getOffset(const grid_key_dx<dim, CoordT> & coord)
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getOffset(grid.LinId(coord));
    }

    template<typename CoordT>
    inline __device__ auto
    getBlock(const grid_key_dx<dim, CoordT> & coord) -> decltype(BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getBlock(0))
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getBlock(getBlockId(coord));
    }

    inline __device__ auto
    getBlock(const unsigned int blockLinId) -> decltype(BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getBlock(0))
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::getBlock(blockLinId);
    }

    template<unsigned int chunksPerBlocks = 1,typename CoordT>
    inline __device__ auto
    insertBlock(const grid_key_dx<dim, CoordT> & coord) -> decltype(BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::insertBlock(0))
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::template insertBlock<chunksPerBlocks>(getBlockId(coord));
    }

    template<unsigned int chunksPerBlocks = 1>
    inline __device__ auto
    insertBlock(const indexT blockLinId, const unsigned int stride = 8192) -> decltype(BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::insertBlock(0))
    {
        return BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::template insertBlock<chunksPerBlocks>(blockLinId,stride);
    }

    /*! \brief Return the buffer of points
     *
     * \return the buffer of points
     */
    inline __device__ auto getPointBuffer() -> decltype(buffPnt) &
    {
        return buffPnt;
    }

    // Load & Store aux functions for user kernels, to be used for loading into or writing from shared memory.
    // A minimal usage sketch follows.
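
    // Minimal usage sketch (illustrative only, not part of the original source;
    // the free function name and the concrete sizes are hypothetical). Assuming
    // dim = 2, blockEdgeSize = 8 and stencilSupportRadius = 1, the enlarged block
    // holds (8+2)*(8+2) = 100 scalars:
    //
    //   template<unsigned int p, typename SparseGridT, typename AggrWrapperT>
    //   __device__ void loadComputeStore(SparseGridT & sparseGrid,
    //                                    AggrWrapperT & dataBlock,
    //                                    openfpm::sparse_index<unsigned int> blockIdPos)
    //   {
    //       __shared__ ScalarTypeOf<typename SparseGridT::AggregateBlockType, p> enlarged[100];
    //       sparseGrid.template loadGhostBlock<p>(dataBlock, blockIdPos, enlarged);
    //       __syncthreads();
    //       // ... apply the stencil on `enlarged` in place ...
    //       sparseGrid.template storeBlock<p>(dataBlock, enlarged);
    //   }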

    /*! \brief Load a data block into the given shared memory region
     *
     * \param block data block to load
     * \param sharedRegion shared memory region
     */
    template<unsigned int p, typename AggrWrapperT>
    inline __device__ void
    loadBlock(AggrWrapperT &block, ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        //todo: Make this work well with multiple chunks per block, or check not to get several chunks, or dragons ahoy!
        __loadBlock<p>(block, sharedRegion);
    }

    /*! \brief Load the data blocks of the given properties into the given shared memory regions
     *
     * \param block data block to load
     * \param sharedRegionPtr array of pointers to the shared memory regions, one per property
     */
    template<unsigned int ... props, typename AggrWrapperT>
    inline __device__ void loadBlock(AggrWrapperT &block, void *sharedRegionPtr[sizeof...(props)])
    {
        __loadBlock<props ...>(block, sharedRegionPtr);
    }

    /*! \brief Load the ghost area into the shared region
     *
     * \param dataBlockLoad data block to load
     * \param coord coordinate of the block
     * \param sharedRegion shared memory region
     */
    template<unsigned int p , typename AggrWrapperT , typename CoordT>
    inline __device__ void
    loadGhostBlock(const AggrWrapperT & dataBlockLoad,const grid_key_dx<dim, CoordT> & coord, ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        auto blockLinId = getBlockId(coord);
        __loadGhostBlock<p>(dataBlockLoad,blockLinId, sharedRegion);
    }

    template<unsigned int p, typename AggrWrapperT>
    inline __device__ void
    loadGhostBlock(const AggrWrapperT & dataBlockLoad, const openfpm::sparse_index<unsigned int> blockLinId, ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        __loadGhostBlock<p>(dataBlockLoad,blockLinId, sharedRegion);
    }

    template<unsigned int p, typename AggrWrapperT>
    inline __device__ void
    loadGhostBlock(const AggrWrapperT & dataBlockLoad, const openfpm::sparse_index<unsigned int> blockLinId, ScalarTypeOf<AggregateBlockT, p> *sharedRegion, unsigned char * mask)
    {
        __loadGhostBlock<p>(dataBlockLoad,blockLinId, sharedRegion,mask);
    }

    /*! \brief Load the ghost layer of the given properties into the given shared memory regions
     *
     * \param coord coordinate of the block
     * \param neighboursPos positions of the neighborhood blocks
     * \param sharedRegionPtr array of pointers to the shared memory regions, one per property
     */
    template<unsigned int ... props, typename CoordT>
    inline __device__ void loadGhost(const grid_key_dx<dim, CoordT> & coord, const int * neighboursPos, void *sharedRegionPtr[sizeof...(props)])
    {
        auto blockLinId = getBlockId(coord);
        __loadGhost<props ...>(blockLinId, neighboursPos, sharedRegionPtr);
    }

    template<unsigned int ... props>
    inline __device__ void loadGhost(const unsigned int blockLinId, const int * neighboursPos, void *sharedRegionPtr[sizeof...(props)])
    {
        __loadGhost<props ...>(blockLinId, neighboursPos, sharedRegionPtr);
    }

    template<unsigned int p, typename AggrWrapperT>
    inline __device__ void
    storeBlock(AggrWrapperT &block, ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        //todo: Make this work well with multiple chunks per block, or check not to get several chunks, or dragons ahoy!
        __storeBlock<p>(block, sharedRegion);
    }

    template<unsigned int p, typename CoordT>
    inline __device__ void
    storeBlockInPlace(const grid_key_dx<dim, CoordT> & coord, ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        //todo: Make this work well with multiple chunks per block, or check not to get several chunks, or dragons ahoy!
        auto & block = getBlock(coord);
        __storeBlock<p>(block, sharedRegion);
    }

    /*! \brief Store the data blocks of the given properties from the given shared memory regions
     *
     * \param block data block to write
     * \param sharedRegionPtr array of pointers to the shared memory regions, one per property
     */
    template<unsigned int ... props, typename AggrWrapperT>
    inline __device__ void storeBlock(AggrWrapperT &block, void *sharedRegionPtr[sizeof...(props)])
    {
        __storeBlock<props ...>(block, sharedRegionPtr);
    }

    template<unsigned int ... props, typename CoordT>
    inline __device__ void storeBlockInPlace(const grid_key_dx<dim, CoordT> & coord, void *sharedRegionPtr[sizeof...(props)])
    {
        auto block = getBlock(coord);
        __storeBlock<props ...>(block, sharedRegionPtr);
    }

    template <unsigned int p, typename CoordT>
    inline __device__ ScalarTypeOf<AggregateBlockT, p> & get(
            const grid_key_dx<dim, CoordT> & coord,
            Box<dim, indexT> sharedMemBox,
            ScalarTypeOf<AggregateBlockT, p> *sharedRegion)
    {
        //NOTE: the size of the Box must be equal to the size of the shared region!
        //NOTE: the Box must be square
        if (sharedMemBox.isInside(coord.toPoint()))
        {
            //Get data from shared mem
            auto one = coord;
            one.one();
            const auto boxDimensions = sharedMemBox.getKP2() - sharedMemBox.getKP1() + one; // The +1 is because the upper bound is inclusive
            const auto relativeCoord = coord - sharedMemBox.getKP1();
            const auto locLinId = coordToLin(relativeCoord, boxDimensions);
            return sharedRegion[locLinId];
        }
        else
        {
            //Get data from global mem
            return get(coord);
        }
    }
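
    // Illustrative note (not from the original source): if sharedMemBox spans the
    // enlarged block [KP1, KP2] that was previously filled with loadGhostBlock,
    // points inside the box are served from shared memory, while points outside
    // fall back to a global-memory read.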

    template<typename CoordT>
    inline __device__ void remove(const grid_key_dx<dim, CoordT> & coord)
    {
        BlockMapGpu_ker<AggregateBlockT, indexT, layout_base>::remove(grid.LinId(coord));
    }

    template<typename BitMaskT>
    inline static __device__ bool isPadding(const BitMaskT &bitMask)
    {
        return getBit(bitMask, PADDING_BIT);
    }

    template <typename keyIndexT>
    inline __device__ bool isPadding(grid_key_dx<dim, keyIndexT> coord) const
    {
        typedef BlockMapGpu_ker<AggregateBlockT, indexT, layout_base> BMG;
        auto mask = BMG::template get<BMG::pMask>(grid.LinId(coord));
        return isPadding(mask);
    }

    template<typename BitMaskT>
    inline static __device__ void setPadding(BitMaskT &bitMask)
    {
        setBit(bitMask, PADDING_BIT);
    }

    template<typename BitMaskT>
    inline static __device__ void unsetPadding(BitMaskT &bitMask)
    {
        unsetBit(bitMask, PADDING_BIT);
    }

    template<typename NNtype>
    inline __device__ indexT getNeighboursPos(const indexT blockId, const unsigned int offset)
    {
        //todo: also do the full neighbourhood version, this is just cross
        auto blockCoord = getBlockCoord(blockId);

        return NNtype::template getNNpos<indexT>(blockCoord,this->blockMap,*this,offset);
    }

#ifdef SE_CLASS1

    /*! \brief Check if the given device pointer belongs to one of the internal buffers
     *
     * \param ptr pointer to check
     *
     * \return a pointer_check structure reporting whether the pointer matches and, if so, where
     */
    pointer_check check_device_pointer(void * ptr)
    {
        pointer_check pc;

        pc = ghostLayerToThreadsMapping.check_device_pointer(ptr);

        if (pc.match == true)
        {
            pc.match_str = std::string("ghostLayerToThreadsMapping overflow: ") + "\n" + pc.match_str;
            return pc;
        }

        pc = nn_blocks.check_device_pointer(ptr);

        if (pc.match == true)
        {
            pc.match_str = std::string("nn_blocks overflow: ") + "\n" + pc.match_str;
            return pc;
        }

        pc = ((BlockMapGpu_ker<AggregateBlockT, indexT, layout_base> *)this)->check_device_pointer(ptr);

        return pc;
    }

#endif

private:

    template<unsigned int p, typename AggrWrapperT, typename SharedPtrT>
    inline __device__ void
    __loadBlock(const AggrWrapperT &block, SharedPtrT sharedRegionPtr)
    {
        typedef ScalarTypeOf<AggregateBlockT, p> ScalarT;

        const unsigned int pos = threadIdx.x;
        //todo: Improve this version to allow multiple chunks per block!

        // Convert pos into a linear id accounting for the ghost offsets
        unsigned int coord[dim];
        linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
        const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
//        const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);

        // Actually load the data into the shared region
        //ScalarT *basePtr = (ScalarT *)sharedRegionPtr;

        sharedRegionPtr[linId] = block.template get<p>()[pos];
    }

    template<unsigned int p, unsigned int ... props, typename AggrWrapperT>
    inline __device__ void
    __loadBlock(const AggrWrapperT &block, void *sharedRegionPtr[sizeof...(props)+1])
    {
        __loadBlock<p>(block, sharedRegionPtr);
        if (sizeof...(props) > 1)
        {
            __loadBlock<props ...>(block, sharedRegionPtr + 1);
        }
        else if (sizeof...(props) == 1)
        {
            __loadBlock<props ...>(block, *(sharedRegionPtr + 1));
        }
    }

    // NOTE: this must be called with a linear thread grid; a nice-to-have would be a generic converter (easy to do)
    // from dim3 to linear which would work under all possible launch params
    template<unsigned int p, typename SharedPtrT>
    inline __device__ void
    __loadGhostNoNN(const unsigned int blockId, SharedPtrT * sharedRegionPtr)
    {
        typedef ScalarTypeOf<AggregateBlockT, p> ScalarT;

        const unsigned int edge = blockEdgeSize + 2*stencilSupportRadius;

        grid_key_dx<dim, int> localCoord;
        grid_key_dx<dim, int> elementCoord;

        for (int pos = threadIdx.x; pos < ghostLayerSize; pos += blockDim.x)
        {
            // Convert pos into a linear id accounting for the inner domain offsets
            const unsigned int linId = ghostLayerToThreadsMapping.template get<0>(pos);
            // Now get the linear offset wrt the first element of the block
            elementCoord = getBlockBaseCoord(blockId);
            unsigned int ctr = linId;
            for (int i = 0; i < dim; ++i)
            {
                int v = (ctr % edge) - stencilSupportRadius;
                ctr /= edge;
                elementCoord.set_d(i, elementCoord.get(i) + v);
            }

            // Actually load the data into the shared region
            ScalarT *basePtr = (ScalarT *)sharedRegionPtr;
            *(basePtr + linId) = get<p>(elementCoord);
        }
    }

    /*! \brief Load the ghost area into the shared region
     *
     * \param block data block
     * \param blockId position of the block
     * \param sharedRegionPtr shared memory region
     */
    template<unsigned int p, typename AggrWrapperT ,typename SharedPtrT>
    inline __device__ void
    __loadGhostBlock(const AggrWrapperT &block, const openfpm::sparse_index<unsigned int> blockId, SharedPtrT * sharedRegionPtr)
    {
        loadGhostBlock_impl<ct_params::nLoop,dim,AggregateBlockT,p,ct_params,blockEdgeSize>::load(block,
                           sharedRegionPtr,
                           ghostLayerToThreadsMapping,
                           nn_blocks,
                           this->blockMap,
                           stencilSupportRadius,
                           ghostLayerSize,
                           blockId.id,
                           background);
    }

    /*! \brief Load the ghost area into the shared region, also filling the mask
     *
     * \param block data block
     * \param blockId position of the block
     * \param sharedRegionPtr shared memory region
     * \param maskPtr mask region
     */
    template<unsigned int p, typename AggrWrapperT ,typename SharedPtrT>
    inline __device__ void
    __loadGhostBlock(const AggrWrapperT &block, const openfpm::sparse_index<unsigned int> blockId, SharedPtrT * sharedRegionPtr, unsigned char * maskPtr)
    {
        loadGhostBlock_impl<ct_params::nLoop,dim,AggregateBlockT,p,ct_params,blockEdgeSize>::load(block,
                           sharedRegionPtr,
                           maskPtr,
                           ghostLayerToThreadsMapping,
                           nn_blocks,
                           this->blockMap,
                           stencilSupportRadius,
                           ghostLayerSize,
                           blockId.id,
                           background);
    }

    template<unsigned int p, unsigned int ... props>
    inline __device__ void
    __loadGhost(const unsigned int blockId, const int * neighboursPos, void *sharedRegionPtr[sizeof...(props)+1])
    {
        __loadGhost<p>(blockId, neighboursPos, sharedRegionPtr);
        if (sizeof...(props) > 1)
        {
            __loadGhost<props ...>(blockId, neighboursPos, sharedRegionPtr + 1);
        }
        else if (sizeof...(props) == 1)
        {
            __loadGhost<props ...>(blockId, neighboursPos, *(sharedRegionPtr + 1));
        }
    }

    template<unsigned int p, typename AggrWrapperT, typename SharedPtrT>
    inline __device__ void
    __storeBlock(AggrWrapperT &block, SharedPtrT sharedRegionPtr)
    {
        typedef ScalarTypeOf<AggregateBlockT, p> ScalarT;

        const unsigned int pos = threadIdx.x;
        //todo: Improve this version to allow multiple chunks per block!
        if (pos < blockSize)
        {
            // Convert pos into a linear id accounting for the ghost offsets
            unsigned int coord[dim];
            linToCoordWithOffset<blockEdgeSize>(pos, stencilSupportRadius, coord);
            const unsigned int linId = coordToLin<blockEdgeSize>(coord, stencilSupportRadius);
//            const unsigned int linId = shift_position<dim,blockEdgeSize>::shift(pos,stencilSupportRadius);

            // Actually store the data from the shared region
            ScalarT *basePtr = (ScalarT *)sharedRegionPtr;

            block.template get<p>()[pos] = *(basePtr + linId);
        }
    }

    template<unsigned int p, unsigned int ... props, typename AggrWrapperT>
    inline __device__ void
    __storeBlock(AggrWrapperT &block, void *sharedRegionPtr[sizeof...(props)+1])
    {
        __storeBlock<p>(block, sharedRegionPtr);
        if (sizeof...(props) > 1)
        {
            __storeBlock<props ...>(block, sharedRegionPtr + 1);
        }
        else if (sizeof...(props) == 1)
        {
            __storeBlock<props ...>(block, *(sharedRegionPtr + 1));
        }
    }
};


#endif //OPENFPM_PDATA_SPARSEGRIDGPU_KER_CUH