OpenFPM  5.2.0
Project that contains the implementation of distributed structures
CellList_gpu.hpp
1 
2 #ifndef OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_
3 #define OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_
4 
5 #include "config.h"
6 
7 #ifdef CUDA_GPU
8 
9 #include "Vector/map_vector_sparse.hpp"
10 #include "NN/CellList/CellDecomposer.hpp"
11 #include "Vector/map_vector.hpp"
12 #include "NN/CellList/cuda/Cuda_cell_list_util_func.hpp"
13 #include "NN/CellList/cuda/CellList_gpu_ker.cuh"
14 #include "util/cuda_util.hpp"
15 #include "NN/CellList/CellList_util.hpp"
16 #include "NN/CellList/CellList.hpp"
17 #include "util/cuda/scan_ofp.cuh"
18 
19 
20 template<unsigned int dim,
21  typename T,
22  typename Memory = CudaMemory,
23  typename transform_type = no_transform_only<dim,T>,
24  bool is_sparse = false>
25 class CellList_gpu;
26 
27 template<unsigned int dim, typename T, typename Memory, typename transform_type>
28 class CellList_gpu<dim,T,Memory,transform_type,false> : public CellDecomposer_sm<dim,T,transform_type>
29 {
30 public:
31  typedef int ids_type;
32 
33 private:
36 
38  openfpm::vector_gpu<aggregate<unsigned int>> cellIndexLocalIndexToUnsorted;
39 
41  openfpm::vector_gpu<aggregate<unsigned int>> numPartInCellPrefixSum;
42 
45 
47  openfpm::vector_gpu<aggregate<unsigned int>> sortedToUnsortedIndex;
48 
50  openfpm::vector_gpu<aggregate<unsigned int>> sortedToSortedIndexNoGhost;
51 
53  openfpm::vector_gpu<aggregate<unsigned int>> unsortedToSortedIndex;
54 
56  openfpm::vector_gpu<aggregate<unsigned int>> isSortedDomainOrGhost;
57 
59  size_t boxNeighborNumber;
60 
62  openfpm::vector_gpu<aggregate<int>> boxNeighborCellOffset;
63 
65  openfpm::vector_gpu<aggregate<int>> boxNeighborCellOffsetSym;
66 
68  openfpm::array<T,dim> unitCellP2;
69 
72 
75 
77  openfpm::vector_gpu<aggregate<int>> rcutNeighborCellOffset;
78 
82  size_t nDecRefRedec;
83 
85  gpu::ofp_context_t* gpuContext;
86 
88  size_t opt;
89 
91  void InitializeStructures(
92  const size_t (& div)[dim],
93  size_t tot_n_cell,
94  size_t pad)
95  {
96  for (size_t i = 0 ; i < dim ; i++)
97  {
98  numCellDim[i] = div[i];
99  unitCellP2[i] = this->getCellBox().getP2().get(i);
100  cellPadDim[i] = pad;
101  }
102 
103  numPartInCell.resize(tot_n_cell+1);
104 
105  boxNeighborNumber = 1;
106  constructNeighborCellOffset(boxNeighborNumber);
107  }
108 
109  void constructNeighborCellOffset(size_t boxNeighborNumber)
110  {
111 
112  NNcalc_box(boxNeighborNumber,boxNeighborCellOffset,this->getGrid());
113  NNcalc_boxSym(boxNeighborNumber,boxNeighborCellOffsetSym,this->getGrid());
114 
115  boxNeighborCellOffset.template hostToDevice<0>();
116  boxNeighborCellOffsetSym.template hostToDevice<0>();
117  }
118 
	/*! \brief Build the list of sorted indexes that belong to domain (non-ghost) particles
	 *
	 * Marks every sorted particle as domain or ghost, prefix-sums the marks in
	 * place, and compacts the domain positions into sortedToSortedIndexNoGhost.
	 *
	 * \param gpuContext gpu context used by the scan
	 * \param start first particle index
	 * \param stop one-past-last particle index
	 * \param ghostMarker unsorted indexes >= ghostMarker are ghost particles
	 */
	void constructSortedToSortedIndexNoGhost(
		gpu::ofp_context_t& gpuContext,
		size_t start,
		size_t stop,
		size_t ghostMarker)
	{
#ifdef __NVCC__
		// one extra element so the scan ends with the total domain count
		isSortedDomainOrGhost.resize(stop-start+1);

		auto ite = isSortedDomainOrGhost.getGPUIterator();

		// flag each sorted slot using its unsorted index vs ghostMarker
		CUDA_LAUNCH((mark_domain_particles),ite,
			sortedToUnsortedIndex.toKernel(),
			isSortedDomainOrGhost.toKernel(),
			ghostMarker
		);

		// in-place scan: each entry becomes the output slot of that particle
		openfpm::scan(
			(unsigned int *)isSortedDomainOrGhost.template getDeviceBuffer<0>(),
			isSortedDomainOrGhost.size(),
			(unsigned int *)isSortedDomainOrGhost.template getDeviceBuffer<0>(),
			gpuContext
		);

		// copy back only the last element: the number of domain particles
		isSortedDomainOrGhost.template deviceToHost<0>(isSortedDomainOrGhost.size()-1,isSortedDomainOrGhost.size()-1);
		auto sz = isSortedDomainOrGhost.template get<0>(isSortedDomainOrGhost.size()-1);

		sortedToSortedIndexNoGhost.resize(sz);

		// compact the domain particle positions using the scanned offsets
		CUDA_LAUNCH((collect_domain_ghost_ids),ite,
			isSortedDomainOrGhost.toKernel(),
			sortedToSortedIndexNoGhost.toKernel()
		);
#endif
	}
159 
	/*! \brief Build the dense cell list from the particle positions
	 *
	 * Counts particles per cell, prefix-sums the counts, scatters every
	 * particle to its cell slot, and builds the sorted<->unsorted index maps
	 * plus the domain-only (non-ghost) sorted index list.
	 *
	 * \param vPos particle positions
	 * \param vPrp particle properties (only its size is read, to size the maps)
	 * \param gpuContext gpu context used by the scan
	 * \param ghostMarker unsorted indexes >= ghostMarker are ghost particles
	 * \param start first particle to insert
	 * \param stop one-past-last particle ((size_t)-1 means vPos.size())
	 */
	template<typename vector, typename vector_prp>
	void construct_dense(
		vector & vPos,
		vector_prp & vPrp,
		gpu::ofp_context_t& gpuContext,
		size_t ghostMarker,
		size_t start = 0,
		size_t stop = -1)
	{
#ifdef __NVCC__
		this->gpuContext = &gpuContext;
		this->ghostMarker = ghostMarker;
		if (stop == (size_t)-1) stop = vPos.size();

		// NOTE(review): stop-start-1 underflows when stop == start; presumably
		// the empty case is caught by the wthr.x/stop checks below -- confirm
		auto ite_gpu = vPos.getGPUIteratorTo(stop-start-1);

		// cellListGrid.size() returns total size of the grid
		// one extra slot so the scan can produce the total as its last element
		numPartInCell.resize(this->cellListGrid.size()+1);
		numPartInCell.template fill<0>(0);

		cellIndex_LocalIndex.resize(stop - start);

		if (ite_gpu.wthr.x == 0 || vPos.size() == 0 || stop == 0)
		{
			// no particles
			numPartInCellPrefixSum.resize(numPartInCell.size());
			numPartInCellPrefixSum.template fill<0>(0);
			return;
		}

		// per particle: compute its cell, bump that cell's counter and record
		// (cell index, local index within the cell)
		CUDA_LAUNCH((fill_cellIndex_LocalIndex<dim,T,ids_type>),ite_gpu,
			numCellDim,
			unitCellP2,
			cellPadDim,
			this->getTransform(),
			stop,
			start,
			vPos.toKernel(),
			numPartInCell.toKernel(),
			cellIndex_LocalIndex.toKernel()
		);

		// prefix-sum the per-cell counters -> start offset of each cell
		numPartInCellPrefixSum.resize(numPartInCell.size());
		openfpm::scan(
			(unsigned int *)numPartInCell.template getDeviceBuffer<0>(),
			numPartInCell.size(),
			(unsigned int *)numPartInCellPrefixSum.template getDeviceBuffer<0>(),
			gpuContext
		);

		// scatter each particle to its slot (cell start + local index)
		cellIndexLocalIndexToUnsorted.resize(stop-start);
		auto itgg = cellIndex_LocalIndex.getGPUIterator();

		CUDA_LAUNCH((fill_cells),itgg,
			numPartInCellPrefixSum.toKernel(),
			cellIndex_LocalIndex.toKernel(),
			cellIndexLocalIndexToUnsorted.toKernel(),
			start
		);

		// build both directions of the sorted <-> unsorted index map
		sortedToUnsortedIndex.resize(stop-start);
		unsortedToSortedIndex.resize(vPrp.size());

		CUDA_LAUNCH((constructSortUnsortBidirectMap),
			vPrp.getGPUIteratorTo(stop-start,64),
			sortedToUnsortedIndex.toKernel(),
			unsortedToSortedIndex.toKernel(),
			cellIndexLocalIndexToUnsorted.toKernel()
		);

		constructSortedToSortedIndexNoGhost(gpuContext,start,stop,ghostMarker);
#else

		std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct() this function is suppose must be compiled with NVCC compiler, but it look like has been compiled by the standard system compiler" << std::endl;

#endif
	}
251 
252 public:
253 
255  typedef int yes_is_gpu_celllist;
256 
257  // typedefs needed for toKernel_transform
258 
259  static const unsigned int dim_ = dim;
260  typedef T stype_;
261  typedef ids_type ids_type_;
262  typedef transform_type transform_type_;
263  typedef boost::mpl::bool_<false> is_sparse_;
264 
265  // end of typedefs needed for toKernel_transform
266 
	/*! \brief Copy constructor
	 *
	 * \param clg cell list to copy
	 */
	CellList_gpu(const CellList_gpu<dim,T,Memory,transform_type> & clg)
	{
		this->operator=(clg);
	}

	/*! \brief Move constructor
	 *
	 * \param clg cell list to move from
	 */
	CellList_gpu(CellList_gpu<dim,T,Memory,transform_type> && clg)
	{
		this->operator=(std::move(clg));
	}

	//! Default constructor: non-symmetric cell list, grid not yet initialized
	CellList_gpu() : opt(CL_NON_SYMMETRIC) {}

	/*! \brief Construct and initialize the cell list
	 *
	 * \param box domain box
	 * \param div number of cells in each dimension
	 * \param pad number of padding cells (stored per dimension in cellPadDim)
	 */
	CellList_gpu(
		const Box<dim,T> & box,
		const size_t (&div)[dim],
		const size_t pad = 1)
	: opt(CL_NON_SYMMETRIC)
	{
		Initialize(box,div,pad);
	}
301 
	/*! \brief Set the extent of the box neighborhood (in cells) and rebuild the offsets
	 *
	 * \param n_NN number of neighbor cell layers
	 */
	void setBoxNN(size_t n_NN)
	{
		boxNeighborNumber = n_NN;
		constructNeighborCellOffset(n_NN);
	}

	/*! \brief Get the current box neighborhood extent
	 *
	 * \return the neighborhood extent in cells
	 */
	inline size_t getBoxNN() const
	{
		return boxNeighborNumber;
	}

	//! Rebuild the neighbor cell offsets with the current boxNeighborNumber
	void resetBoxNN()
	{
		constructNeighborCellOffset(boxNeighborNumber);
	}
317 
	/*! \brief Initialize the cell decomposer and the internal structures
	 *
	 * \param box domain box
	 * \param div number of cells in each dimension
	 * \param pad number of padding cells
	 */
	void Initialize(
		const Box<dim,T> & box,
		const size_t (&div)[dim],
		const size_t pad = 1)
	{
		Matrix<dim,T> mat;
		CellDecomposer_sm<dim,T,transform_type>::setDimensions(box,div, mat, pad);

		// create the array that stores the number of particles in each cell and set it to 0
		InitializeStructures(this->cellListGrid.getSize(),this->cellListGrid.size(),pad);
	}
337 
339  getSortToNonSort() {
340  return sortedToUnsortedIndex;
341  }
342 
344  getNonSortToSort() {
345  return unsortedToSortedIndex;
346  }
347 
349  getDomainSortIds() {
350  return sortedToSortedIndexNoGhost;
351  }
352 
353 
	/*! \brief Use a cutoff-radius neighborhood instead of the full box
	 *
	 * Computes the cell offsets reachable within the given radius and uploads
	 * them to the device (used by the radius-based neighbor iteration).
	 *
	 * \param radius cutoff radius
	 */
	void setRadius(T radius)
	{
		NNcalc_rad(radius,rcutNeighborCellOffset,this->getCellBox(),this->getGrid());

		rcutNeighborCellOffset.template hostToDevice<0>();
	}
365 
381  template<typename vector, typename vector_prp>
382  void construct(
383  vector & vPos,
384  vector_prp & vPrp,
385  gpu::ofp_context_t& gpuContext,
386  size_t ghostMarker = 0,
387  size_t start = 0,
388  size_t stop = -1)
389  {
390 #ifdef __NVCC__
391  if (opt & CL_SYMMETRIC) {
392  std::cout << __FILE__ << ":" << __LINE__ << " symmetric cell list on GPU is not implemented. (And will never be, race conditions make them non suitable for GPU)" << std::endl;
393  }
394 
395  else if (opt & CL_LOCAL_SYMMETRIC) {
396  std::cout << __FILE__ << ":" << __LINE__ << " local symmetric cell list on GPU is not implemented" << std::endl;
397  }
398 
399  else if (opt & CL_NON_SYMMETRIC) {
400  construct_dense(vPos,vPrp,gpuContext,ghostMarker,start,stop);
401  }
402 #else
403  std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct() this function is suppose must be compiled with NVCC compiler, but it look like has been compiled by the standard system compiler" << std::endl;
404 #endif
405  }
406 
	/*! \brief Construct the cell list and additionally write reordered
	 *         (cell-sorted) copies of positions/properties
	 *
	 * After the dense construction, positions and the listed properties are
	 * gathered into vPosReorder/vPrpReorder using unsortedToSortedIndex.
	 *
	 * \tparam prp properties to reorder (CL_GPU_REORDER_PROPERTY has effect
	 *         only when at least one is listed)
	 *
	 * \param vPos particle positions (input)
	 * \param vPrp particle properties (input)
	 * \param vPosReorder reordered positions (output)
	 * \param vPrpReorder reordered properties (output)
	 * \param gpuContext gpu context
	 * \param ghostMarker unsorted indexes >= ghostMarker are ghost particles
	 * \param start first particle
	 * \param stop one-past-last particle ((size_t)-1 means vPos.size())
	 */
	template<typename vector, typename vector_prp, unsigned int ... prp>
	void construct(
		vector & vPos,
		vector_prp & vPrp,
		vector & vPosReorder,
		vector_prp & vPrpReorder,
		gpu::ofp_context_t& gpuContext,
		size_t ghostMarker = 0,
		size_t start = 0,
		size_t stop = -1)
	{
#ifdef __NVCC__
		if (opt & CL_SYMMETRIC) {
			std::cout << __FILE__ << ":" << __LINE__ << " symmetric cell list on GPU is not implemented. (And will never be, race conditions make them non suitable for GPU)" << std::endl;
		}

		else if (opt & CL_LOCAL_SYMMETRIC) {
			std::cout << __FILE__ << ":" << __LINE__ << " local symmetric cell list on GPU is not implemented" << std::endl;
		}

		else if (opt & CL_NON_SYMMETRIC) {

			// a static domain lets the caller reuse the previous construction
			if (!(opt & CL_GPU_SKIP_CONSTRUCT_ON_STATIC_DOMAIN))
				construct_dense(vPos,vPrp,gpuContext,ghostMarker,start,stop);

			if (stop == (size_t)-1) stop = vPos.size();

			// gather positions into cell-sorted order
			if (opt & CL_GPU_REORDER_POSITION) {
				CUDA_LAUNCH((reorderParticlesPos),
					vPos.getGPUIteratorTo(stop-start,64),
					vPos.toKernel(),
					vPosReorder.toKernel(),
					unsortedToSortedIndex.toKernel(),
					start
				);
			}

			// gather the selected properties into cell-sorted order
			if (opt & CL_GPU_REORDER_PROPERTY && sizeof...(prp)) {
				CUDA_LAUNCH(
					(reorderParticlesPrp<
						decltype(vPrp.toKernel()),
						decltype(unsortedToSortedIndex.toKernel()),
						prp...>),
					vPrp.getGPUIteratorTo(stop-start,64),
					vPrp.toKernel(),
					vPrpReorder.toKernel(),
					unsortedToSortedIndex.toKernel(),
					start
				);
			}
		}
#else
		std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct() this function is suppose must be compiled with NVCC compiler, but it look like has been compiled by the standard system compiler" << std::endl;
#endif
	}
481 
483  {
485  numPartInCellPrefixSum.toKernel(),
486  sortedToUnsortedIndex.toKernel(),
487  sortedToSortedIndexNoGhost.toKernel(),
488  rcutNeighborCellOffset.toKernel(),
489  boxNeighborCellOffset.toKernel(),
490  boxNeighborCellOffsetSym.toKernel(),
491  unitCellP2,
492  numCellDim,
493  cellPadDim,
494  this->getTransform(),
495  ghostMarker,
496  this->cellListSpaceBox,
497  this->cellListGrid,
498  this->cellShift
499  );
500 };
501 
	/*! \brief Release the construction buffers built by construct()
	 *
	 * Note: the neighborhood offset tables and the remaining index maps are
	 * intentionally kept.
	 */
	void clear()
	{
		numPartInCell.clear();
		cellIndexLocalIndexToUnsorted.clear();
		numPartInCellPrefixSum.clear();
		cellIndex_LocalIndex.clear();
		sortedToUnsortedIndex.clear();
	}
514 
516 
	//! marker separating domain particles from ghost particles
	//! (used by mark_domain_particles on the unsorted indexes)
	size_t ghostMarker = 0;

	/*! \brief Get the ghost marker
	 *
	 * \return the ghost marker
	 */
	inline size_t getGhostMarker()
	{
		return ghostMarker;
	}

	/*! \brief Set the ghost marker
	 *
	 * \param ghostMarker first unsorted index belonging to the ghost
	 */
	inline void setGhostMarker(size_t ghostMarker)
	{
		this->ghostMarker = ghostMarker;
	}

	/*! \brief Store the decomposition counter
	 *
	 * NOTE(review): opaque id set/read by callers (e.g. to detect a stale
	 * decomposition) -- semantics defined outside this class
	 *
	 * \param nDecRefRedec decomposition counter
	 */
	void set_ndec(size_t nDecRefRedec)
	{
		this->nDecRefRedec = nDecRefRedec;
	}

	/*! \brief Get the decomposition counter
	 *
	 * \return the decomposition counter
	 */
	size_t get_ndec() const
	{
		return nDecRefRedec;
	}
561 
563 
	//! Copy the internal cell arrays from device to host (debug/inspection only)
	void debug_deviceToHost()
	{
		numPartInCell.template deviceToHost<0>();
		cellIndexLocalIndexToUnsorted.template deviceToHost<0>();
		numPartInCellPrefixSum.template deviceToHost<0>();
	}

	/*! \brief Get the number of entries of the per-cell counter array
	 *
	 * \note InitializeStructures/construct_dense size this array to the number
	 *       of cells plus one (extra scan slot)
	 *
	 * \return the size of the counter array
	 */
	size_t getNCells()
	{
		return numPartInCell.size();
	}

	/*! \brief Get the number of particles in the given cell
	 *
	 * NOTE(review): host-side read -- presumably requires a prior
	 * debug_deviceToHost() to be meaningful; confirm with callers
	 *
	 * \param i cell index
	 *
	 * \return number of particles in cell i
	 */
	size_t getNelements(size_t i)
	{
		return numPartInCell.template get<0>(i);
	}
593 
	/*! \brief Get the unsorted particle index of the ele-th particle in a cell (host-side)
	 *
	 * NOTE(review): reads host copies -- presumably requires a prior
	 * debug_deviceToHost(); confirm with callers
	 *
	 * \param cell cell index
	 * \param ele element within the cell
	 *
	 * \return the unsorted particle index
	 */
	inline auto get(size_t cell, size_t ele) -> decltype(cellIndexLocalIndexToUnsorted.template get<0>(numPartInCellPrefixSum.template get<0>(cell)+ele))
	{
		return cellIndexLocalIndexToUnsorted.template get<0>(numPartInCellPrefixSum.template get<0>(cell)+ele);
	}

	/*! \brief Get the unsorted particle index of the ele-th particle in a cell (host-side, const)
	 *
	 * \param cell cell index
	 * \param ele element within the cell
	 *
	 * \return the unsorted particle index
	 */
	inline auto get(size_t cell, size_t ele) const -> decltype(cellIndexLocalIndexToUnsorted.template get<0>(numPartInCellPrefixSum.template get<0>(cell)+ele))
	{
		return cellIndexLocalIndexToUnsorted.template get<0>(numPartInCellPrefixSum.template get<0>(cell)+ele);
	}
622 
628  void swap(CellList_gpu<dim,T,Memory,transform_type,false> & clg)
629  {
630  ((CellDecomposer_sm<dim,T,transform_type> *)this)->swap(clg);
631  numPartInCell.swap(clg.numPartInCell);
632  cellIndexLocalIndexToUnsorted.swap(clg.cellIndexLocalIndexToUnsorted);
633  numPartInCellPrefixSum.swap(clg.numPartInCellPrefixSum);
634  cellIndex_LocalIndex.swap(clg.cellIndex_LocalIndex);
635  sortedToUnsortedIndex.swap(clg.sortedToUnsortedIndex);
636  sortedToSortedIndexNoGhost.swap(clg.sortedToSortedIndexNoGhost);
637  unsortedToSortedIndex.swap(clg.unsortedToSortedIndex);
638  boxNeighborCellOffset.swap(clg.boxNeighborCellOffset);
639  boxNeighborCellOffsetSym.swap(clg.boxNeighborCellOffsetSym);
640  rcutNeighborCellOffset.swap(clg.rcutNeighborCellOffset);
641 
642  unitCellP2.swap(clg.unitCellP2);
643  numCellDim.swap(clg.numCellDim);
644  cellPadDim.swap(clg.cellPadDim);
645 
646  size_t g_m_tmp = ghostMarker;
647  ghostMarker = clg.ghostMarker;
648  clg.ghostMarker = g_m_tmp;
649 
650  size_t n_dec_tmp = nDecRefRedec;
651  nDecRefRedec = clg.nDecRefRedec;
652  clg.nDecRefRedec = n_dec_tmp;
653 
654  size_t optTmp = opt;
655  opt = clg.opt;
656  clg.opt = optTmp;
657  }
658 
	/*! \brief Copy assignment: deep-copies the decomposer state and every array
	 *
	 * \param clg cell list to copy
	 *
	 * \return *this
	 */
	CellList_gpu<dim,T,Memory,transform_type,false> &
	operator=(const CellList_gpu<dim,T,Memory,transform_type,false> & clg)
	{
		*static_cast<CellDecomposer_sm<dim,T,transform_type> *>(this) = *static_cast<const CellDecomposer_sm<dim,T,transform_type> *>(&clg);
		numPartInCell = clg.numPartInCell;
		cellIndexLocalIndexToUnsorted = clg.cellIndexLocalIndexToUnsorted;
		numPartInCellPrefixSum = clg.numPartInCellPrefixSum;
		cellIndex_LocalIndex = clg.cellIndex_LocalIndex;
		sortedToUnsortedIndex = clg.sortedToUnsortedIndex;
		sortedToSortedIndexNoGhost = clg.sortedToSortedIndexNoGhost;
		unsortedToSortedIndex = clg.unsortedToSortedIndex;
		boxNeighborCellOffset = clg.boxNeighborCellOffset;
		boxNeighborCellOffsetSym= clg.boxNeighborCellOffsetSym;
		rcutNeighborCellOffset = clg.rcutNeighborCellOffset;

		// scalar/geometry state
		unitCellP2 = clg.unitCellP2;
		numCellDim = clg.numCellDim;
		cellPadDim = clg.cellPadDim;
		ghostMarker = clg.ghostMarker;
		nDecRefRedec = clg.nDecRefRedec;
		opt = clg.opt;
		boxNeighborNumber = clg.boxNeighborNumber;

		return *this;
	}
684 
	/*! \brief Move assignment: swaps the buffers with the source and copies the
	 *         scalar state
	 *
	 * \param clg cell list to move from
	 *
	 * \return *this
	 */
	CellList_gpu<dim,T,Memory,transform_type> &
	operator=(CellList_gpu<dim,T,Memory,transform_type> && clg)
	{
		static_cast<CellDecomposer_sm<dim,T,transform_type> *>(this)->swap(*static_cast<CellDecomposer_sm<dim,T,transform_type> *>(&clg));
		numPartInCell.swap(clg.numPartInCell);
		cellIndexLocalIndexToUnsorted.swap(clg.cellIndexLocalIndexToUnsorted);
		numPartInCellPrefixSum.swap(clg.numPartInCellPrefixSum);
		cellIndex_LocalIndex.swap(clg.cellIndex_LocalIndex);
		sortedToUnsortedIndex.swap(clg.sortedToUnsortedIndex);
		sortedToSortedIndexNoGhost.swap(clg.sortedToSortedIndexNoGhost);
		unsortedToSortedIndex.swap(clg.unsortedToSortedIndex);
		boxNeighborCellOffset.swap(clg.boxNeighborCellOffset);
		boxNeighborCellOffsetSym.swap(clg.boxNeighborCellOffsetSym);
		rcutNeighborCellOffset.swap(clg.rcutNeighborCellOffset);

		// small fixed-size arrays and scalars are copied, not swapped
		unitCellP2 = clg.unitCellP2;
		numCellDim = clg.numCellDim;
		cellPadDim = clg.cellPadDim;
		ghostMarker = clg.ghostMarker;
		nDecRefRedec = clg.nDecRefRedec;
		opt = clg.opt;
		boxNeighborNumber = clg.boxNeighborNumber;

		return *this;
	}
710 
	/*! \brief Scatter reordered (cell-sorted) particles back to their original order
	 *
	 * Inverse of the reordering construct(): uses sortedToUnsortedIndex to
	 * write positions/properties back to their unsorted slots.
	 *
	 * \tparam prp properties to restore (CL_GPU_RESTORE_PROPERTY has effect
	 *         only when at least one is listed)
	 *
	 * \param vPosReordered cell-sorted positions (input)
	 * \param vPrpReordered cell-sorted properties (input)
	 * \param vPos positions in original order (output)
	 * \param vPrp properties in original order (output)
	 * \param start first particle
	 * \param stop one-past-last particle ((size_t)-1 means vPosReordered.size())
	 */
	template<typename vector, typename vector_prp, unsigned int ... prp>
	void restoreOrder(
		vector & vPosReordered,
		vector_prp & vPrpReordered,
		vector & vPos,
		vector_prp & vPrp,
		size_t start = 0,
		size_t stop = -1)
	{
	#ifdef __NVCC__
		if (stop == (size_t)-1) stop = vPosReordered.size();

		if (opt & CL_GPU_RESTORE_POSITION) {
			CUDA_LAUNCH((reorderParticlesPos),
				vPosReordered.getGPUIteratorTo(stop-start,64),
				vPosReordered.toKernel(),
				vPos.toKernel(),
				sortedToUnsortedIndex.toKernel(),
				start
			);
		}

		if (opt & CL_GPU_RESTORE_PROPERTY && sizeof...(prp)) {
			CUDA_LAUNCH(
				(reorderParticlesPrp<
					decltype(vPrpReordered.toKernel()),
					decltype(sortedToUnsortedIndex.toKernel()),
					prp...>),
				vPrpReordered.getGPUIteratorTo(stop-start,64),
				vPrpReordered.toKernel(),
				vPrp.toKernel(),
				sortedToUnsortedIndex.toKernel(),
				start
			);
		}
	#endif
	}
763 
	/*! \brief Get the runtime option flags (CL_* bitmask)
	 *
	 * \return the option bitmask
	 */
	inline size_t getOpt() const
	{
		return opt;
	}

	/*! \brief Set the runtime option flags (CL_* bitmask)
	 *
	 * \param opt option bitmask
	 */
	void setOpt(size_t opt)
	{
		this->opt = opt;
	}
784 };
785 
786 
787 template<unsigned int dim, typename T, typename Memory, typename transform_type>
788 class CellList_gpu<dim,T,Memory,transform_type,true> : public CellDecomposer_sm<dim,T,transform_type>
789 {
790 public:
791  typedef int ids_type;
792 
793 private:
795  openfpm::vector_gpu<aggregate<unsigned int>> cellIndexLocalIndexToUnsorted;
796 
799 
801  openfpm::vector_sparse_gpu<aggregate<unsigned int>> vecSparseCellIndex_PartIndex;
802 
804  openfpm::vector_gpu<aggregate<unsigned int>> nonEmptyNeighborCellCount;
805 
808 
810  size_t boxNeighborNumber;
811 
813  openfpm::vector_gpu<aggregate<int>> boxNeighborCellOffset;
814 
816  openfpm::vector_gpu<aggregate<unsigned int>> sortedToUnsortedIndex;
817 
819  openfpm::vector_gpu<aggregate<unsigned int>> sortedToSortedIndexNoGhost;
820 
822  openfpm::vector_gpu<aggregate<unsigned int>> unsortedToSortedIndex;
823 
825  openfpm::vector_gpu<aggregate<unsigned int>> isSortedDomainOrGhost;
826 
828  openfpm::array<T,dim> unitCellP2;
829 
831  openfpm::array<ids_type,dim> numCellDim;
832 
834  openfpm::array<ids_type,dim> cellPadDim;
835 
838  size_t nDecRefRedec;
839 
841  gpu::ofp_context_t* gpuContext;
842 
844  size_t opt;
845 
	/*! \brief Cache the grid geometry and build the default neighborhood
	 *
	 * \param div number of cells in each dimension
	 * \param tot_n_cell total number of cells (unused here; kept for symmetry
	 *        with the dense specialization)
	 * \param pad number of padding cells
	 */
	void InitializeStructures(
		const size_t (& div)[dim],
		size_t tot_n_cell,
		size_t pad)
	{
		// cache the per-dimension geometry used by the cell-index kernels
		for (size_t i = 0 ; i < dim ; i++)
		{
			numCellDim[i] = div[i];
			unitCellP2[i] = this->getCellBox().getP2().get(i);
			cellPadDim[i] = pad;
		}

		// default neighborhood: one layer of surrounding cells
		boxNeighborNumber = 1;
		constructNeighborCellOffset(boxNeighborNumber);
	}
862 
	/*! \brief Build the box-neighborhood cell offsets and upload them to the device
	 *
	 * \param boxNeighborNumber extent of the neighborhood in cells
	 */
	void constructNeighborCellOffset(size_t boxNeighborNumber)
	{
		NNcalc_box(boxNeighborNumber,boxNeighborCellOffset,this->getGrid());

		boxNeighborCellOffset.template hostToDevice<0>();
	}
869 
	/*! \brief Build the sparse cell list from the particle positions
	 *
	 * Computes each particle's cell, inserts (cell, particle) pairs into a
	 * sparse vector, flushes it to obtain per-cell start offsets, then builds
	 * the neighbor-cell range list and the sorted<->unsorted index maps.
	 *
	 * \param vPos particle positions
	 * \param vPrp particle properties (only its size is read, to size the maps)
	 * \param gpuContext gpu context used by the flush/scan
	 * \param ghostMarker unsorted indexes >= ghostMarker are ghost particles
	 * \param start first particle to insert
	 * \param stop one-past-last particle ((size_t)-1 means vPos.size())
	 */
	template<typename vector, typename vector_prp>
	void construct_sparse(
		vector & vPos,
		vector_prp & vPrp,
		gpu::ofp_context_t& gpuContext,
		size_t ghostMarker,
		size_t start = 0,
		size_t stop = -1)
	{
#ifdef __NVCC__
		this->gpuContext = &gpuContext;
		this->ghostMarker = ghostMarker;
		if (stop == (size_t)-1) stop = vPos.size();

		cellIndex.resize(stop - start);
		cellIndex.template fill<0>(0);

		auto ite_gpu = vPos.getGPUIteratorTo(stop-start,1024);

		if (ite_gpu.wthr.x == 0 || vPos.size() == 0 || stop == 0)
			return;

		// per particle: compute its cell index
		// NOTE(review): vPos.size() is passed as the stop bound here while the
		// dense variant passes stop -- confirm this is intended when stop < vPos.size()
		CUDA_LAUNCH((fill_cellIndex<dim,T,ids_type>),ite_gpu,
			numCellDim,
			unitCellP2,
			cellPadDim,
			this->getTransform(),
			vPos.size(),
			start,
			vPos.toKernel(),
			cellIndex.toKernel()
		);

		cellIndexLocalIndexToUnsorted.resize(stop-start);

		// (unsigned int)-1 marks empty slots in the sparse map
		vecSparseCellIndex_PartIndex.clear();
		vecSparseCellIndex_PartIndex.template setBackground<0>((unsigned int)-1);
		vecSparseCellIndex_PartIndex.setGPUInsertBuffer(ite_gpu.wthr.x,ite_gpu.thr.x);

		// insert one (cell index, particle index) pair per particle
		CUDA_LAUNCH((fill_vsCellIndex_PartIndex),ite_gpu,
			vecSparseCellIndex_PartIndex.toKernel(),
			cellIndex.toKernel()
		);

		// flush_vd<sstart_<0>> returns the cumulative prefix for cell indexes
		vecSparseCellIndex_PartIndex.template flush_vd<sstart_<0>>(cellIndexLocalIndexToUnsorted,gpuContext,FLUSH_ON_DEVICE);

		nonEmptyNeighborCellCount.resize(vecSparseCellIndex_PartIndex.size()+1);
		nonEmptyNeighborCellCount.template fill<0>(0);

		// for every particle increase the counter for every non-zero neighbor cell
		auto itgg = vecSparseCellIndex_PartIndex.getGPUIterator();
		CUDA_LAUNCH((countNonEmptyNeighborCells),itgg,
			vecSparseCellIndex_PartIndex.toKernel(),
			nonEmptyNeighborCellCount.toKernel(),
			boxNeighborCellOffset.toKernel()
		);

		// get total number of non-empty neighboring cells
		openfpm::scan(
			(unsigned int *)nonEmptyNeighborCellCount.template getDeviceBuffer<0>(),
			nonEmptyNeighborCellCount.size(),
			(unsigned int *)nonEmptyNeighborCellCount.template getDeviceBuffer<0>(),
			gpuContext
		);

		// copy back only the last element of the scan: the total
		nonEmptyNeighborCellCount.template deviceToHost<0>(nonEmptyNeighborCellCount.size()-1, nonEmptyNeighborCellCount.size()-1);
		size_t totalNeighborCellCount = nonEmptyNeighborCellCount.template get<0>(nonEmptyNeighborCellCount.size()-1);

		// build the [from, to) particle ranges of every non-empty neighbor cell
		neighborPartIndexFrom_To.resize(totalNeighborCellCount);
		CUDA_LAUNCH((fillNeighborCellList),itgg,
			vecSparseCellIndex_PartIndex.toKernel(),
			nonEmptyNeighborCellCount.toKernel(),
			boxNeighborCellOffset.toKernel(),
			neighborPartIndexFrom_To.toKernel(),
			(typename decltype(vecSparseCellIndex_PartIndex.toKernel())::index_type)cellIndexLocalIndexToUnsorted.size()
		);

		// build both directions of the sorted <-> unsorted index map
		sortedToUnsortedIndex.resize(stop-start);
		unsortedToSortedIndex.resize(vPrp.size());

		// NOTE(review): ite is unused; the launch below builds its own iterator
		auto ite = vPos.getGPUIteratorTo(stop-start,64);

		CUDA_LAUNCH((constructSortUnsortBidirectMap),
			vPrp.getGPUIteratorTo(stop-start,64),
			sortedToUnsortedIndex.toKernel(),
			unsortedToSortedIndex.toKernel(),
			cellIndexLocalIndexToUnsorted.toKernel()
		);

		constructSortedToSortedIndexNoGhost(gpuContext,start,stop,ghostMarker);
#else
		std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct() this function is suppose must be compiled with NVCC compiler, but it look like has been compiled by the standard system compiler" << std::endl;
#endif
	}
969 
	/*! \brief Build the list of sorted indexes that belong to domain (non-ghost) particles
	 *
	 * Marks every sorted particle as domain or ghost, prefix-sums the marks in
	 * place, and compacts the domain positions into sortedToSortedIndexNoGhost.
	 *
	 * \param gpuContext gpu context used by the scan
	 * \param start first particle index
	 * \param stop one-past-last particle index
	 * \param ghostMarker unsorted indexes >= ghostMarker are ghost particles
	 */
	void constructSortedToSortedIndexNoGhost(
		gpu::ofp_context_t& gpuContext,
		size_t start,
		size_t stop,
		size_t ghostMarker)
	{
#ifdef __NVCC__
		// one extra element so the scan ends with the total domain count
		isSortedDomainOrGhost.resize(stop-start+1);
		auto ite = isSortedDomainOrGhost.getGPUIterator();

		// flag each sorted slot using its unsorted index vs ghostMarker
		CUDA_LAUNCH((mark_domain_particles),ite,
			sortedToUnsortedIndex.toKernel(),
			isSortedDomainOrGhost.toKernel(),
			ghostMarker
		);

		// in-place scan: each entry becomes the output slot of that particle
		openfpm::scan(
			(unsigned int *)isSortedDomainOrGhost.template getDeviceBuffer<0>(),
			isSortedDomainOrGhost.size(),
			(unsigned int *)isSortedDomainOrGhost.template getDeviceBuffer<0>(),
			gpuContext
		);

		// copy back only the last element: the number of domain particles
		isSortedDomainOrGhost.template deviceToHost<0>(isSortedDomainOrGhost.size()-1,isSortedDomainOrGhost.size()-1);
		auto totalParticleNoGhostCount = isSortedDomainOrGhost.template get<0>(isSortedDomainOrGhost.size()-1);

		sortedToSortedIndexNoGhost.resize(totalParticleNoGhostCount);

		// compact the domain particle positions using the scanned offsets
		CUDA_LAUNCH((collect_domain_ghost_ids),ite,
			isSortedDomainOrGhost.toKernel(),
			sortedToSortedIndexNoGhost.toKernel()
		);
#endif
	}
1009 
1010 public:
1011 
1013  typedef int yes_is_gpu_celllist;
1014 
1015  // typedefs needed for toKernel_transform
1016 
1017  static const unsigned int dim_ = dim;
1018  typedef T stype_;
1019  typedef ids_type ids_type_;
1020  typedef transform_type transform_type_;
1021  typedef boost::mpl::bool_<true> is_sparse_;
1022 
1023  // end of typedefs needed for toKernel_transform
1024 
	/*! \brief Copy constructor
	 *
	 * \param clg cell list to copy
	 */
	CellList_gpu(const CellList_gpu<dim,T,Memory,transform_type> & clg)
	{
		this->operator=(clg);
	}

	/*! \brief Move constructor
	 *
	 * \param clg cell list to move from
	 */
	CellList_gpu(CellList_gpu<dim,T,Memory,transform_type> && clg)
	{
		this->operator=(std::move(clg));
	}

	//! Default constructor: non-symmetric cell list, grid not yet initialized
	CellList_gpu() : opt(CL_NON_SYMMETRIC) {}

	/*! \brief Construct and initialize the cell list
	 *
	 * \param box domain box
	 * \param div number of cells in each dimension
	 * \param pad number of padding cells (stored per dimension in cellPadDim)
	 */
	CellList_gpu(
		const Box<dim,T> & box,
		const size_t (&div)[dim],
		const size_t pad = 1)
	: opt(CL_NON_SYMMETRIC)
	{
		Initialize(box,div,pad);
	}
1059 
	/*! \brief Set the extent of the box neighborhood (in cells) and rebuild the offsets
	 *
	 * \param n_NN number of neighbor cell layers
	 */
	void setBoxNN(unsigned int n_NN)
	{
		boxNeighborNumber = n_NN;
		constructNeighborCellOffset(n_NN);
	}

	/*! \brief Get the current box neighborhood extent
	 *
	 * \return the neighborhood extent in cells
	 */
	inline size_t getBoxNN() const
	{
		return boxNeighborNumber;
	}

	//! Rebuild the neighbor cell offsets with the current boxNeighborNumber
	void resetBoxNN()
	{
		constructNeighborCellOffset(boxNeighborNumber);
	}
1075 
	/*! \brief Initialize the cell decomposer and the internal structures
	 *
	 * \param box domain box
	 * \param div number of cells in each dimension
	 * \param pad number of padding cells
	 */
	void Initialize(
		const Box<dim,T> & box,
		const size_t (&div)[dim],
		const size_t pad = 1)
	{
		Matrix<dim,T> mat;
		CellDecomposer_sm<dim,T,transform_type>::setDimensions(box, div, mat, pad);

		// cache the grid geometry and build the default neighborhood offsets
		InitializeStructures(this->cellListGrid.getSize(),this->cellListGrid.size(),pad);
	}
1095 
1097  getSortToNonSort() {
1098  return sortedToUnsortedIndex;
1099  }
1100 
1102  getNonSortToSort() {
1103  return unsortedToSortedIndex;
1104  }
1105 
1107  getDomainSortIds() {
1108  return sortedToSortedIndexNoGhost;
1109  }
1110 
1111 
	/*! \brief Radius-based neighborhoods are not available for the sparse cell list
	 *
	 * Prints a warning and leaves the neighborhood unchanged.
	 *
	 * \param radius cutoff radius (ignored)
	 */
	void setRadius(T radius)
	{
		std::cerr << "setRadius() is supported by dense cell list only!\n";
	}
1121 
	/*! \brief Construct the sparse cell list from scratch
	 *
	 * Only CL_NON_SYMMETRIC is implemented on GPU; the symmetric variants
	 * print a diagnostic and do nothing.
	 *
	 * \param vPos particle positions
	 * \param vPrp particle properties
	 * \param gpuContext gpu context
	 * \param ghostMarker unsorted indexes >= ghostMarker are ghost particles
	 * \param start first particle to insert
	 * \param stop one-past-last particle ((size_t)-1 means vPos.size())
	 */
	template<typename vector, typename vector_prp>
	void construct(
		vector & vPos,
		vector_prp & vPrp,
		gpu::ofp_context_t& gpuContext,
		size_t ghostMarker = 0,
		size_t start = 0,
		size_t stop = -1)
	{
#ifdef __NVCC__
		if (opt & CL_SYMMETRIC) {
			std::cout << __FILE__ << ":" << __LINE__ << " symmetric cell list on GPU is not implemented. (And will never be, race conditions make them non suitable for GPU)" << std::endl;
		}

		else if (opt & CL_LOCAL_SYMMETRIC) {
			std::cout << __FILE__ << ":" << __LINE__ << " local symmetric cell list on GPU is not implemented" << std::endl;
		}

		else if (opt & CL_NON_SYMMETRIC) {
			construct_sparse(vPos,vPrp,gpuContext,ghostMarker, start, stop);
		}
#else
		std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct() this function is suppose must be compiled with NVCC compiler, but it look like has been compiled by the standard system compiler" << std::endl;
#endif
	}
1162 
	/*! \brief Construct the sparse cell list and additionally write reordered
	 *         (cell-sorted) copies of positions/properties
	 *
	 * \tparam prp properties to reorder (CL_GPU_REORDER_PROPERTY has effect
	 *         only when at least one is listed)
	 *
	 * \param vPos particle positions (input)
	 * \param vPrp particle properties (input)
	 * \param vPosReorder reordered positions (output)
	 * \param vPrpReorder reordered properties (output)
	 * \param gpuContext gpu context
	 * \param ghostMarker unsorted indexes >= ghostMarker are ghost particles
	 * \param start first particle
	 * \param stop one-past-last particle ((size_t)-1 means vPos.size())
	 */
	template<typename vector, typename vector_prp, unsigned int ... prp>
	void construct(
		vector & vPos,
		vector_prp & vPrp,
		vector & vPosReorder,
		vector_prp & vPrpReorder,
		gpu::ofp_context_t& gpuContext,
		size_t ghostMarker = 0,
		size_t start = 0,
		size_t stop = -1)
	{
#ifdef __NVCC__
		if (opt & CL_SYMMETRIC) {
			std::cout << __FILE__ << ":" << __LINE__ << " symmetric cell list on GPU is not implemented. (And will never be, race conditions make them non suitable for GPU)" << std::endl;
		}

		else if (opt & CL_LOCAL_SYMMETRIC) {
			std::cout << __FILE__ << ":" << __LINE__ << " local symmetric cell list on GPU is not implemented" << std::endl;
		}

		else if (opt & CL_NON_SYMMETRIC) {
			// a static domain lets the caller reuse the previous construction
			if (!(opt & CL_GPU_SKIP_CONSTRUCT_ON_STATIC_DOMAIN))
				construct_sparse(vPos,vPrp,gpuContext,ghostMarker, start, stop);

			if (stop == (size_t)-1) stop = vPos.size();

			// gather positions into cell-sorted order
			if (opt & CL_GPU_REORDER_POSITION) {
				CUDA_LAUNCH((reorderParticlesPos),
					vPos.getGPUIteratorTo(stop-start,64),
					vPos.toKernel(),
					vPosReorder.toKernel(),
					unsortedToSortedIndex.toKernel(),
					start
				);
			}

			// gather the selected properties into cell-sorted order
			if (opt & CL_GPU_REORDER_PROPERTY && sizeof...(prp)) {
				CUDA_LAUNCH(
					(reorderParticlesPrp<
						decltype(vPrp.toKernel()),
						decltype(unsortedToSortedIndex.toKernel()),
						prp...>),
					vPrp.getGPUIteratorTo(stop-start,64),
					vPrp.toKernel(),
					vPrpReorder.toKernel(),
					unsortedToSortedIndex.toKernel(),
					start
				);
			}
		}
#else
		std::cout << "Error: " << __FILE__ << ":" << __LINE__ << " you are calling CellList_gpu.construct() this function is suppose must be compiled with NVCC compiler, but it look like has been compiled by the standard system compiler" << std::endl;
#endif
	}
1236 
1238  {
1240  nonEmptyNeighborCellCount.toKernel(),
1241  neighborPartIndexFrom_To.toKernel(),
1242  vecSparseCellIndex_PartIndex.toKernel(),
1243  sortedToUnsortedIndex.toKernel(),
1244  sortedToSortedIndexNoGhost.toKernel(),
1245  unitCellP2,
1246  numCellDim,
1247  cellPadDim,
1248  this->getTransform(),
1249  ghostMarker,
1250  this->cellListSpaceBox,
1251  this->cellListGrid,
1252  this->cellShift
1253  );
1254  }
1255 
	/*! \brief Release the construction buffers built by construct()
	 *
	 * Note: the sparse map and the neighbor range list are intentionally kept.
	 */
	void clear()
	{
		cellIndexLocalIndexToUnsorted.clear();
		cellIndex.clear();
		sortedToUnsortedIndex.clear();
	}
1266 
1268 
	//! marker separating domain particles from ghost particles
	//! (used by mark_domain_particles on the unsorted indexes)
	size_t ghostMarker = 0;

	/*! \brief Get the ghost marker
	 *
	 * \return the ghost marker
	 */
	inline size_t getGhostMarker()
	{
		return ghostMarker;
	}

	/*! \brief Set the ghost marker
	 *
	 * \param ghostMarker first unsorted index belonging to the ghost
	 */
	inline void setGhostMarker(size_t ghostMarker)
	{
		this->ghostMarker = ghostMarker;
	}

	/*! \brief Store the decomposition counter
	 *
	 * NOTE(review): opaque id set/read by callers (e.g. to detect a stale
	 * decomposition) -- semantics defined outside this class
	 *
	 * \param nDecRefRedec decomposition counter
	 */
	void set_ndec(size_t nDecRefRedec)
	{
		this->nDecRefRedec = nDecRefRedec;
	}

	/*! \brief Get the decomposition counter
	 *
	 * \return the decomposition counter
	 */
	size_t get_ndec() const
	{
		return nDecRefRedec;
	}
1313 
1315 
	//! Copy the internal cell arrays from device to host (debug/inspection only)
	void debug_deviceToHost()
	{
		cellIndexLocalIndexToUnsorted.template deviceToHost<0>();
		cellIndex.template deviceToHost<0>();
	}

	/*! \brief Get the unsorted particle index of the ele-th particle in a cell (host-side)
	 *
	 * NOTE(review): reads host copies -- presumably requires a prior
	 * debug_deviceToHost(); confirm with callers
	 *
	 * \param cell cell index
	 * \param ele element within the cell
	 *
	 * \return the unsorted particle index
	 */
	inline auto get(size_t cell, size_t ele) -> decltype(cellIndexLocalIndexToUnsorted.template get<0>(cellIndex.template get<0>(cell)+ele))
	{
		return cellIndexLocalIndexToUnsorted.template get<0>(cellIndex.template get<0>(cell)+ele);
	}

	/*! \brief Get the unsorted particle index of the ele-th particle in a cell (host-side, const)
	 *
	 * \param cell cell index
	 * \param ele element within the cell
	 *
	 * \return the unsorted particle index
	 */
	inline auto get(size_t cell, size_t ele) const -> decltype(cellIndexLocalIndexToUnsorted.template get<0>(cellIndex.template get<0>(cell)+ele))
	{
		return cellIndexLocalIndexToUnsorted.template get<0>(cellIndex.template get<0>(cell)+ele);
	}
1354 
	/*! \brief Swap the content of this cell list with another one
	 *
	 * \param clg cell list to swap with
	 */
	void swap(CellList_gpu<dim,T,Memory,transform_type,true> & clg)
	{
		((CellDecomposer_sm<dim,T,transform_type> *)this)->swap(clg);
		cellIndexLocalIndexToUnsorted.swap(clg.cellIndexLocalIndexToUnsorted);
		cellIndex.swap(clg.cellIndex);
		vecSparseCellIndex_PartIndex.swap(clg.vecSparseCellIndex_PartIndex);
		nonEmptyNeighborCellCount.swap(clg.nonEmptyNeighborCellCount);
		neighborPartIndexFrom_To.swap(clg.neighborPartIndexFrom_To);
		boxNeighborCellOffset.swap(clg.boxNeighborCellOffset);
		sortedToUnsortedIndex.swap(clg.sortedToUnsortedIndex);
		sortedToSortedIndexNoGhost.swap(clg.sortedToSortedIndexNoGhost);
		unsortedToSortedIndex.swap(clg.unsortedToSortedIndex);

		unitCellP2.swap(clg.unitCellP2);
		numCellDim.swap(clg.numCellDim);
		cellPadDim.swap(clg.cellPadDim);

		// scalars are exchanged through temporaries
		size_t g_m_tmp = ghostMarker;
		ghostMarker = clg.ghostMarker;
		clg.ghostMarker = g_m_tmp;

		size_t n_dec_tmp = nDecRefRedec;
		nDecRefRedec = clg.nDecRefRedec;
		clg.nDecRefRedec = n_dec_tmp;

		size_t optTmp = opt;
		opt = clg.opt;
		clg.opt = optTmp;

		int boxNN_tmp = boxNeighborNumber;
		boxNeighborNumber = clg.boxNeighborNumber;
		clg.boxNeighborNumber = boxNN_tmp;
	}
1393 
1394  CellList_gpu<dim,T,Memory,transform_type,true> &
1395  operator=(const CellList_gpu<dim,T,Memory,transform_type,true> & clg)
1396  {
1397  *static_cast<CellDecomposer_sm<dim,T,transform_type> *>(this) = *static_cast<const CellDecomposer_sm<dim,T,transform_type> *>(&clg);
1398  cellIndexLocalIndexToUnsorted = clg.cellIndexLocalIndexToUnsorted;
1399  cellIndex = clg.cellIndex;
1400  vecSparseCellIndex_PartIndex = clg.vecSparseCellIndex_PartIndex;
1401  nonEmptyNeighborCellCount = clg.nonEmptyNeighborCellCount;
1402  neighborPartIndexFrom_To = clg.neighborPartIndexFrom_To;
1403  boxNeighborCellOffset = clg.boxNeighborCellOffset;
1404  sortedToUnsortedIndex = clg.sortedToUnsortedIndex;
1405  sortedToSortedIndexNoGhost = clg.sortedToSortedIndexNoGhost;
1406  unsortedToSortedIndex = clg.unsortedToSortedIndex;
1407 
1408  unitCellP2 = clg.unitCellP2;
1409  numCellDim = clg.numCellDim;
1410  cellPadDim = clg.cellPadDim;
1411  ghostMarker = clg.ghostMarker;
1412  nDecRefRedec = clg.nDecRefRedec;
1413  opt = clg.opt;
1414 
1415  boxNeighborNumber = clg.boxNeighborNumber;
1416 
1417  return *this;
1418  }
1419 
1420  CellList_gpu<dim,T,Memory,transform_type> &
1421  operator=(CellList_gpu<dim,T,Memory,transform_type> && clg)
1422  {
1423  static_cast<CellDecomposer_sm<dim,T,transform_type> *>(this)->swap(*static_cast<CellDecomposer_sm<dim,T,transform_type> *>(&clg));
1424  cellIndexLocalIndexToUnsorted.swap(clg.cellIndexLocalIndexToUnsorted);
1425  cellIndex.swap(clg.cellIndex);
1426  vecSparseCellIndex_PartIndex.swap(clg.vecSparseCellIndex_PartIndex);
1427  nonEmptyNeighborCellCount.swap(clg.nonEmptyNeighborCellCount);
1428  neighborPartIndexFrom_To.swap(clg.neighborPartIndexFrom_To);
1429  boxNeighborCellOffset.swap(clg.boxNeighborCellOffset);
1430  sortedToUnsortedIndex.swap(clg.sortedToUnsortedIndex);
1431  sortedToSortedIndexNoGhost.swap(clg.sortedToSortedIndexNoGhost);
1432  unsortedToSortedIndex.swap(clg.unsortedToSortedIndex);
1433 
1434  unitCellP2 = clg.unitCellP2;
1435  numCellDim = clg.numCellDim;
1436  cellPadDim = clg.cellPadDim;
1437  ghostMarker = clg.ghostMarker;
1438  nDecRefRedec = clg.nDecRefRedec;
1439  opt = clg.opt;
1440 
1441  boxNeighborNumber = clg.boxNeighborNumber;
1442 
1443  return *this;
1444  }
1445 
1461  template<typename vector, typename vector_prp, unsigned int ... prp>
1462  void restoreOrder(
1463  vector & vPosReordered,
1464  vector_prp & vPrpReordered,
1465  vector & vPos,
1466  vector_prp & vPrp,
1467  size_t start = 0,
1468  size_t stop = -1)
1469  {
1470  #ifdef __NVCC__
1471  if (stop == (size_t)-1) stop = vPosReordered.size();
1472 
1473  if (opt & CL_GPU_RESTORE_POSITION) {
1474  CUDA_LAUNCH((reorderParticlesPos),
1475  vPosReordered.getGPUIteratorTo(stop-start,64),
1476  vPosReordered.toKernel(),
1477  vPos.toKernel(),
1478  sortedToUnsortedIndex.toKernel(),
1479  start
1480  );
1481  }
1482 
1483  if (opt & CL_GPU_RESTORE_PROPERTY && sizeof...(prp)) {
1484  CUDA_LAUNCH(
1485  (reorderParticlesPrp<
1486  decltype(vPrpReordered.toKernel()),
1487  decltype(sortedToUnsortedIndex.toKernel()),
1488  prp...>),
1489  vPrpReordered.getGPUIteratorTo(stop-start,64),
1490  vPrpReordered.toKernel(),
1491  vPrp.toKernel(),
1492  sortedToUnsortedIndex.toKernel(),
1493  start
1494  );
1495  }
1496  #endif
1497  }
1498 
1505  inline size_t getOpt() const
1506  {
1507  return opt;
1508  }
1509 
1515  void setOpt(size_t opt)
1516  {
1517  this->opt = opt;
1518  }
1519 };
1520 
// Transformation node for vector_distributed used by the toKernel_transform algorithm:
// maps a host-side cell list type T to its GPU-kernel counterpart
template<template <typename> class layout_base, typename T>
struct toKernel_transform<layout_base,T,4>
{
	using type = CellList_gpu_ker<T::dim_,
	                              typename T::stype_,
	                              typename T::ids_type_,
	                              typename T::transform_type_,
	                              T::is_sparse_::value>;
};
1531 
1532 #endif
1533 
1534 #endif /* OPENFPM_DATA_SRC_NN_CELLLIST_CELLLIST_GPU_HPP_ */
This class represent an N-dimensional box.
Definition: Box.hpp:60
This class implement an NxN (dense) matrix.
Definition: Matrix.hpp:33
No transformation.
vector_sparse_gpu_ker< T, Ti, layout_base > toKernel()
toKernel function transform this structure into one that can be used on GPU
void clear()
Clear all from all the elements.
size_t size()
Return how many element you have in this map.
void setGPUInsertBuffer(int nblock, int nslot)
set the gpu insert buffer for every block
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:204
size_t size()
Stub size.
Definition: map_vector.hpp:212