4 #ifndef OPENFPM_PDATA_DCPSE_CUH
5 #define OPENFPM_PDATA_DCPSE_CUH
9 #include "Vector/vector_dist.hpp"
10 #include "MonomialBasis.hpp"
11 #include "SupportBuilder.hpp"
12 #include "SupportBuilder.cuh"
13 #include "Vandermonde.hpp"
14 #include "DcpseDiagonalScalingMatrix.hpp"
15 #include "DcpseRhs.hpp"
21 #include <cuda_runtime.h>
22 #include <cusolverDn.h>
// Forward declaration of the CUDA kernel that evaluates the DC-PSE kernel
// coefficients (one value per support neighbour). The full definition with
// named parameters appears at the bottom of this file.
25 template<
unsigned int dim,
typename particles_type,
typename T,
typename monomialBasis_type,
typename supportKey_type,
typename localEps_type,
typename calcKernels_type>
26 __global__
void calcKernels_gpu(
particles_type, monomialBasis_type, supportKey_type, supportKey_type, T**, localEps_type,
size_t, calcKernels_type);
// Forward declaration of assembleLocalMatrices_gpu (the line carrying the
// function name is missing from this extraction; the matching definition with
// this template parameter list appears at the bottom of this file).
28 template<
unsigned int dim,
typename T,
typename particles_type,
typename monomialBasis_type,
typename supportKey_type,
typename localEps_type,
typename matrix_type>
30 T**, T**, localEps_type, localEps_type, matrix_type,
size_t,
size_t);
// Dcpse_gpu<dim, VerletList_type, vector_type, T>: GPU implementation of the
// DC-PSE differential operator (the class head is mangled in this extraction;
// the class name is visible in the copy-support constructor below).
33 template<
unsigned int dim,
typename VerletList_type,
typename vector_type,
// T defaults to the particle scalar type of the vector_dist.
class T =
typename vector_type::stype>
// cuBLAS batched LU/TRSM only exist for float and double, so any other T is
// rejected at compile time.
35 static_assert(std::is_floating_point<T>::value,
"CUBLAS supports only float or double");
51 double HOverEpsilon=0.9;
// Order of the differential operator (sum of the differential signature).
54 const unsigned int differentialOrder;
// True when the support lists were borrowed from another Dcpse_gpu instance
// (see the second constructor) and must not be rebuilt here.
58 bool isSharedSupport =
false;
69 unsigned int convergenceOrder;
// Largest per-particle support size and total number of stored support keys.
71 size_t maxSupportSize;
72 size_t supportKeysTotalN;
// Counter compared against particles.getMapCtr() to detect a missing
// operator update after a particle map().
78 int getUpdateCtr()
const
// Primary constructor (fragment — several argument lines are missing from
// this view): records the differential signature and its order, builds the
// monomial basis for the requested convergence order, then assembles the
// local matrices via initializeStaticSize.
88 VerletList_type& verletList,
90 unsigned int convergenceOrder,
// Only RADIUS supports are implemented on the GPU path (checked later).
92 support_options opt = support_options::RADIUS
95 differentialSignature(differentialSignature),
96 differentialOrder(
Monomial<dim>(differentialSignature).order()),
97 monomialBasis(differentialSignature.asArray(), convergenceOrder),
103 initializeStaticSize(
particles, convergenceOrder, rCut);
// Support-sharing constructor: reuses the support data (supportRefs,
// supportKeys1D, kerOffsets and the associated sizes) of an existing
// operator instead of rebuilding them; isSharedSupport marks the borrowed
// state so update()/destruction do not touch the shared lists.
108 VerletList_type& verletList,
109 const Dcpse_gpu<dim, VerletList_type, vector_type, T>& other,
111 unsigned int convergenceOrder,
113 support_options opt = support_options::RADIUS
116 differentialSignature(differentialSignature),
117 differentialOrder(
Monomial<dim>(differentialSignature).order()),
118 monomialBasis(differentialSignature.asArray(), convergenceOrder),
119 supportRefs(other.supportRefs),
120 supportKeys1D(other.supportKeys1D),
121 kerOffsets(other.kerOffsets),
122 maxSupportSize(other.maxSupportSize),
123 supportKeysTotalN(other.supportKeysTotalN),
124 isSharedSupport(true)
127 initializeStaticSize(
particles, convergenceOrder, rCut);
// Kernel-drawing helper (method name line missing from this view): for a
// particle k, adds the pre-computed DC-PSE kernel coefficient onto scalar
// property prp of each of k's support neighbours. kerOffsets.get(k) ..
// kerOffsets.get(k+1) delimits k's slice of the flat supportKeys1D array.
130 template<
unsigned int prp>
134 size_t kerOff = kerOffsets.get(k);
136 size_t supportKeysSize = kerOffsets.get(k+1)-kerOffsets.get(k);
137 size_t* supportKeys = &((
size_t*)supportKeys1D.getPointer())[kerOffsets.get(k)];
139 for (
int i = 0; i < supportKeysSize; i++)
141 size_t xqK = supportKeys[i];
143 particles.template getProp<prp>(xqK) += calcKernels.get(kerOff+i);
// Support-marking helper (method name line missing): sets property prp to
// 1.0 on every neighbour in particle k's support, i.e. it visualises the
// support stencil rather than the kernel values. kerOff is computed but not
// used in the visible lines.
147 template<
unsigned int prp>
151 size_t kerOff = kerOffsets.get(k);
152 size_t supportKeysSize = kerOffsets.get(k+1)-kerOffsets.get(k);
153 size_t* supportKeys = &((
size_t*)supportKeys1D.getPointer())[kerOffsets.get(k)];
155 for (
int i = 0; i < supportKeysSize; i++)
157 size_t xqK = supportKeys[i];
159 particles.template getProp<prp>(xqK) = 1.0;
// Vector-property variant of the kernel-drawing helper: accumulates the
// kernel coefficient into component [i] of vector property prp.
// NOTE(review): the component index is the support-loop counter i, so each
// neighbour writes a different component — confirm this is intended (it
// differs from the scalar variant above).
163 template<
unsigned int prp>
167 size_t kerOff = kerOffsets.get(k);
168 size_t supportKeysSize = kerOffsets.get(k+1)-kerOffsets.get(k);
169 size_t* supportKeys = &((
size_t*)supportKeys1D.getPointer())[kerOffsets.get(k)];
171 for (
int i = 0; i < supportKeysSize; i++)
173 size_t xqK = supportKeys[i];
175 particles.template getProp<prp>(xqK)[i] += calcKernels.get(kerOff+i);
// Diagnostic routine: accumulates the discrete moments of the computed
// kernels for every monomial basis element and tracks the min/max over all
// particles, then prints them. Used to verify the DC-PSE moment conditions.
184 momenta.resize(monomialBasis.size());
185 momenta_accu.resize(monomialBasis.size());
// Initialise min (slot <0>) very high and max (slot <1>) very low so the
// first accumulated value always updates both.
187 for (
int i = 0; i < momenta.
size(); i++)
189 momenta.template get<0>(i) = 3000000000.0;
190 momenta.template get<1>(i) = -3000000000.0;
194 for (
size_t j = 0; j < N; ++j)
196 double eps = localEps.get(j);
// Reset the per-particle accumulator (only slot <0> is ever accumulated).
198 for (
int i = 0; i < momenta.
size(); i++)
200 momenta_accu.template get<0>(i) = 0.0;
203 size_t xpK = supportRefs.get(j);
// NOTE(review): kernel offset is taken at xpK while the support size uses
// index j — consistent only if supportRefs.get(j) == j; confirm.
206 size_t kerOff = kerOffsets.get(xpK);
207 size_t supportKeysSize = kerOffsets.get(j+1)-kerOffsets.get(j);
208 size_t* supportKeys = &((
size_t*)supportKeys1D.getPointer())[kerOffsets.get(j)];
210 for (
int i = 0; i < supportKeysSize; i++)
212 size_t xqK = supportKeys[i];
216 auto ker = calcKernels.get(kerOff+i);
// Evaluate every basis monomial at the normalised offset and add its
// contribution to the corresponding moment accumulator.
219 size_t N = monomialBasis.getElements().size();
221 for (
size_t i = 0; i < N; ++i)
225 T mbValue = m.evaluate(normalizedArg);
226 momenta_accu.template get<0>(counter) += mbValue * ker;
// Fold the per-particle accumulators into the global min/max.
232 for (
int i = 0; i < momenta.
size(); i++)
234 if (momenta_accu.template get<0>(i) < momenta.template get<0>(i))
236 momenta.template get<0>(i) = momenta_accu.template get<0>(i);
// NOTE(review): the max-update compares accumulator slot <1>, which is
// never written in this function (only slot <0> is accumulated above), and
// then stores slot <0>. Both reads should most likely be get<0>(i) —
// verify against the CPU Dcpse implementation.
239 if (momenta_accu.template get<1>(i) > momenta.template get<1>(i))
241 momenta.template get<1>(i) = momenta_accu.template get<0>(i);
246 for (
int i = 0; i < momenta.
size(); i++)
248 std::cout <<
"MOMENTA: " << monomialBasis.getElements()[i] <<
"Min: " << momenta.template get<0>(i) <<
" " <<
"Max: " << momenta.template get<1>(i) << std::endl;
// Applies the assembled DC-PSE operator to scalar property fValuePos and
// writes the result into property DfValuePos for every particle. The centre
// value enters each term with a sign that depends on the parity of the
// operator order (even orders flip it), and the sum is finally scaled by
// 1/eps^m (localEpsInvPow).
260 template<
unsigned int fValuePos,
unsigned int DfValuePos>
263 if (differentialOrder % 2 == 0) {
268 for (
size_t j = 0; j < N; ++j)
270 double epsInvPow = localEpsInvPow.get(j);
273 size_t xpK = supportRefs.get(j);
275 T fxp = sign *
particles.template getProp<fValuePos>(xpK);
276 size_t kerOff = kerOffsets.get(xpK);
278 size_t supportKeysSize = kerOffsets.get(j+1)-kerOffsets.get(j);
279 size_t* supportKeys = &((
size_t*)supportKeys1D.getPointer())[kerOffsets.get(j)];
// Accumulate (f(xq) + sign*f(xp)) weighted by the precomputed kernel.
281 for (
int i = 0; i < supportKeysSize; i++)
283 size_t xqK = supportKeys[i];
284 T fxq =
particles.template getProp<fValuePos>(xqK);
286 Dfxp += (fxq + fxp) * calcKernels.get(kerOff+i);
290 particles.template getProp<DfValuePos>(xpK) = Dfxp;
// Accessor fragments (signatures missing from this view):
// number of neighbours in the support of the given particle key.
302 return kerOffsets.get(key.
getKey()+1)-kerOffsets.get(key.
getKey());
// j-th kernel coefficient within the given particle's support slice.
315 size_t base = kerOffsets.get(key.
getKey());
316 return calcKernels.get(base + j);
// Index of the j-th support neighbour of the given particle.
326 size_t* supportKeys = &((
size_t*)supportKeys1D.getPointer())[kerOffsets.get(key.
getKey())];
327 return supportKeys[j];
// Sign selection for even operator orders, then the 1/eps^m scaling factor.
334 if (differentialOrder % 2 == 0) {
343 return localEpsInvPow.get(key.
getKey());
// Expression-template evaluation: applies the operator to the lazy
// expression o1 at particle `key`; the scalar result type is deduced from
// the expression via is_scalar<...>::analyze.
354 template<
typename op_type>
356 op_type &o1) -> decltype(is_scalar<std::is_fundamental<decltype(o1.value(
357 key))>::value>::analyze(key, o1)) {
359 typedef decltype(is_scalar<std::is_fundamental<decltype(o1.value(key))>::value>::analyze(key, o1)) expr_type;
362 if (differentialOrder % 2 == 0) {
366 double eps = localEps.get(key.
getKey());
367 double epsInvPow = localEpsInvPow.get(key.
getKey());
// Guard against evaluating with stale supports after a particle map().
372 if(
particles.getMapCtr()!=this->getUpdateCtr())
374 std::cerr<<__FILE__<<
":"<<__LINE__<<
" Error: You forgot a DCPSE operator update after map."<<std::endl;
379 size_t xpK = supportRefs.get(key.
getKey());
381 expr_type fxp = sign * o1.value(key);
382 size_t kerOff = kerOffsets.get(xpK);
384 size_t supportKeysSize = kerOffsets.get(key.
getKey()+1)-kerOffsets.get(key.
getKey());
385 size_t* supportKeys = &((
size_t*)supportKeys1D.getPointer())[kerOffsets.get(key.
getKey())];
// Same accumulation as the property-based variant above.
387 for (
int i = 0; i < supportKeysSize; i++)
389 size_t xqK = supportKeys[i];
391 Dfxp = Dfxp + (fxq + fxp) * calcKernels.get(kerOff+i);
// Undo the eps-normalisation applied when the kernels were assembled.
393 Dfxp = Dfxp * epsInvPow;
// Component-wise expression evaluation: same as the scalar overload above but
// applies the operator to component i of a vector-valued expression; the
// result type is the expression's coord_type.
409 template<
typename op_type>
412 int i) ->
typename decltype(is_scalar<std::is_fundamental<decltype(o1.value(
413 key))>::value>::analyze(key, o1))::coord_type {
415 typedef typename decltype(is_scalar<std::is_fundamental<decltype(o1.value(key))>::value>::analyze(key, o1))::coord_type expr_type;
418 if (differentialOrder % 2 == 0) {
422 double eps = localEps.get(key.
getKey());
423 double epsInvPow = localEpsInvPow.get(key.
getKey());
// Guard against evaluating with stale supports after a particle map().
428 if(
particles.getMapCtr()!=this->getUpdateCtr())
430 std::cerr<<__FILE__<<
":"<<__LINE__<<
" Error: You forgot a DCPSE operator update after map."<<std::endl;
435 size_t xpK = supportRefs.get(key.
getKey());
// Centre value: component i of the expression, with parity-dependent sign.
438 expr_type fxp = sign * o1.value(key)[i];
439 size_t kerOff = kerOffsets.get(xpK);
440 size_t supportKeysSize = kerOffsets.get(key.
getKey()+1)-kerOffsets.get(key.
getKey());
441 size_t* supportKeys = &((
size_t*)supportKeys1D.getPointer())[kerOffsets.get(key.
getKey())];
443 for (
int j = 0; j < supportKeysSize; j++)
445 size_t xqK = supportKeys[j];
447 Dfxp = Dfxp + (fxq + fxp) * calcKernels.get(kerOff+j);
// Undo the eps-normalisation applied when the kernels were assembled.
449 Dfxp = Dfxp * epsInvPow;
// Operator-update fragment: clears the cached support/epsilon data and
// re-runs initializeStaticSize, e.g. after the particles were remapped.
463 supportKeys1D.clear();
466 localEpsInvPow.clear();
469 initializeStaticSize(
particles, convergenceOrder, rCut);
// Serialises the operator state to one binary dump file per MPI rank using
// the OpenFPM Packer (several pack steps are missing from this extraction).
477 void save(
const std::string &file){
478 auto & v_cl=create_vcluster();
485 Packer<decltype(localEpsInvPow),
CudaMemory>::packRequest(localEpsInvPow,req);
// File name is suffixed with the rank so each process writes its own dump.
503 std::ofstream dump (file+
"_"+std::to_string(v_cl.rank()), std::ios::out | std::ios::binary);
504 if (dump.is_open() ==
false)
505 { std::cerr << __FILE__ <<
":" << __LINE__ <<
" Unable to write since dump is open at rank "<<v_cl.rank()<<std::endl;
// Restores the operator state written by save(): determines the per-rank
// file size, reads the blob and unpacks it (some steps missing from view).
516 void load(
const std::string & file)
518 auto & v_cl=create_vcluster();
// Opened with ios::ate so tellg() below directly yields the file size.
519 std::ifstream fs (file+
"_"+std::to_string(v_cl.rank()), std::ios::in | std::ios::binary | std::ios::ate );
520 if (fs.is_open() ==
false)
522 std::cerr << __FILE__ <<
":" << __LINE__ <<
" error, opening file: " << file << std::endl;
527 size_t sz = fs.tellg();
// Re-open from the beginning for the actual read.
532 std::ifstream input (file+
"_"+std::to_string(v_cl.rank()), std::ios::in | std::ios::binary );
533 if (input.is_open() ==
false)
543 mem.allocate(pmem.
size());
// Builds the particle supports (RADIUS mode only on the GPU path), reports
// the build time, then dispatches to the precision-specific local-matrix
// assembly via assembleLocalMatrices_t.
563 template <
typename U>
564 void initializeStaticSize(
566 unsigned int convergenceOrder,
573 this->convergenceOrder=convergenceOrder;
575 if (!isSharedSupport) {
581 std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
584 if (opt==support_options::RADIUS) {
// Shared supports were copied from another operator; skip the rebuild.
585 if (!isSharedSupport) {
586 while (it.isNext()) {
593 supportBuilder.getSupport(supportRefs.
size(), kerOffsets, supportKeys1D, maxSupportSize, supportKeysTotalN);
// Any other support_options value is unsupported on the GPU.
599 std::cerr<<__FILE__<<
":"<<__LINE__<<
" Error: DC-PSE on GPU supports support_options::RADIUS only!"<<std::endl;
602 std::chrono::high_resolution_clock::time_point t2 = std::chrono::high_resolution_clock::now();
603 std::chrono::duration<double> time_span2 = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1);
604 std::cout <<
"Support building took " << time_span2.count() * 1000. <<
" milliseconds." << std::endl;
// Overload resolution on the scalar type picks the float/double cuBLAS path.
606 assembleLocalMatrices_t(rCut);
// Precision dispatch for the local-matrix assembly.
//
// The two non-template overloads below are preferred by overload resolution
// for float and double and forward the matching cuBLAS batched LU / TRSM
// routines. Any other scalar type falls through to this template, which
// rejects it at run time (the class-level static_assert already rejects it
// at compile time).
template <typename U>
void assembleLocalMatrices_t(U rCut)
{
    throw std::invalid_argument("DCPSE operator error: CUBLAS supports only float or double");
}

// float: single-precision batched factorisation and triangular solves.
void assembleLocalMatrices_t(float rCut)
{
    assembleLocalMatrices(cublasSgetrfBatched, cublasStrsmBatched);
}

// double: double-precision batched factorisation and triangular solves.
void assembleLocalMatrices_t(double rCut)
{
    assembleLocalMatrices(cublasDgetrfBatched, cublasDtrsmBatched);
}
// Assembles one DC-PSE linear system per particle on the GPU, LU-factorises
// all of them in a single cuBLAS batched call and solves via two batched
// triangular solves, then launches calcKernels_gpu to evaluate the kernels.
// The cublas* function pointers are injected by assembleLocalMatrices_t so
// this code is precision-agnostic.
620 template<
typename cublasLUDec_type,
typename cublasTriangSolve_type>
621 void assembleLocalMatrices(cublasLUDec_type cublasLUDecFunc, cublasTriangSolve_type cublasTriangSolveFunc) {
622 std::chrono::high_resolution_clock::time_point t3 = std::chrono::high_resolution_clock::now();
625 auto& basis = monomialBasis.getBasis();
627 basisTemp.template hostToDevice();
630 size_t numMatrices = supportRefs.
size();
631 size_t monomialBasisSize = monomialBasis.size();
// NOTE(review): device 0 is hard-coded for the SM-count query and the CUDA
// return code is not checked.
633 int numSMs, numSMsMult = 1;
634 cudaDeviceGetAttribute(&numSMs, cudaDevAttrMultiProcessorCount, 0);
635 size_t numThreads = numSMs*numSMsMult*256;
636 std::cout <<
"numThreads " << numThreads <<
" numMatrices " << numMatrices << std::endl;
// Build the per-matrix pointer tables required by the batched cuBLAS API:
// matrix i lives at offset i*n*n in AMat, vector i at offset i*n in bVec.
648 auto AMatKernel = AMat.toKernel(); T* AMatKernelPointer = (T*) AMatKernel.getPointer();
649 for (
size_t i = 0; i < numMatrices; i++) AMatPointers.get(i) = AMatKernelPointer + i*monomialBasisSize*monomialBasisSize;
651 auto bVecKernel = bVec.toKernel(); T* bVecKernelPointer = (T*) bVecKernel.getPointer();
652 for (
size_t i = 0; i < numMatrices; i++) bVecPointers.get(i) = bVecKernelPointer + i*monomialBasisSize;
654 std::chrono::high_resolution_clock::time_point t1 = std::chrono::high_resolution_clock::now();
655 std::chrono::duration<double> time_span0 = std::chrono::duration_cast<std::chrono::duration<double>>(t1 - t3);
656 std::cout <<
"Preallocation took " << time_span0.count() * 1000. <<
" milliseconds." << std::endl;
659 std::chrono::high_resolution_clock::time_point t9 = std::chrono::high_resolution_clock::now();
// Upload supports and pointer tables, then fill A and b on the device.
661 supportRefs.template hostToDevice();
662 AMatPointers.template hostToDevice();
663 bVecPointers.template hostToDevice();
665 auto AMatPointersKernel = AMatPointers.toKernel(); T** AMatPointersKernelPointer = (T**) AMatPointersKernel.getPointer();
666 auto bVecPointersKernel = bVecPointers.toKernel(); T** bVecPointersKernelPointer = (T**) bVecPointersKernel.getPointer();
// NOTE(review): no cudaGetLastError()/sync check after this launch.
668 assembleLocalMatrices_gpu<<<numSMsMult*numSMs, 256>>>(
particles.toKernel(), differentialSignature, differentialOrder, monomialBasisKernel, supportRefs.toKernel(), kerOffsets.toKernel(), supportKeys1D.toKernel(),
669 AMatPointersKernelPointer, bVecPointersKernelPointer, localEps.toKernel(), localEpsInvPow.toKernel(), BMat.toKernel(), numMatrices, maxSupportSize);
671 localEps.template deviceToHost();
672 localEpsInvPow.template deviceToHost();
674 std::chrono::high_resolution_clock::time_point t10 = std::chrono::high_resolution_clock::now();
675 std::chrono::duration<double> time_span3 = std::chrono::duration_cast<std::chrono::duration<double>>(t10 - t9);
676 std::cout <<
"assembleLocalMatrices_gpu took " << time_span3.count() * 1000. <<
" milliseconds." << std::endl;
679 std::chrono::high_resolution_clock::time_point t7 = std::chrono::high_resolution_clock::now();
680 cublasHandle_t cublas_handle; cublasCreate_v2(&cublas_handle);
// Batched LU factorisation in place; NULL pivot array selects the
// non-pivoting variant. NOTE(review): the cublasStatus_t result is ignored.
683 cublasLUDecFunc(cublas_handle, monomialBasisSize, AMatPointersKernelPointer, monomialBasisSize, NULL, (
int*) infoArrayKernel.getPointer(), numMatrices);
684 cudaDeviceSynchronize();
686 infoArray.template deviceToHost();
// NOTE(review): "%d" with a size_t argument is a printf format mismatch
// (undefined behaviour on LP64) — should be "%zu".
687 for (
size_t i = 0; i < numMatrices; i++)
688 if (infoArray.get(i) != 0) fprintf(stderr,
"Factorization of matrix %d Failed: Matrix may be singular\n", i);
// Solve L*y = b then U*x = y for all matrices at once (LU from above).
// NOTE(review): alpha is declared double; for the float instantiation
// cublasStrsmBatched expects const float* — verify this is correct.
690 const double alpha = 1.f;
691 cublasTriangSolveFunc(cublas_handle, CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_LOWER, CUBLAS_OP_N, CUBLAS_DIAG_UNIT, monomialBasisSize, 1, &alpha, AMatPointersKernelPointer, monomialBasisSize, bVecPointersKernelPointer, monomialBasisSize, numMatrices);
692 cublasTriangSolveFunc(cublas_handle, CUBLAS_SIDE_LEFT, CUBLAS_FILL_MODE_UPPER, CUBLAS_OP_N, CUBLAS_DIAG_NON_UNIT, monomialBasisSize, 1, &alpha, AMatPointersKernelPointer, monomialBasisSize, bVecPointersKernelPointer, monomialBasisSize, numMatrices);
693 cudaDeviceSynchronize();
695 std::chrono::high_resolution_clock::time_point t8 = std::chrono::high_resolution_clock::now();
696 std::chrono::duration<double> time_span4 = std::chrono::duration_cast<std::chrono::duration<double>>(t8 - t7);
697 std::cout <<
"cublas took " << time_span4.count() * 1000. <<
" milliseconds." << std::endl;
699 std::chrono::high_resolution_clock::time_point t5 = std::chrono::high_resolution_clock::now();
// Evaluate the DC-PSE kernels for every (particle, neighbour) pair using
// the solved coefficient vectors.
701 calcKernels.resize(supportKeysTotalN);
702 localEps.template hostToDevice();
703 auto it2 =
particles.getDomainIteratorGPU(512);
704 calcKernels_gpu<dim><<<it2.wthr,it2.thr>>>(
particles.toKernel(), monomialBasisKernel, kerOffsets.toKernel(), supportKeys1D.toKernel(), bVecPointersKernelPointer, localEps.toKernel(), numMatrices, calcKernels.toKernel());
705 calcKernels.template deviceToHost();
707 std::chrono::high_resolution_clock::time_point t6 = std::chrono::high_resolution_clock::now();
708 std::chrono::duration<double> time_span5 = std::chrono::duration_cast<std::chrono::duration<double>>(t6 - t5);
709 std::cout <<
"calcKernels_gpu took " << time_span5.count() * 1000. <<
" milliseconds." << std::endl;
712 cublasDestroy_v2(cublas_handle);
714 std::chrono::high_resolution_clock::time_point t4 = std::chrono::high_resolution_clock::now();
715 std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(t4 - t3);
716 std::cout <<
"Matrices inverse took " << time_span.count() * 1000. <<
" milliseconds." << std::endl;
// Evaluates the DC-PSE kernel at offset x with coefficient vector a:
// sum over basis monomials of a(i) * m_i(x) * exp(-|x|^2).
719 T computeKernel(
Point<dim, T> x, EMatrix<T, Eigen::Dynamic, 1> & a)
const {
720 unsigned int counter = 0;
721 T res = 0, expFactor = exp(-norm2(x));
723 size_t N = monomialBasis.getElements().size();
724 for (
size_t i = 0; i < N; ++i)
728 T coeff = a(counter);
729 T mbValue = m.evaluate(x);
730 res += coeff * mbValue * expFactor;
// Overload body taking an indexable coefficient container (a[counter]); the
// signature line is missing from this view. Same Gaussian-weighted monomial
// expansion as the Eigen-vector overload above.
740 unsigned int counter = 0;
741 T res = 0, expFactor = exp(-norm2(x));
743 size_t N = monomialBasis.getElements().size();
744 for (
size_t i = 0; i < N; ++i)
748 T coeff = a[counter];
749 T mbValue = m.evaluate(x);
750 res += coeff * mbValue * expFactor;
// Condition number of the Vandermonde matrix V via Jacobi SVD
// (sigma_max / sigma_min); prints a warning when it exceeds condTOL.
757 T conditionNumber(
const EMatrix<T, -1, -1> &V, T condTOL)
const {
758 std::cout <<
"conditionNumber" << std::endl;
759 Eigen::JacobiSVD<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>> svd(V);
// Singular values are sorted in decreasing order, so first/last gives
// the largest/smallest.
760 T cond = svd.singularValues()(0)
761 / svd.singularValues()(svd.singularValues().size() - 1);
762 if (cond > condTOL) {
764 <<
"WARNING: cond(V) = " << cond
765 <<
" is greater than TOL = " << condTOL
766 <<
", numPoints(V) = " << V.rows()
// Device kernel: for each particle p_key (the p_key += blockDim.x*gridDim.x
// stride at line 789 indicates a grid-stride loop), derives the local
// smoothing length eps from the average neighbour spacing, builds the
// Gaussian-weighted Vandermonde block B = E*V, forms the normal-equations
// matrix A = B^T * B in h_A[p_key], and assembles the RHS from the
// derivative of each basis monomial (scaled by rhsSign).
775 template<
unsigned int dim,
typename T,
typename particles_type,
typename monomialBasis_type,
typename supportKey_type,
typename localEps_type,
typename matrix_type>
776 __global__
void assembleLocalMatrices_gpu(
778 supportKey_type supportRefs, supportKey_type kerOffsets, supportKey_type supportKeys1D, T** h_A, T** h_b, localEps_type localEps, localEps_type localEpsInvPow,
779 matrix_type BMat,
size_t numMatrices,
size_t maxSupportSize)
782 size_t monomialBasisSize = monomialBasis.size();
// Each particle gets its own maxSupportSize x basisSize slice of BMat.
783 size_t BStartPos = maxSupportSize * monomialBasisSize * p_key; T* B = &((T*)BMat.getPointer())[BStartPos];
784 const auto& basisElements = monomialBasis.getElements();
// Sign of the RHS depends on the parity of the operator order.
785 int rhsSign = (
Monomial_gpu<dim>(differentialSignature).order() % 2 == 0) ? 1 : -1;
789 p_key += blockDim.x * gridDim.x)
793 size_t supportKeysSize = kerOffsets.get(p_key+1)-kerOffsets.get(p_key);
794 size_t* supportKeys = &((
size_t*)supportKeys1D.getPointer())[kerOffsets.get(p_key)];
795 size_t xpK = supportRefs.get(p_key);
// Device-side assert: the system must be at least square (traps at the
// next synchronizing call if violated).
797 assert(supportKeysSize >= monomialBasis.size());
// eps = FACTOR * average L1 neighbour offset per support.
799 T FACTOR = 2, avgNeighbourSpacing = 0;
800 for (
int i = 0 ; i < supportKeysSize; i++) {
802 for (
size_t j = 0; j < dim; ++j)
803 avgNeighbourSpacing += fabs(off.
value(j));
806 avgNeighbourSpacing /= supportKeysSize;
807 T eps = FACTOR * avgNeighbourSpacing;
811 localEps.get(p_key) = eps;
// NOTE(review): double-precision literals and pow() in a T-typed kernel
// force double math even when T is float — confirm this is intended.
812 localEpsInvPow.get(p_key) = 1.0 / pow(eps,differentialOrder);
// B(i,j) = exp(-|off|^2 / (2 eps^2)) * m_j(off) / eps^order(m_j)
815 for (
int i = 0; i < supportKeysSize; ++i)
816 for (
int j = 0; j < monomialBasisSize; ++j) {
820 T V_ij = m.evaluate(off) / pow(eps, m.order());
821 T E_ii = exp(- norm2(off) / (2.0 * eps * eps));
822 B[i*monomialBasisSize+j] = E_ii * V_ij;
// A = B^T * B (normal equations), written to this particle's matrix.
827 for (
int i = 0; i < monomialBasisSize; ++i)
828 for (
int j = 0; j < monomialBasisSize; ++j) {
829 for (
int k = 0; k < supportKeysSize; ++k)
830 sum += B[k*monomialBasisSize+i] * B[k*monomialBasisSize+j];
832 h_A[p_key][i*monomialBasisSize+j] =
sum;
sum = 0.0;
// RHS: derivative of each basis monomial w.r.t. the differential signature.
836 for (
size_t i = 0; i < monomialBasisSize; ++i) {
837 const Monomial_gpu<dim>& dm = basisElements.get(i).getDerivative(differentialSignature);
// Device kernel: for each particle p_key, evaluates the DC-PSE kernel value
// for every support neighbour j as
//   sum_i h_b[p_key][i] * m_i(offNorm) * exp(-|offNorm|^2)
// where h_b holds the coefficient vectors solved by the batched cuBLAS
// triangular solves, and writes the results into this particle's slice of
// the flat calcKernels array.
843 template<
unsigned int dim,
typename particles_type,
typename T,
typename monomialBasis_type,
typename supportKey_type,
typename localEps_type,
typename calcKernels_type>
844 __global__
void calcKernels_gpu(
particles_type particles, monomialBasis_type monomialBasis, supportKey_type kerOffsets, supportKey_type supportKeys1D,
845 T** h_b, localEps_type localEps,
size_t numMatrices, calcKernels_type calcKernels)
850 size_t monomialBasisSize = monomialBasis.size();
851 const auto& basisElements = monomialBasis.getElements();
852 size_t supportKeysSize = kerOffsets.get(p_key+1)-kerOffsets.get(p_key);
853 size_t* supportKeys = &((
size_t*)supportKeys1D.getPointer())[kerOffsets.get(p_key)];
// Output slice for this particle starts at its kernel offset.
855 T* calcKernelsLocal = &((T*)calcKernels.getPointer())[kerOffsets.get(p_key)];
856 T eps = localEps.get(p_key);
858 for (
size_t j = 0; j < supportKeysSize; ++j)
860 size_t xqK = supportKeys[j];
// Gaussian window evaluated at the eps-normalised offset.
863 T expFactor = exp(-norm2(offNorm));
866 for (
size_t i = 0; i < monomialBasisSize; ++i) {
868 T mbValue = m.evaluate(offNorm);
869 T coeff = h_b[p_key][i];
871 res += coeff * mbValue * expFactor;
873 calcKernelsLocal[j] = res;
virtual size_t size() const
the size of the allocated memory
virtual void * getPointer()
get a readable pointer with the data
__device__ __host__ T & value(size_t i)
Return the reference to the value at coordinate i.
Implementation of 1-D std::vector like structure.
Grid key for a distributed grid.
__device__ __host__ size_t getKey() const
Get the key.
size_t size_local() const
return the local size of the vector
auto getPos(vect_dist_key_dx vec_key) -> decltype(vPos.template get< 0 >(vec_key.getKey()))
Get the position of an element.
vector_dist_iterator getDomainIterator() const
Get an iterator that traverse the particles in the domain.
void hostToDevicePos()
Move the particle position memory from host to device memory.
void ghost_get_subset()
Stub does not do anything.
Aggregate of properties: from a list of objects it creates a struct that follows the OpenFPM native stru...
Transform the boost::fusion::vector into memory specification (memory_traits)
It model an expression expr1 + ... exprn.