#ifndef MAP_VECTOR_SPARSE_CUDA_KER_CUH_
#define MAP_VECTOR_SPARSE_CUDA_KER_CUH_

#include "util/for_each_ref.hpp"
//! Collapse a 3D coordinate (e.g. blockIdx) into a linear id inside the given dimensions (e.g. gridDim)
template<typename dim3Ta, typename dim3Tb>
inline __device__ __host__ int dim3CoordToInt(const dim3Ta & coord, const dim3Tb & dimensions)
{
    // ... (body elided in this listing)
}

//! Overload for the case in which the coordinate is already a plain integer
inline __device__ __host__ int dim3CoordToInt(int coord, int dimension)
{
    // ... (body elided in this listing)
}
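dim3CoordToInt is used below to turn blockIdx (within gridDim) into a single linear block id with which the per-block counter vectors are indexed. The bodies are elided above; a hypothetical standalone sketch of the usual row-major mapping, for illustration only:

// Hypothetical sketch (not part of this header): row-major linearization of a dim3 coordinate.
inline __device__ __host__ int linearBlockId(dim3 coord, dim3 dims)
{
    return (coord.z * dims.y + coord.y) * dims.x + coord.x;
}
// Example: coord = (1,2,0) in a dims = (4,3,1) grid gives (0*3 + 2)*4 + 1 = 9.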
namespace openfpm
{
    //! Wrapper carrying an already-resolved position (id) inside the data buffer
    template<typename index_type>
    struct sparse_index
    {
        index_type id;
    };

#if defined(__NVCC__) && !defined(CUDA_ON_CPU)
    //! Per-block shared counters for the staged insertions and removals
    static __shared__ int vct_atomic_add;
    static __shared__ int vct_atomic_rem;
#endif

    //! Device-side (kernel) view of a sparse vector
    template<typename T,                            // aggregate of properties stored per element
             typename Ti,                           // index (key) type
             template<typename> class layout_base>
    class vector_sparse_gpu_ker
    {
        // ... (internal buffers vct_index, vct_data, vct_add_index, vct_rem_index, vct_add_data,
        //      vct_nadd_index, vct_nrem_index and the slot sizes nslot_add, nslot_rem are declared here)
        //! Branch-free binary search of the key x in the sorted index vector, writing its position into id
        __device__ void _branchfree_search(Ti x, Ti & id) const
        {
            if (vct_index.size() == 0) {id = 0; return;}
            const Ti *base = &vct_index.template get<0>(0);
            const Ti *end = (const Ti *)vct_index.template getPointer<0>() + vct_index.size();
            Ti n = vct_data.size()-1;
            while (n > 1)
            {
                Ti half = n / 2;
                // conditional move instead of a branch: keep the upper half if its first key is still < x
                base = (base[half] < x) ? base+half : base;
                n -= half;
            }

            int off = (*base < x);
            id = base - &vct_index.template get<0>(0) + off;
            Ti v = (base + off != end)?*(base + off):(Ti)-1;
            // if the key is absent, fall back to the last data element (the background slot)
            id = (x == v)?id:vct_data.size()-1;
        }
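        /* The loop above is a branch-free lower bound: instead of recursing left/right it
         * conditionally advances a base pointer by half of the remaining range, so all threads
         * of a warp execute the same instructions regardless of the key they look up.
         *
         * Worked trace (illustrative, not from this file) for keys {1,3,5,7} and x = 5:
         *   n = 4: half = 2, base[2] = 5 < 5 is false -> base stays at index 0, n = 2
         *   n = 2: half = 1, base[1] = 3 < 5 is true  -> base moves to index 1, n = 1
         *   off = (3 < 5) = 1, so id = 1 + 1 = 2, and keys[2] == 5 confirms the match.
         */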
    public:

        typedef Ti index_type;

        //! Constructor: wrap the device views of the internal buffers and the slot sizes
        vector_sparse_gpu_ker(/* ... device views matching the members below ... */)
        :vct_index(vct_index),vct_data(vct_data),
         vct_add_index(vct_add_index),vct_rem_index(vct_rem_index),vct_add_data(vct_add_data),
         vct_nadd_index(vct_nadd_index),vct_nrem_index(vct_nrem_index),
         nslot_add(nslot_add),nslot_rem(nslot_rem)
        {}

        //! Get the number of elements
        __device__ int size()
        {
            return vct_index.size();
        }
        //! This function must be called before any insertion or removal performed inside the kernel
        __device__ void init()
        {
            if (threadIdx.x == 0)
            {
                // ... (reset the per-block counters vct_atomic_add / vct_atomic_rem)
            }
            // ...
        }

        //! This function must be called before incremental insertions: it loads this block's insertion counter
        __device__ void init_ins_inc()
        {
            if (threadIdx.x == 0)
            {
                int blockId = dim3CoordToInt(blockIdx, gridDim);
                vct_atomic_add = vct_nadd_index.template get<0>(blockId);
            }
            // ...
        }

        //! This function must be called before incremental removals: it loads this block's removal counter
        __device__ void init_rem_inc()
        {
            if (threadIdx.x == 0)
            {
                int blockId = dim3CoordToInt(blockIdx, gridDim);
                vct_atomic_rem = vct_nrem_index.template get<0>(blockId);
            }
            // ...
        }
        //! Get the background value (stored as the last element of the data buffer)
        template <unsigned int p>
        __device__ auto getBackground() const -> decltype(vct_data.template get<p>(0)) &
        {
            return vct_data.template get<p>(vct_data.size()-1);
        }
        //! Get an element of the vector given its key id
        template <unsigned int p>
        __device__ inline auto get(Ti id) const -> decltype(vct_data.template get<p>(id))
        {
            Ti di;
            _branchfree_search(id,di);
            return vct_data.template get<p>(di);
        }

        //! Get an element of the vector (all properties) given its key id
        __device__ inline auto get(Ti id) const -> decltype(vct_data.get(0))
        {
            Ti di;
            _branchfree_search(id,di);
            return vct_data.get(static_cast<size_t>(di));
        }
        //! Get an element of the vector given an already-resolved sparse index
        template <unsigned int p>
        __device__ inline auto get(openfpm::sparse_index<Ti> id) -> decltype(vct_data.template get<p>(id.id))
        {
            return vct_data.template get<p>(id.id);
        }

        //! Get an element of the vector given an already-resolved sparse index
        template <unsigned int p>
        __device__ inline auto get(openfpm::sparse_index<Ti> id) const -> decltype(vct_data.template get<p>(id.id))
        {
            return vct_data.template get<p>(id.id);
        }

        //! Get the key associated to the element id
        __device__ inline Ti get_index(openfpm::sparse_index<Ti> id) const
        {
            return vct_index.template get<0>(id.id);
        }
        //! Get an element of the vector given its key id, also returning the resolved data position in di
        template <unsigned int p>
        __device__ inline auto get(Ti id, Ti & di) const -> decltype(vct_data.template get<p>(id))
        {
            _branchfree_search(id,di);
            return vct_data.template get<p>(di);
        }

        //! Get an element of the vector directly from a data position di
        template <unsigned int p>
        __device__ inline auto get_ele(Ti di) const -> decltype(vct_data.template get<p>(di))
        {
            return vct_data.template get<p>(di);
        }
        //! It inserts an element into the sparse vector: the insertion is staged in this block's slot
        template <unsigned int p>
        __device__ auto insert(Ti ele) -> decltype(vct_data.template get<p>(0))
        {
#ifdef __NVCC__
            int blockId = dim3CoordToInt(blockIdx, gridDim);
            int slot_base = blockId;

            int pos = atomicAdd(&vct_atomic_add,1);
            vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
            return vct_add_data.template get<p>(slot_base*nslot_add+pos);
#else
            printf("vector_sparse_gpu_ker.insert[1]: Error, this function in order to work is supposed to be compiled with nvcc\n");
            return vct_add_data.template get<p>(0);
#endif
        }
        //! It removes an element from the sparse vector: the removal is staged in this block's slot
        __device__ void remove(Ti ele)
        {
#ifdef __NVCC__
            int blockId = dim3CoordToInt(blockIdx, gridDim);
            int slot_base = blockId;

            int pos = atomicAdd(&vct_atomic_rem,1);
            vct_rem_index.template get<0>(slot_base*nslot_rem+pos) = ele;
#else
            printf("vector_sparse_gpu_ker.remove: Error, this function in order to work is supposed to be compiled with nvcc\n");
#endif
        }
        //! It inserts an element into the sparse vector (all properties), staged in this block's slot
        __device__ auto insert(Ti ele) -> decltype(vct_add_data.get(0))
        {
#ifdef __NVCC__
            int blockId = dim3CoordToInt(blockIdx, gridDim);
            int slot_base = blockId;

            int pos = atomicAdd(&vct_atomic_add,1);
            vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
            return vct_add_data.get(slot_base*nslot_add+pos);
#else
            printf("vector_sparse_gpu_ker.insert[2]: Error, this function in order to work is supposed to be compiled with nvcc\n");
            return vct_add_data.get(0);
#endif
        }
        //! It removes an element from the sparse vector, using an explicit slot base
        __device__ void remove_b(Ti ele,Ti slot_base)
        {
#ifdef __NVCC__
            int pos = atomicAdd(&vct_atomic_rem,1);
            vct_rem_index.template get<0>(slot_base*nslot_rem+pos) = ele;
#else
            printf("vector_sparse_gpu_ker.remove_b: Error, this function in order to work is supposed to be compiled with nvcc\n");
#endif
        }
        //! It inserts an element into the sparse vector, using an explicit slot base
        template <unsigned int p>
        __device__ auto insert_b(Ti ele,Ti slot_base) -> decltype(vct_data.template get<p>(0))
        {
#ifdef __NVCC__
            int pos = atomicAdd(&vct_atomic_add,1);
            vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
            return vct_add_data.template get<p>(slot_base*nslot_add+pos);
#else
            printf("vector_sparse_gpu_ker.insert_b: Error, this function in order to work is supposed to be compiled with nvcc\n");
            return vct_add_data.template get<p>(0);
#endif
        }
        //! It inserts an element into the sparse vector (all properties), using an explicit slot base
        __device__ auto insert_b(Ti ele,Ti slot_base) -> decltype(vct_add_data.get(0))
        {
#ifdef __NVCC__
            int pos = atomicAdd(&vct_atomic_add,1);
            vct_add_index.template get<0>(slot_base*nslot_add+pos) = ele;
            return vct_add_data.get(slot_base*nslot_add+pos);
#else
            printf("vector_sparse_gpu_ker.insert_b: Error, this function in order to work is supposed to be compiled with nvcc\n");
            return vct_add_data.get(0);
#endif
        }
        //! Write this block's insertion counter back to vct_nadd_index
        __device__ void flush_block_insert()
        {
#ifdef __NVCC__
            __syncthreads();

            if (threadIdx.x == 0)
            {
                int blockId = dim3CoordToInt(blockIdx, gridDim);
                vct_nadd_index.template get<0>(blockId) = vct_atomic_add;
            }
#else
            printf("vector_sparse_gpu_ker.flush_block_insert: Error, this function in order to work is supposed to be compiled with nvcc\n");
#endif
        }
        //! Write this block's removal counter back to vct_nrem_index
        __device__ void flush_block_remove()
        {
#ifdef __NVCC__
            __syncthreads();

            if (threadIdx.x == 0)
            {
                int blockId = dim3CoordToInt(blockIdx, gridDim);
                vct_nrem_index.template get<0>(blockId) = vct_atomic_rem;
            }
#else
            printf("vector_sparse_gpu_ker.flush_block_remove: Error, this function in order to work is supposed to be compiled with nvcc\n");
#endif
        }
        //! Return the insertion-counter vector (internal use)
        auto & private_get_vct_nadd_index()
        {
            return vct_nadd_index;
        }

        // ...

        //! Write this block's insertion counter back to position b of vct_nadd_index
        __device__ void flush_block_insert(Ti b, bool flusher)
        {
#ifdef __NVCC__
            __syncthreads();

            if (flusher == true)
            {vct_nadd_index.template get<0>(b) = vct_atomic_add;}
#else
            printf("vector_sparse_gpu_ker.flush_block_insert: Error, this function in order to work is supposed to be compiled with nvcc\n");
#endif
        }
        //! Return the raw storage of the staged add-data buffer (internal use)
        __device__ auto private_get_data() -> decltype(vct_add_data.getBase().get_data_())
        {
            return vct_add_data.getBase().get_data_();
        }

        //! Write this block's removal counter back to position b of vct_nrem_index
        __device__ void flush_block_remove(unsigned int b, bool flusher)
        {
#ifdef __NVCC__
            __syncthreads();

            if (flusher == true)
            {vct_nrem_index.template get<0>(b) = vct_atomic_rem;}
#else
            printf("vector_sparse_gpu_ker.flush_block_remove: Error, this function in order to work is supposed to be compiled with nvcc\n");
#endif
        }
        //! Get the staged add-index buffer
        __device__ auto getAddIndexBuffer() const -> const decltype(vct_add_index)&
        {
            return vct_add_index;
        }
        //! Check whether the device pointer ptr belongs to one of the internal buffers of this
        //! structure; the result pc carries a match flag and a descriptive match_str
        // (function signature elided in this listing; only the per-buffer probes are shown)

            pc = vct_index.check_device_pointer(ptr);
            if (pc.match == true)
            {
                // ... (decorate pc.match_str and return pc)
            }

            pc = vct_data.check_device_pointer(ptr);
            if (pc.match == true)
            {
                // ...
            }

            pc = vct_add_index.check_device_pointer(ptr);
            if (pc.match == true)
            {
                // ...
            }

            pc = vct_rem_index.check_device_pointer(ptr);
            if (pc.match == true)
            {
                // ...
            }

            pc = vct_nadd_index.check_device_pointer(ptr);
            if (pc.match == true)
            {
                pc.match_str = std::string("Add index counter vector overflow: ") + "\n" + pc.match_str;
                // ...
            }

            pc = vct_nrem_index.check_device_pointer(ptr);
            if (pc.match == true)
            {
                pc.match_str = std::string("Remove index counter vector overflow: ") + "\n" + pc.match_str;
                // ...
            }

            pc = vct_add_data.check_device_pointer(ptr);
            if (pc.match == true)
            {
                // ...
            }
            // ...

        // ...
    };
}

#endif /* MAP_VECTOR_SPARSE_CUDA_KER_CUH_ */
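A minimal kernel-side usage sketch of the staging protocol described above. This is hypothetical: the kernel name, the property index 0 and the float value type are assumptions, and both the host-side setup and the later merge of the staged data happen outside this header.

// Hypothetical kernel: each thread below n stages one insertion into the sparse-vector view vct.
template<typename sparse_vector_view>
__global__ void stage_insert_kernel(sparse_vector_view vct, int n)
{
    int tid = blockIdx.x * blockDim.x + threadIdx.x;

    vct.init();                                       // all threads: set up the block's staging counter

    if (tid < n)
    {vct.template insert<0>(2*tid) = (float)tid;}     // reserve a slot, stage key 2*tid and its property-0 value

    __syncthreads();
    vct.flush_block_insert();                         // publish how many slots this block used
}

check_device_pointer is the matching debugging aid: given a raw device pointer (for example one reported by an out-of-bounds checker), it probes every internal buffer and reports the owner through the match / match_str fields. A hedged consumer sketch, with illustrative names only:

#include <iostream>

// Hypothetical helper: print which internal buffer of the sparse-vector view owns err_ptr.
template<typename sparse_vector_view>
void report_owner(sparse_vector_view & vct, void * err_ptr)
{
    auto pc = vct.check_device_pointer(err_ptr);
    if (pc.match == true)
    {std::cout << pc.match_str << std::endl;}         // e.g. "Add index counter vector overflow: ..."
}

The Doxygen member summaries referenced by this listing follow.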
__device__ __host__ layout & get_data_()
Get the internal data_ structure.
__device__ auto getDataBuffer() const -> const decltype(vct_data)&
Get the data buffer.
__device__ void flush_block_remove(unsigned int b, bool flusher)
Write this block's removal counter back to position b of the counter vector.
__device__ void flush_block_remove()
Write this block's removal counter back to the counter vector.
__device__ auto get_ele(Ti di) const -> decltype(vct_data.template get< p >(di))
Get an element of the vector.
int yes_has_check_device_pointer
Indicate this structure has a function to check the device pointer.
__device__ void _branchfree_search(Ti x, Ti &id) const
Branch-free binary search of the key x in the index vector; its position is returned in id.
__device__ void init()
This function must be called before any insertion or removal performed inside the kernel.
__device__ int size()
Get the number of elements.
__device__ openfpm::sparse_index< Ti > get_sparse(Ti id) const
Get the sparse index.
__device__ auto insert_b(Ti ele, Ti slot_base) -> decltype(vct_data.template get< p >(0))
It inserts an element into the sparse vector, using an explicit slot base.
__device__ void flush_block_insert()
Write this block's insertion counter back to the counter vector.
__device__ Ti get_index(openfpm::sparse_index< Ti > id) const
Get the index associated to the element id.
__device__ auto getAddIndexBuffer() const -> const decltype(vct_add_index)&
Get the staged add-index buffer.
__device__ auto insert_b(Ti ele, Ti slot_base) -> decltype(vct_add_data.get(0))
It inserts an element into the sparse vector, using an explicit slot base.
__device__ void remove(Ti ele)
It removes an element from the sparse vector.
__device__ auto get(Ti id, Ti &di) const -> decltype(vct_data.template get< p >(id))
Get an element of the vector.
__device__ auto get(Ti id) const -> decltype(vct_data.template get< p >(id))
Get an element of the vector.
__device__ auto insert(Ti ele) -> decltype(vct_data.template get< p >(0))
It inserts an element into the sparse vector.
__device__ auto insert(Ti ele) -> decltype(vct_add_data.get(0))
It inserts an element into the sparse vector.
__device__ auto getIndexBuffer() const -> const decltype(vct_index)&
Get the indices buffer.
__device__ void init_rem_inc()
This function must be called before incremental removals; it loads this block's removal counter.
__device__ auto get(openfpm::sparse_index< Ti > id) const -> decltype(vct_data.template get< p >(id.id))
Get an element of the vector.
__device__ void remove_b(Ti ele, Ti slot_base)
It removes an element from the sparse vector, using an explicit slot base.
__device__ auto getBackground() const -> decltype(vct_data.template get< p >(0)) &
Get the background value.
__device__ auto getDataBuffer() -> decltype(vct_data)&
Get the data buffer.
__device__ void flush_block_insert(Ti b, bool flusher)
Write this block's insertion counter back to position b of the counter vector.
__device__ void init_ins_inc()
This function must be called before incremental insertions; it loads this block's insertion counter.
__device__ auto get(openfpm::sparse_index< Ti > id) -> decltype(vct_data.template get< p >(id.id))
Get an element of the vector.
__device__ auto getAddDataBuffer() -> decltype(vct_add_data)&
Get the data buffer.
__device__ __host__ auto get(unsigned int id) const -> decltype(base.template get< p >(grid_key_dx< 1 >(0)))
Get an element of the vector.
__device__ __host__ unsigned int size() const
Return the size of the vector.
__device__ grid_gpu_ker< 1, T_, layout_base, grid_sm< 1, void > > & getBase()
Return the base.
std::string match_str
match string
bool match
Indicate if the pointer match.