OpenFPM_pdata  4.1.0
Project that contains the implementation of distributed structures
merge_ofp.cuh
1 /*
2  * merge_ofp.cuh
3  *
4  * Created on: May 15, 2019
5  * Author: i-bird
6  */
7 
8  #ifndef MERGE_OFP_HPP_
9  #define MERGE_OFP_HPP_
10 
11  #ifdef __NVCC__
12 
13  #include "Vector/map_vector.hpp"
14  #include "util/cuda_launch.hpp"
15 
16  #if CUDART_VERSION >= 11000
17  #ifndef CUDA_ON_CPU
18  // Here we have for sure CUDA >= 11
19  #ifdef __HIP__
20  #undef __CUDACC__
21  #undef __CUDA__
22  #include <thrust/merge.h>
23  #include <thrust/execution_policy.h>
24  #define __CUDACC__
25  #define __CUDA__
26  #else
27  #include <thrust/merge.h>
28  #include <thrust/execution_policy.h>
29  #endif
30  #endif
31  #else
32  #include <thrust/merge.h>
33  #include <thrust/execution_policy.h>
34 // #include "util/cuda/moderngpu/kernel_merge.hxx"
35  #endif
36  #include "util/cuda/ofp_context.hxx"
37 
38 
namespace openfpm
{
	/*! \brief Merge two key/value sequences into a single output sequence
	 *
	 * Keys from the two inputs are interleaved according to comp and every
	 * value is written at the same output position as its key. The inputs are
	 * presumably already ordered by comp (thrust::merge_by_key requires it);
	 * nothing in this function checks that.
	 *
	 * When CUDA_ON_CPU is defined the merge is a sequential loop that indexes
	 * the iterators with operator[]. Otherwise the work is handed to
	 * thrust::merge_by_key with the thrust::device execution policy.
	 *
	 * \param a_keys keys of the first sequence
	 * \param a_vals values of the first sequence
	 * \param a_count number of elements in the first sequence
	 * \param b_keys keys of the second sequence
	 * \param b_vals values of the second sequence
	 * \param b_count number of elements in the second sequence
	 * \param c_keys output keys, a_count + b_count elements are written
	 * \param c_vals output values, a_count + b_count elements are written
	 * \param comp comparison functor, called as comp(key_from_b,key_from_a) in the sequential path
	 * \param context not used by either code path (it was only consumed by the
	 *        disabled moderngpu call); kept so the signature does not change
	 *
	 */
	template<typename a_keys_it, typename a_vals_it,
			 typename b_keys_it, typename b_vals_it,
			 typename c_keys_it, typename c_vals_it,
			 typename comp_t, typename context_t>
	void merge(a_keys_it a_keys, a_vals_it a_vals, int a_count,
			   b_keys_it b_keys, b_vals_it b_vals, int b_count,
			   c_keys_it c_keys, c_vals_it c_vals, comp_t comp, context_t& context)
	{
#ifdef CUDA_ON_CPU

		int ia = 0;   // read cursor in the first sequence
		int ib = 0;   // read cursor in the second sequence
		int out = 0;  // write cursor in the output

		// Phase 1: both sequences still have elements. An element of b is
		// taken only when comp says it goes strictly before the current
		// element of a, so on a tie the element of a is emitted first.
		while (ia < a_count && ib < b_count)
		{
			if (comp(b_keys[ib],a_keys[ia]))
			{
				c_keys[out] = b_keys[ib];
				c_vals[out] = b_vals[ib];
				++ib;
			}
			else
			{
				c_keys[out] = a_keys[ia];
				c_vals[out] = a_vals[ia];
				++ia;
			}
			++out;
		}

		// Phase 2: b is exhausted, copy what is left of a
		for ( ; ia < a_count ; ++ia, ++out)
		{
			c_keys[out] = a_keys[ia];
			c_vals[out] = a_vals[ia];
		}

		// Phase 3: a is exhausted, copy what is left of b
		for ( ; ib < b_count ; ++ib, ++out)
		{
			c_keys[out] = b_keys[ib];
			c_vals[out] = b_vals[ib];
		}

#else

		// The moderngpu implementation
		//   mgpu::merge(a_keys,a_vals,a_count,b_keys,b_vals,b_count,c_keys,c_vals,comp,context);
		// seemed broken on some CUDA versions / hardware and is no longer
		// supported on some hardware, so thrust is used instead. HIP and CUDA
		// builds issue exactly the same call.
		thrust::merge_by_key(thrust::device,
							 a_keys, a_keys + a_count,
							 b_keys, b_keys + b_count,
							 a_vals, b_vals,
							 c_keys, c_vals,
							 comp);

#endif
	}
}
118 
119  #endif /* __NVCC__ */
120 
121  #endif /* MERGE_OFP_HPP_ */
122 
convert a type into constant type
Definition: aggregate.hpp:292