OpenFPM  5.2.0
Project that contain the implementation of distributed structures
main.cu
1 #include <Vector/map_vector.hpp>
2 #include <chrono>
3 //#include <memory/HeapMemory.cpp>
4 
5 #define NELEMENTS 16777216
6 
7 //#if defined __GNUC__ // Got the idea but it seems incorrect for clang __GNUC__ is defined, in clang it end here
8 //# define IVDEP _Pragma("GCC ivdep")
9 //#elif defined(_MSC_VER)
10 //# define IVDEP __pragma(loop(ivdep))
11 //#elif defined __clang__
12 //# define IVDEP _Pragma("clang loop vectorize(enable) interleave(enable) distribute(enable)")
13 #define IVDEP _Pragma("clang loop vectorize(assume_safety) interleave(assume_safety)")
14 //#else
15 //# error "Please define IVDEP for your compiler!"
16 //#endif
17 
18 #define FLAT_AGGREGATE 0
19 
20 template<typename vector_type, typename vector_type2>
21 __global__ void initialize_buff(vector_type vd_out, vector_type2 vd_in)
22 {
23  auto i = blockIdx.x * blockDim.x + threadIdx.x;
24 
25  vd_in.template get<0>(i)[0] = i;
26  vd_in.template get<0>(i)[1] = i+100.0;
27 
28  vd_out.template get<0>(i) = i+200.0;
29 
30  vd_out.template get<1>(i)[0] = i;
31  vd_out.template get<1>(i)[1] = i+100.0;
32 
33  vd_out.template get<2>(i)[0][0] = i;
34  vd_out.template get<2>(i)[0][1] = i+100.0;
35  vd_out.template get<2>(i)[1][0] = i+200.0;
36  vd_out.template get<2>(i)[1][1] = i+300.0;
37 }
38 
39 template<typename vin_type, typename vout_type>
40 void initialize_buf(vin_type in, vout_type out)
41 {
42  auto ite = out.getGPUIterator(256);
43  CUDA_LAUNCH(initialize_buff,ite,out.toKernel(),in.toKernel());
44 }
45 
46 
47 namespace {
48  struct Stopwatch {
49  using clock = std::chrono::high_resolution_clock;
50 
51  auto elapsedAndReset() -> double {
52  const auto now = clock::now();
53  const auto seconds = std::chrono::duration<double>{now - last}.count();
54  last = now;
55  return seconds;
56  }
57 
58  private:
59  clock::time_point last = clock::now();
60  };
61 
62  constexpr auto PROBLEM_SIZE = 16 * 1024;
63  constexpr auto STEPS = 5;
64  constexpr auto TIMESTEP = 0.0001f;
65  constexpr auto EPS2 = 0.01f;
66 
67  template <typename Data, template <typename> typename LayoutBase>
69 
70 #if FLAT_AGGREGATE
72 
73  constexpr auto POS_X = 0;
74  constexpr auto POS_Y = 1;
75  constexpr auto POS_Z = 2;
76  constexpr auto VEL_X = 3;
77  constexpr auto VEL_Y = 4;
78  constexpr auto VEL_Z = 5;
79  constexpr auto MASS = 6;
80 
81 #else
83 
84  constexpr auto POS = 0;
85  constexpr auto VEL = 1;
86  constexpr auto MASS = 2;
87  constexpr auto TENS = 3;
88 
89 #endif
90 
91  //using Vector = OpenFPMVector<Data, memory_traits_lin>;
92  using Vector = OpenFPMVector<Data, memory_traits_inte>;
93 
94 
95  inline void pPInteraction(Vector& particles, int i, int j) {
96 #if FLAT_AGGREGATE
97  const float distanceX = particles.get<POS_X>(i) - particles.get<POS_X>(j);
98  const float distanceY = particles.get<POS_Y>(i) - particles.get<POS_Y>(j);
99  const float distanceZ = particles.get<POS_Z>(i) - particles.get<POS_Z>(j);
100  const float distanceSqrX = distanceX * distanceX;
101  const float distanceSqrY = distanceY * distanceY;
102  const float distanceSqrZ = distanceZ * distanceZ;
103  const float distSqr = EPS2 + distanceSqrX + distanceSqrY + distanceSqrZ;
104  const float distSixth = distSqr * distSqr * distSqr;
105  const float invDistCube = 1.0f / std::sqrt(distSixth);
106  const float sts = particles.get<MASS>(j) * invDistCube * TIMESTEP;
107  particles.get<VEL_X>(i) += distanceSqrX * sts;
108  particles.get<VEL_Y>(i) += distanceSqrY * sts;
109  particles.get<VEL_Z>(i) += distanceSqrZ * sts;
110 #else
111  const auto& pi = particles.get(i);
112  const auto& pj = particles.get(j);
113  const auto& posi = pi.get<POS>();
114  const auto& posj = pj.get<POS>();
115  const float distanceX = posi[0] - posj[0];
116  const float distanceY = posi[1] - posj[1];
117  const float distanceZ = posi[2] - posj[2];
118  const float distanceSqrX = distanceX * distanceX;
119  const float distanceSqrY = distanceY * distanceY;
120  const float distanceSqrZ = distanceZ * distanceZ;
121  const float distSqr = EPS2 + distanceSqrX + distanceSqrY + distanceSqrZ;
122  const float distSixth = distSqr * distSqr * distSqr;
123  const float invDistCube = 1.0f / std::sqrt(distSixth);
124  const float sts = pj.get<2>() * invDistCube * TIMESTEP;
125  auto veli = pi.get<1>();
126  veli[0] += distanceSqrX * sts;
127  veli[1] += distanceSqrY * sts;
128  veli[2] += distanceSqrZ * sts;
129 
131 // std::cout << demangle(typeid(decltype(pj)).name()) << std::endl;
132 // std::cout << demangle(typeid(decltype(posj)).name()) << std::endl;
133 
134 #endif
135  }
136 
137  void update(Vector& particles) {
138  for (int i = 0; i < PROBLEM_SIZE; i++)
139  IVDEP
140  for (int j = 0; j < PROBLEM_SIZE; j++)
141  pPInteraction(particles, i, j);
142  }
143 
144  void move(Vector& particles) {
145  IVDEP
146  for (std::size_t i = 0; i < PROBLEM_SIZE; i++) {
147 #if FLAT_AGGREGATE
148  particles.get(i).get<POS_X>() += particles.get(i).get<VEL_X>() * TIMESTEP;
149  particles.get(i).get<POS_Y>() += particles.get(i).get<VEL_Y>() * TIMESTEP;
150  particles.get(i).get<POS_Z>() += particles.get(i).get<VEL_Z>() * TIMESTEP;
151 #else
152  auto&& pi = particles.get(i);
153  const auto& veli = pi.get<1>();
154  auto&& posi = pi.get<0>();
155  posi[0] += veli[0] * TIMESTEP;
156  posi[1] += veli[1] * TIMESTEP;
157  posi[2] += veli[2] * TIMESTEP;
158 #endif
159  }
160  }
161 }
162 
163 /*template<typename T>
164 class type_restrict
165 {
166  T ref;
167 
168 public:
169 
170  type_restrict(T & ref)
171  :ref(ref)
172  {}
173 
174  type_restrict(type_restrict<T> & ref)
175  :ref(ref.ref)
176  {}
177 
178  type_restrict<T> & operator=(type_restrict<T> & ref_)
179  {
180  T * __restrict__ out = &ref;
181  T * __restrict__ in = &ref_.ref;
182 
183  *out = *in;
184  return *this;
185  }
186 
187  template<typename Te, typename U = typename std::enable_if<std::is_fundamental<Te>::value>::type>
188  type_restrict<T> & operator=(const Te & obj)
189  {
190  ref = obj;
191  return *this;
192  }
193 
194 };*/
195 
196 int main() {
197 
198  init_wrappers();
199 
201 
204 
205 
206  out.resize(PROBLEM_SIZE);
207  in.resize(PROBLEM_SIZE);
208 
209  initialize_buf(in,out);
210 
211 
213 
214  Vector particles(PROBLEM_SIZE);
215 
216  {
217  const auto& p0 = particles.get(0);
218 #if FLAT_AGGREGATE
219  std::cout << "addresses:\n"
220  << &p0.get<POS_X>() << '\n'
221  << &p0.get<POS_Y>() << '\n'
222  << &p0.get<POS_Z>() << '\n'
223  << &p0.get<VEL_X>() << '\n'
224  << &p0.get<VEL_Y>() << '\n'
225  << &p0.get<VEL_Z>() << '\n'
226  << &p0.get<MASS>() << '\n';
227 #else
228  std::cout << "addresses:\n"
229  << &p0.get<POS>()[0] << '\n'
230  << &p0.get<POS>()[1] << '\n'
231  << &p0.get<POS>()[2] << '\n'
232  << &p0.get<VEL>()[0] << '\n'
233  << &p0.get<VEL>()[1] << '\n'
234  << &p0.get<VEL>()[2] << '\n'
235  << &p0.get<MASS>() << '\n';
236 #endif
237  }
238 
239  std::default_random_engine engine;
240  std::normal_distribution<float> dist(float(0), float(1));
241  for (auto i = 0; i < PROBLEM_SIZE; i++) {
242 #if FLAT_AGGREGATE
243  particles.get(i).get<POS_X>() = dist(engine);
244  particles.get(i).get<POS_Y>() = dist(engine);
245  particles.get(i).get<POS_Z>() = dist(engine);
246  particles.get(i).get<VEL_X>() = dist(engine) / float(10);
247  particles.get(i).get<VEL_Y>() = dist(engine) / float(10);
248  particles.get(i).get<VEL_Z>() = dist(engine) / float(10);
249  particles.get(i).get<MASS>() = dist(engine) / float(100);
250 #else
251  auto&& pi = particles.get(i);
252  pi.get<POS>()[0] = dist(engine);
253  pi.get<POS>()[1] = dist(engine);
254  pi.get<POS>()[2] = dist(engine);
255  pi.get<VEL>()[0] = dist(engine) / float(10);
256  pi.get<VEL>()[1] = dist(engine) / float(10);
257  pi.get<VEL>()[2] = dist(engine) / float(10);
258  pi.get<MASS>() = dist(engine) / float(100);
259 #endif
260  }
261 
263 
264  constexpr auto TIMESTEP = 0.0001f;
265 
266  IVDEP
267  for (int i = 0; i < PROBLEM_SIZE; i++) {
268  const auto& pi = particles.get(i);
269  const auto& posi = pi.get<POS>();
270  IVDEP
271  for (int j = 0 ; j < PROBLEM_SIZE ; j++) {
272  const auto& pj = particles.get(j);
273  const auto& posj = pj.get<POS>();
274 
275  const float distanceX = posi[0] - posj[0];
276  const float distanceY = posi[1] - posj[1];
277  const float distanceZ = posi[2] - posj[2];
278 
279  const float distanceSqrX = distanceX * distanceX;
280  const float distanceSqrY = distanceY * distanceY;
281  const float distanceSqrZ = distanceZ * distanceZ;
282 
283  const float distSqr = EPS2 + distanceSqrX + distanceSqrY + distanceSqrZ;
284  const float distSixth = distSqr * distSqr * distSqr;
285  const float invDistCube = 1.0f / sqrtf(distSixth);
286  const float sts = pj.get<2>() * invDistCube * TIMESTEP;
287 
288  particles.get<VEL>(j)[0] += distanceSqrX * invDistCube * sts;
289  particles.get<VEL>(j)[1] += distanceSqrY * invDistCube * sts;
290  particles.get<VEL>(j)[2] += distanceSqrZ * invDistCube * sts;
291  }
292  }
293 
295 
296  Stopwatch watch;
297  double sumUpdate = 0;
298  double sumMove = 0;
299  for (std::size_t s = 0; s < STEPS; ++s) {
300  update(particles);
301  sumUpdate += watch.elapsedAndReset();
302  move(particles);
303  sumMove += watch.elapsedAndReset();
304  }
305  std::cout << "openfpm\t" << sumUpdate / STEPS << '\t' << sumMove / STEPS << '\n';
306 
307  {
308 #if FLAT_AGGREGATE
309  const auto& p0 = particles.get(0);
310  std::cout << "particle 0 pos: " << p0.get<0>() << " " << p0.get<1>() << " " << p0.get<2>() << '\n';
311 #else
312  const auto& pos0 = particles.get<POS>(0);
313  std::cout << "particle 0 pos: " << pos0[0] << " " << pos0[1] << " " << pos0[2] << '\n';
314 #endif
315  }
316 }
Sparse Matrix implementation stub object when OpenFPM is compiled with no linear algebra support.
Definition: Vector.hpp:40
Implementation of 1-D std::vector like structure.
Definition: map_vector.hpp:204
Distributed vector.
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...
Definition: aggregate.hpp:221