OpenFPM_pdata  4.1.0
Project that contain the implementation of distributed structures
 
Loading...
Searching...
No Matches
main.cpp
1#include <Vector/map_vector.hpp>
2#include <chrono>
3//#include <memory/HeapMemory.cpp>
4
5#define NELEMENTS 16777216
6
7//#if defined __GNUC__ // Got the idea but it seems incorrect for clang __GNUC__ is defined, in clang it end here
8//# define IVDEP _Pragma("GCC ivdep")
9//#elif defined(_MSC_VER)
10//# define IVDEP __pragma(loop(ivdep))
11//#elif defined __clang__
12//# define IVDEP _Pragma("clang loop vectorize(enable) interleave(enable) distribute(enable)")
13#define IVDEP _Pragma("clang loop vectorize(assume_safety) interleave(assume_safety)")
14//#else
15//# error "Please define IVDEP for your compiler!"
16//#endif
17
18#define FLAT_AGGREGATE 0
19
20template<typename vector_type, typename vector_type2>
21__global__ void initialize_buff(vector_type vd_out, vector_type2 vd_in)
22{
23 auto i = blockIdx.x * blockDim.x + threadIdx.x;
24
25 vd_in.template get<0>(i)[0] = i;
26 vd_in.template get<0>(i)[1] = i+100.0;
27
28 vd_out.template get<0>(i) = i+200.0;
29
30 vd_out.template get<1>(i)[0] = i;
31 vd_out.template get<1>(i)[1] = i+100.0;
32
33 vd_out.template get<2>(i)[0][0] = i;
34 vd_out.template get<2>(i)[0][1] = i+100.0;
35 vd_out.template get<2>(i)[1][0] = i+200.0;
36 vd_out.template get<2>(i)[1][1] = i+300.0;
37}
38
39template<typename vin_type, typename vout_type>
40void initialize_buf(vin_type in, vout_type out)
41{
42 auto ite = out.getGPUIterator(256);
43 CUDA_LAUNCH(initialize_buff,ite,out.toKernel(),in.toKernel());
44}
45
46
47namespace {
48 struct Stopwatch {
49 using clock = std::chrono::high_resolution_clock;
50
51 auto elapsedAndReset() -> double {
52 const auto now = clock::now();
53 const auto seconds = std::chrono::duration<double>{now - last}.count();
54 last = now;
55 return seconds;
56 }
57
58 private:
59 clock::time_point last = clock::now();
60 };
61
62 constexpr auto PROBLEM_SIZE = 16 * 1024;
63 constexpr auto STEPS = 5;
64 constexpr auto TIMESTEP = 0.0001f;
65 constexpr auto EPS2 = 0.01f;
66
67 template <typename Data, template <typename> typename LayoutBase>
69
70#if FLAT_AGGREGATE
72
73 constexpr auto POS_X = 0;
74 constexpr auto POS_Y = 1;
75 constexpr auto POS_Z = 2;
76 constexpr auto VEL_X = 3;
77 constexpr auto VEL_Y = 4;
78 constexpr auto VEL_Z = 5;
79 constexpr auto MASS = 6;
80
81#else
83
84 constexpr auto POS = 0;
85 constexpr auto VEL = 1;
86 constexpr auto MASS = 2;
87 constexpr auto TENS = 3;
88
89#endif
90
91 //using Vector = OpenFPMVector<Data, memory_traits_lin>;
92 using Vector = OpenFPMVector<Data, memory_traits_inte>;
93
94
95 inline void pPInteraction(Vector& particles, int i, int j) {
96#if FLAT_AGGREGATE
97 const float distanceX = particles.get<POS_X>(i) - particles.get<POS_X>(j);
98 const float distanceY = particles.get<POS_Y>(i) - particles.get<POS_Y>(j);
99 const float distanceZ = particles.get<POS_Z>(i) - particles.get<POS_Z>(j);
100 const float distanceSqrX = distanceX * distanceX;
101 const float distanceSqrY = distanceY * distanceY;
102 const float distanceSqrZ = distanceZ * distanceZ;
103 const float distSqr = EPS2 + distanceSqrX + distanceSqrY + distanceSqrZ;
104 const float distSixth = distSqr * distSqr * distSqr;
105 const float invDistCube = 1.0f / std::sqrt(distSixth);
106 const float sts = particles.get<MASS>(j) * invDistCube * TIMESTEP;
107 particles.get<VEL_X>(i) += distanceSqrX * sts;
108 particles.get<VEL_Y>(i) += distanceSqrY * sts;
109 particles.get<VEL_Z>(i) += distanceSqrZ * sts;
110#else
111 const auto& pi = particles.get(i);
112 const auto& pj = particles.get(j);
113 const auto& posi = pi.get<POS>();
114 const auto& posj = pj.get<POS>();
115 const float distanceX = posi[0] - posj[0];
116 const float distanceY = posi[1] - posj[1];
117 const float distanceZ = posi[2] - posj[2];
118 const float distanceSqrX = distanceX * distanceX;
119 const float distanceSqrY = distanceY * distanceY;
120 const float distanceSqrZ = distanceZ * distanceZ;
121 const float distSqr = EPS2 + distanceSqrX + distanceSqrY + distanceSqrZ;
122 const float distSixth = distSqr * distSqr * distSqr;
123 const float invDistCube = 1.0f / std::sqrt(distSixth);
124 const float sts = pj.get<2>() * invDistCube * TIMESTEP;
125 auto veli = pi.get<1>();
126 veli[0] += distanceSqrX * sts;
127 veli[1] += distanceSqrY * sts;
128 veli[2] += distanceSqrZ * sts;
129
131// std::cout << demangle(typeid(decltype(pj)).name()) << std::endl;
132// std::cout << demangle(typeid(decltype(posj)).name()) << std::endl;
133
134#endif
135 }
136
137 void update(Vector& particles) {
138 for (int i = 0; i < PROBLEM_SIZE; i++)
139 IVDEP
140 for (int j = 0; j < PROBLEM_SIZE; j++)
141 pPInteraction(particles, i, j);
142 }
143
144 void move(Vector& particles) {
145 IVDEP
146 for (std::size_t i = 0; i < PROBLEM_SIZE; i++) {
147#if FLAT_AGGREGATE
148 particles.get(i).get<POS_X>() += particles.get(i).get<VEL_X>() * TIMESTEP;
149 particles.get(i).get<POS_Y>() += particles.get(i).get<VEL_Y>() * TIMESTEP;
150 particles.get(i).get<POS_Z>() += particles.get(i).get<VEL_Z>() * TIMESTEP;
151#else
152 auto&& pi = particles.get(i);
153 const auto& veli = pi.get<1>();
154 auto&& posi = pi.get<0>();
155 posi[0] += veli[0] * TIMESTEP;
156 posi[1] += veli[1] * TIMESTEP;
157 posi[2] += veli[2] * TIMESTEP;
158#endif
159 }
160 }
161}
162
163/*template<typename T>
164class type_restrict
165{
166 T ref;
167
168public:
169
170 type_restrict(T & ref)
171 :ref(ref)
172 {}
173
174 type_restrict(type_restrict<T> & ref)
175 :ref(ref.ref)
176 {}
177
178 type_restrict<T> & operator=(type_restrict<T> & ref_)
179 {
180 T * __restrict__ out = &ref;
181 T * __restrict__ in = &ref_.ref;
182
183 *out = *in;
184 return *this;
185 }
186
187 template<typename Te, typename U = typename std::enable_if<std::is_fundamental<Te>::value>::type>
188 type_restrict<T> & operator=(const Te & obj)
189 {
190 ref = obj;
191 return *this;
192 }
193
194};*/
195
196int main() {
197
198 init_wrappers();
199
201
204
205
206 out.resize(PROBLEM_SIZE);
207 in.resize(PROBLEM_SIZE);
208
209 initialize_buf(in,out);
210
211
213
214 Vector particles(PROBLEM_SIZE);
215
216 {
217 const auto& p0 = particles.get(0);
218#if FLAT_AGGREGATE
219 std::cout << "addresses:\n"
220 << &p0.get<POS_X>() << '\n'
221 << &p0.get<POS_Y>() << '\n'
222 << &p0.get<POS_Z>() << '\n'
223 << &p0.get<VEL_X>() << '\n'
224 << &p0.get<VEL_Y>() << '\n'
225 << &p0.get<VEL_Z>() << '\n'
226 << &p0.get<MASS>() << '\n';
227#else
228 std::cout << "addresses:\n"
229 << &p0.get<POS>()[0] << '\n'
230 << &p0.get<POS>()[1] << '\n'
231 << &p0.get<POS>()[2] << '\n'
232 << &p0.get<VEL>()[0] << '\n'
233 << &p0.get<VEL>()[1] << '\n'
234 << &p0.get<VEL>()[2] << '\n'
235 << &p0.get<MASS>() << '\n';
236#endif
237 }
238
239 std::default_random_engine engine;
240 std::normal_distribution<float> dist(float(0), float(1));
241 for (auto i = 0; i < PROBLEM_SIZE; i++) {
242#if FLAT_AGGREGATE
243 particles.get(i).get<POS_X>() = dist(engine);
244 particles.get(i).get<POS_Y>() = dist(engine);
245 particles.get(i).get<POS_Z>() = dist(engine);
246 particles.get(i).get<VEL_X>() = dist(engine) / float(10);
247 particles.get(i).get<VEL_Y>() = dist(engine) / float(10);
248 particles.get(i).get<VEL_Z>() = dist(engine) / float(10);
249 particles.get(i).get<MASS>() = dist(engine) / float(100);
250#else
251 auto&& pi = particles.get(i);
252 pi.get<POS>()[0] = dist(engine);
253 pi.get<POS>()[1] = dist(engine);
254 pi.get<POS>()[2] = dist(engine);
255 pi.get<VEL>()[0] = dist(engine) / float(10);
256 pi.get<VEL>()[1] = dist(engine) / float(10);
257 pi.get<VEL>()[2] = dist(engine) / float(10);
258 pi.get<MASS>() = dist(engine) / float(100);
259#endif
260 }
261
263
264 constexpr auto TIMESTEP = 0.0001f;
265
266 IVDEP
267 for (int i = 0; i < PROBLEM_SIZE; i++) {
268 const auto& pi = particles.get(i);
269 const auto& posi = pi.get<POS>();
270 IVDEP
271 for (int j = 0 ; j < PROBLEM_SIZE ; j++) {
272 const auto& pj = particles.get(j);
273 const auto& posj = pj.get<POS>();
274
275 const float distanceX = posi[0] - posj[0];
276 const float distanceY = posi[1] - posj[1];
277 const float distanceZ = posi[2] - posj[2];
278
279 const float distanceSqrX = distanceX * distanceX;
280 const float distanceSqrY = distanceY * distanceY;
281 const float distanceSqrZ = distanceZ * distanceZ;
282
283 const float distSqr = EPS2 + distanceSqrX + distanceSqrY + distanceSqrZ;
284 const float distSixth = distSqr * distSqr * distSqr;
285 const float invDistCube = 1.0f / sqrtf(distSixth);
286 const float sts = pj.get<2>() * invDistCube * TIMESTEP;
287
288 particles.get<VEL>(j)[0] += distanceSqrX * invDistCube * sts;
289 particles.get<VEL>(j)[1] += distanceSqrY * invDistCube * sts;
290 particles.get<VEL>(j)[2] += distanceSqrZ * invDistCube * sts;
291 }
292 }
293
295
296 Stopwatch watch;
297 double sumUpdate = 0;
298 double sumMove = 0;
299 for (std::size_t s = 0; s < STEPS; ++s) {
300 update(particles);
301 sumUpdate += watch.elapsedAndReset();
302 move(particles);
303 sumMove += watch.elapsedAndReset();
304 }
305 std::cout << "openfpm\t" << sumUpdate / STEPS << '\t' << sumMove / STEPS << '\n';
306
307 {
308#if FLAT_AGGREGATE
309 const auto& p0 = particles.get(0);
310 std::cout << "particle 0 pos: " << p0.get<0>() << " " << p0.get<1>() << " " << p0.get<2>() << '\n';
311#else
312 const auto& pos0 = particles.get<POS>(0);
313 std::cout << "particle 0 pos: " << pos0[0] << " " << pos0[1] << " " << pos0[2] << '\n';
314#endif
315 }
316}
Sparse Matrix implementation stub object when OpenFPM is compiled with no linear algebra support.
Definition Vector.hpp:40
Implementation of 1-D std::vector like structure.
Distributed vector.
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...