1#include <Vector/map_vector.hpp>
5#define NELEMENTS 16777216
13#define IVDEP _Pragma("clang loop vectorize(assume_safety) interleave(assume_safety)")
18#define FLAT_AGGREGATE 0
20template<
typename vector_type,
typename vector_type2>
23 auto i = blockIdx.x * blockDim.x + threadIdx.x;
25 vd_in.template get<0>(i)[0] = i;
26 vd_in.template get<0>(i)[1] = i+100.0;
28 vd_out.template get<0>(i) = i+200.0;
30 vd_out.template get<1>(i)[0] = i;
31 vd_out.template get<1>(i)[1] = i+100.0;
33 vd_out.template get<2>(i)[0][0] = i;
34 vd_out.template get<2>(i)[0][1] = i+100.0;
35 vd_out.template get<2>(i)[1][0] = i+200.0;
36 vd_out.template get<2>(i)[1][1] = i+300.0;
39template<
typename vin_type,
typename vout_type>
40void initialize_buf(vin_type in, vout_type out)
42 auto ite = out.getGPUIterator(256);
43 CUDA_LAUNCH(initialize_buff,ite,out.toKernel(),in.toKernel());
49 using clock = std::chrono::high_resolution_clock;
51 auto elapsedAndReset() ->
double {
52 const auto now = clock::now();
53 const auto seconds = std::chrono::duration<double>{now - last}.count();
59 clock::time_point last = clock::now();
62 constexpr auto PROBLEM_SIZE = 16 * 1024;
63 constexpr auto STEPS = 5;
64 constexpr auto TIMESTEP = 0.0001f;
65 constexpr auto EPS2 = 0.01f;
67 template <
typename Data,
template <
typename>
typename LayoutBase>
73 constexpr auto POS_X = 0;
74 constexpr auto POS_Y = 1;
75 constexpr auto POS_Z = 2;
76 constexpr auto VEL_X = 3;
77 constexpr auto VEL_Y = 4;
78 constexpr auto VEL_Z = 5;
79 constexpr auto MASS = 6;
84 constexpr auto POS = 0;
85 constexpr auto VEL = 1;
86 constexpr auto MASS = 2;
87 constexpr auto TENS = 3;
92 using Vector = OpenFPMVector<Data, memory_traits_inte>;
100 const float distanceSqrX = distanceX * distanceX;
101 const float distanceSqrY = distanceY * distanceY;
102 const float distanceSqrZ = distanceZ * distanceZ;
103 const float distSqr = EPS2 + distanceSqrX + distanceSqrY + distanceSqrZ;
104 const float distSixth = distSqr * distSqr * distSqr;
105 const float invDistCube = 1.0f / std::sqrt(distSixth);
106 const float sts =
particles.get<MASS>(j) * invDistCube * TIMESTEP;
107 particles.get<VEL_X>(i) += distanceSqrX * sts;
108 particles.get<VEL_Y>(i) += distanceSqrY * sts;
109 particles.get<VEL_Z>(i) += distanceSqrZ * sts;
113 const auto& posi = pi.get<POS>();
114 const auto& posj = pj.get<POS>();
115 const float distanceX = posi[0] - posj[0];
116 const float distanceY = posi[1] - posj[1];
117 const float distanceZ = posi[2] - posj[2];
118 const float distanceSqrX = distanceX * distanceX;
119 const float distanceSqrY = distanceY * distanceY;
120 const float distanceSqrZ = distanceZ * distanceZ;
121 const float distSqr = EPS2 + distanceSqrX + distanceSqrY + distanceSqrZ;
122 const float distSixth = distSqr * distSqr * distSqr;
123 const float invDistCube = 1.0f / std::sqrt(distSixth);
124 const float sts = pj.get<2>() * invDistCube * TIMESTEP;
125 auto veli = pi.get<1>();
126 veli[0] += distanceSqrX * sts;
127 veli[1] += distanceSqrY * sts;
128 veli[2] += distanceSqrZ * sts;
138 for (
int i = 0; i < PROBLEM_SIZE; i++)
140 for (
int j = 0; j < PROBLEM_SIZE; j++)
146 for (std::size_t i = 0; i < PROBLEM_SIZE; i++) {
153 const auto& veli = pi.get<1>();
154 auto&& posi = pi.get<0>();
155 posi[0] += veli[0] * TIMESTEP;
156 posi[1] += veli[1] * TIMESTEP;
157 posi[2] += veli[2] * TIMESTEP;
206 out.resize(PROBLEM_SIZE);
207 in.resize(PROBLEM_SIZE);
209 initialize_buf(in,out);
219 std::cout <<
"addresses:\n"
220 << &p0.get<POS_X>() <<
'\n'
221 << &p0.get<POS_Y>() <<
'\n'
222 << &p0.get<POS_Z>() <<
'\n'
223 << &p0.get<VEL_X>() <<
'\n'
224 << &p0.get<VEL_Y>() <<
'\n'
225 << &p0.get<VEL_Z>() <<
'\n'
226 << &p0.get<MASS>() <<
'\n';
228 std::cout <<
"addresses:\n"
229 << &p0.get<POS>()[0] <<
'\n'
230 << &p0.get<POS>()[1] <<
'\n'
231 << &p0.get<POS>()[2] <<
'\n'
232 << &p0.get<VEL>()[0] <<
'\n'
233 << &p0.get<VEL>()[1] <<
'\n'
234 << &p0.get<VEL>()[2] <<
'\n'
235 << &p0.get<MASS>() <<
'\n';
239 std::default_random_engine engine;
240 std::normal_distribution<float> dist(
float(0),
float(1));
241 for (
auto i = 0; i < PROBLEM_SIZE; i++) {
243 particles.get(i).get<POS_X>() = dist(engine);
244 particles.get(i).get<POS_Y>() = dist(engine);
245 particles.get(i).get<POS_Z>() = dist(engine);
246 particles.get(i).get<VEL_X>() = dist(engine) / float(10);
247 particles.get(i).get<VEL_Y>() = dist(engine) / float(10);
248 particles.get(i).get<VEL_Z>() = dist(engine) / float(10);
249 particles.get(i).get<MASS>() = dist(engine) / float(100);
252 pi.get<POS>()[0] = dist(engine);
253 pi.get<POS>()[1] = dist(engine);
254 pi.get<POS>()[2] = dist(engine);
255 pi.get<VEL>()[0] = dist(engine) / float(10);
256 pi.get<VEL>()[1] = dist(engine) / float(10);
257 pi.get<VEL>()[2] = dist(engine) / float(10);
258 pi.get<MASS>() = dist(engine) / float(100);
264 constexpr auto TIMESTEP = 0.0001f;
267 for (
int i = 0; i < PROBLEM_SIZE; i++) {
269 const auto& posi = pi.get<POS>();
271 for (
int j = 0 ; j < PROBLEM_SIZE ; j++) {
273 const auto& posj = pj.get<POS>();
275 const float distanceX = posi[0] - posj[0];
276 const float distanceY = posi[1] - posj[1];
277 const float distanceZ = posi[2] - posj[2];
279 const float distanceSqrX = distanceX * distanceX;
280 const float distanceSqrY = distanceY * distanceY;
281 const float distanceSqrZ = distanceZ * distanceZ;
283 const float distSqr = EPS2 + distanceSqrX + distanceSqrY + distanceSqrZ;
284 const float distSixth = distSqr * distSqr * distSqr;
285 const float invDistCube = 1.0f / sqrtf(distSixth);
286 const float sts = pj.get<2>() * invDistCube * TIMESTEP;
288 particles.get<VEL>(j)[0] += distanceSqrX * invDistCube * sts;
289 particles.get<VEL>(j)[1] += distanceSqrY * invDistCube * sts;
290 particles.get<VEL>(j)[2] += distanceSqrZ * invDistCube * sts;
297 double sumUpdate = 0;
299 for (std::size_t s = 0; s < STEPS; ++s) {
301 sumUpdate += watch.elapsedAndReset();
303 sumMove += watch.elapsedAndReset();
305 std::cout <<
"openfpm\t" << sumUpdate / STEPS <<
'\t' << sumMove / STEPS <<
'\n';
310 std::cout <<
"particle 0 pos: " << p0.get<0>() <<
" " << p0.get<1>() <<
" " << p0.get<2>() <<
'\n';
312 const auto& pos0 =
particles.get<POS>(0);
313 std::cout <<
"particle 0 pos: " << pos0[0] <<
" " << pos0[1] <<
" " << pos0[2] <<
'\n';
Sparse Matrix implementation stub object when OpenFPM is compiled with no linear algebra support.
Implementation of 1-D std::vector like structure.
aggregate of properties, from a list of object if create a struct that follow the OPENFPM native stru...