4#define PRINT_STACKTRACE
11#include "Vector/vector_dist.hpp"
13#include "Draw/DrawParticles.hpp"
14#include "util/stat/common_statistics.hpp"
17__global__
void test1_syncthreads()
56 return wthr.x * wthr.y * wthr.z;
61 return thr.x * thr.y * thr.z;
65int main(
int argc,
char* argv[])
69 openfpm_init(&argc,&argv);
75 g.wthr = dim3(512*512,1,1);
78 for (
int i = 0; i < 10; i++)
83 CUDA_LAUNCH(test1_syncthreads,g);
87 std::cout <<
"TKERNEL: " << t_ker.
getwct() << std::endl;
94 tele_ker.add(t_ker.
getwct());
101 double tele_ker_mean;
103 standard_deviation(tele_ker,tele_ker_mean,tele_ker_dev);
105 std::cout << g.wthr.x*g.wthr.y*g.wthr.z <<
" " << g.thr.x << std::endl;
106 std::cout <<
"SYNCTHREAD LATENCY: " << tele_ker_mean / (g.wthr.x*g.wthr.y*g.wthr.z*24*g.thr.x) * 1e9 <<
" ns " <<
" error: " << tele_ker_dev << std::endl;
113int main(
int argc,
char* argv[])
Implementation of 1-D std::vector like structure.
Class for cpu time benchmarking.
void stop()
Stop the timer.
void start()
Start the timer.
double getwct()
Return the elapsed real time.