doxygen/openfpm/ofp__context_8hpp_source.html

/*

 * ofp_context.hpp

 *

 *  Created on: Nov 15, 2018

 *      Author: i-bird

 */


#ifndef OFP_CONTEXT_HXX_

#define OFP_CONTEXT_HXX_


#include <iostream>


#ifdef CUDA_ON_CPU


namespace gpu

{

    // Options accepted by the ofp_context_t constructor and by its init() helper.
    // In this CUDA_ON_CPU build both of those have empty bodies, so the chosen
    // value has no effect here. The explicit values below are the ones the
    // compiler would assign implicitly.
    enum gpu_context_opt { no_print_props = 0, print_props = 1, dummy = 2 };


    // Empty base type that ofp_context_t derives from in the CUDA_ON_CPU build.
    struct context_t
    {
    };


    /**
     * Context used when CUDA_ON_CPU is defined.
     *
     * The constructor and init() do nothing. props() returns _props (never
     * assigned by this class) and ptx_version() returns 0. Every other
     * operation prints "<file>:<line> Not implemented" to std::cout and returns
     * a zero / null value.
     *
     * The class exposes virtual functions, so its destructor is virtual as
     * well: destroying a derived object through an ofp_context_t pointer is then
     * well defined.
     */
    class ofp_context_t : public context_t
    {
        protected:

            // Returned by props(); this class never writes to it.
            std::string _props;

            // Scratch buffer; not used by any member visible in this class.
            openfpm::vector<aggregate<unsigned char>> tmem;

            // No-op initialisation hook; both arguments are ignored.
            template<int no_arg = 0>
            void init(int dev_num, gpu_context_opt opt)
            {}

            // Prints the "Not implemented" diagnostic. `line` is the __LINE__ of
            // the calling member so the message still points at that member.
            static void not_implemented(int line)
            {
                std::cout << __FILE__ << ":" << line << " Not implemented" << std::endl;
            }

        public:

            // All three arguments are accepted and ignored.
            ofp_context_t(gpu_context_opt opt = gpu_context_opt::no_print_props , int dev_num = 0, int stream_ = 0)
            {}

            virtual ~ofp_context_t()
            {}

            // Returns _props.
            virtual const std::string& props() const
            {
                return _props;
            }

            // Always 0 in this build.
            virtual int ptx_version() const
            {
                return 0;
            }

            // Not implemented: prints a diagnostic and returns 0.
            virtual int stream()
            {
                not_implemented(__LINE__);
                return 0;
            }

            // Alloc GPU memory.
            // Not implemented: prints a diagnostic and returns a null pointer.
            virtual void* alloc(size_t size, int space)
            {
                not_implemented(__LINE__);
                return nullptr;
            }

            // Not implemented: prints a diagnostic; `p` is left untouched.
            virtual void free(void* p, int space)
            {
                not_implemented(__LINE__);
            }

            // Not implemented: prints a diagnostic.
            virtual void synchronize()
            {
                not_implemented(__LINE__);
            }

            // Not implemented: prints a diagnostic and returns 0.
            virtual int event()
            {
                not_implemented(__LINE__);
                return 0;
            }

            // Not implemented: prints a diagnostic.
            virtual void timer_begin()
            {
                not_implemented(__LINE__);
            }

            // Not implemented: prints a diagnostic and returns 0.0.
            virtual double timer_end()
            {
                not_implemented(__LINE__);
                return 0.0;
            }

            // Not implemented: prints a diagnostic and returns 0.
            virtual int getDevice()
            {
                not_implemented(__LINE__);
                return 0;
            }
    };


}


#else

    #ifdef CUDA_GPU


        #include "util/gpu_context.hpp"


        namespace gpu

        {

            // Options accepted by the ofp_context_t constructor.
            //   no_print_props : default; nothing is printed.
            //   print_props    : the constructor prints device_prop_string(_props).
            //   dummy          : init() does not call cudaSetDevice.
            // The explicit values below are the ones the compiler would assign
            // implicitly.
            enum gpu_context_opt { no_print_props = 0, print_props = 1, dummy = 2 };


            // ofp_context_t is a trivial implementation of context_t. Users can

            // derive this type to provide a custom allocator.


            class ofp_context_t : public context_t

            {

                protected:

                    cudaDeviceProp _props;

                    int _ptx_version;

                    cudaStream_t _stream;


                    cudaEvent_t _timer[2];

                    cudaEvent_t _event;


                    openfpm::vector_gpu<aggregate<unsigned char>> tmem;

                    openfpm::vector_gpu<aggregate<unsigned char>> tmem2;

                    openfpm::vector_gpu<aggregate<unsigned char>> tmem3;


                    // Making this a template argument means we won't generate an instance

                    // of empty_f for each translation unit.

                    template<int no_arg = 0>

                    void init(int dev_num, gpu_context_opt opt)

                    {

                        cudaFuncAttributes attr;

                        #ifdef __NVCC__

                        cudaError_t result = cudaFuncGetAttributes(&attr, (void *)empty_f<0>);

                        if(cudaSuccess != result) throw cuda_exception_t(result);

                        _ptx_version = attr.ptxVersion;

                        #else

                        _ptx_version = 60;

                        //std::cout << __FILE__ << ":" << __LINE__ << " Warning initialization of GPU context has been done from a standard Cpp file, rather than a CUDA or HIP file" << std::endl;

                        #endif


                        int num_dev;

                        cudaGetDeviceCount(&num_dev);


                        if (num_dev == 0) {return;}


                        if (opt != gpu_context_opt::dummy)

                        {

                            cudaSetDevice(dev_num % num_dev);

                        }


                        int ord;

                        cudaGetDevice(&ord);

                        cudaGetDeviceProperties(&_props, ord);


                        cudaEventCreate(&_timer[0]);

                        cudaEventCreate(&_timer[1]);

                        cudaEventCreate(&_event);

                    }


                public:


                    ofp_context_t(gpu_context_opt opt = gpu_context_opt::no_print_props , int dev_num = 0, cudaStream_t stream_ = 0)

                    :context_t(), _stream(stream_)

                    {

                        init(dev_num,opt);

                        if(opt == gpu_context_opt::print_props)

                        {

                            printf("%s\n", device_prop_string(_props).c_str());

                        }

                    }


                    ~ofp_context_t()

                    {

                        cudaEventDestroy(_timer[0]);

                        cudaEventDestroy(_timer[1]);

                        cudaEventDestroy(_event);

                    }


                    virtual const cudaDeviceProp& props() const { return _props; }

                    virtual int ptx_version() const { return _ptx_version; }

                    virtual cudaStream_t stream() { return _stream; }


                    // Alloc GPU memory.

                    virtual void* alloc(size_t size, memory_space_t space)

                    {

                        void* p = nullptr;

                        if(size)

                        {

                            cudaError_t result = (memory_space_device == space) ?cudaMalloc(&p, size) : cudaMallocHost(&p, size);

                            if(cudaSuccess != result) throw cuda_exception_t(result);

                        }

                        return p;

                    }


                    virtual void free(void* p, memory_space_t space)

                    {

                        if(p)

                        {

                            cudaError_t result = (memory_space_device == space) ? cudaFree(p) : cudaFreeHost(p);

                            if(cudaSuccess != result) throw cuda_exception_t(result);

                        }

                    }


                    virtual void synchronize()

                    {

                        cudaError_t result = _stream ?

                        cudaStreamSynchronize(_stream) :

                        cudaDeviceSynchronize();

                        if(cudaSuccess != result) throw cuda_exception_t(result);

                    }


                    virtual cudaEvent_t event()

                    {

                        return _event;

                    }


                    virtual void timer_begin()

                    {

                        cudaEventRecord(_timer[0], _stream);

                    }


                    virtual double timer_end()

                    {

                        cudaEventRecord(_timer[1], _stream);

                        cudaEventSynchronize(_timer[1]);

                        float ms;

                        cudaEventElapsedTime(&ms, _timer[0], _timer[1]);

                        return ms / 1.0e3;

                    }


                    virtual int getDevice()

                    {

                        int dev = 0;


                        cudaGetDevice(&dev);


                        return dev;

                    }


                    virtual int getNDevice()

                    {

                        int num_dev;

                        cudaGetDeviceCount(&num_dev);


                        return num_dev;

                    }


                    openfpm::vector_gpu<aggregate<unsigned char>> & getTemporalCUB()

                    {

                        return tmem;

                    }


                    openfpm::vector_gpu<aggregate<unsigned char>> & getTemporalCUB2()

                    {

                        return tmem2;

                    }


                    openfpm::vector_gpu<aggregate<unsigned char>> & getTemporalCUB3()

                    {

                        return tmem3;

                    }

            };


        }


    #else


        namespace gpu
        {

            // Options accepted by the ofp_context_t constructor. The stub below
            // ignores the value; the enumerators exist so that call sites written
            // for the other build variants still compile.
            enum gpu_context_opt
            {
                no_print_props,
                print_props,
                dummy
            };

            // Stub class for modern gpu, used when neither CUDA_ON_CPU nor CUDA_GPU
            // is defined. It holds no state and its constructor does nothing.
            struct ofp_context_t
            {
                // Every argument is accepted and ignored. The defaulted third
                // argument mirrors the stream argument of the CUDA_ON_CPU and
                // CUDA_GPU constructors, so three-argument call sites compile in
                // this configuration too.
                ofp_context_t(gpu_context_opt opt = gpu_context_opt::no_print_props , int dev_num = 0, int stream_ = 0)
                {}
            };
        }


    #endif


#endif


#endif /* OFP_CONTEXT_HXX_ */

openfpm::vector
Implementation of 1-D std::vector like structure.
Definition map_vector.hpp:203

cub::init
OutputIteratorT OffsetT ReductionOpT OuputT init
[in] The initial value of the reduction
Definition dispatch_reduce.cuh:119

gpu::ofp_context_t
Definition ofp_context.hpp:303