I use this for all my CUDA - Stolen from TF 1DKern.cu

by **hbyte** » Sat Feb 03, 2024 4:37 pm

It's a basic iterator that just takes the hassle out of CUDA. (It's in the TensorFlow source code.)

Code: Select all: #include <algorithm> #include <complex> #include <iostream> #include <math.h> #include <vector> /*Begin 1DKern definition */ /*This is a direct copy of Tensorflows 1DKern code*/ namespace detail { template <typename T> class GpuGridRange { struct Iterator { __device__ Iterator(T index, T delta) : index_(index), delta_(delta) {} __device__ T operator*() const { return index_;} __device__ Iterator& operator++() { index_ += delta_; return *this; } __device__ bool operator!=(const Iterator& other) const { bool greater = index_ > other.index_; bool less = index_ < other.index_; if(!other.delta_){ return less; } if(!delta_){ return greater; } return less || greater; } private: T index_; const T delta_; }; //end Iterator struct public: __device__ GpuGridRange(T begin,T delta,T end) : begin_(begin),delta_(delta),end_(end) {} __device__ Iterator begin() const {return Iterator(begin_,delta_); } __device__ Iterator end() const {return Iterator(end_,0);} private: T begin_; T delta_; T end_; }; //end GPU class class }; //end namespace detail template <typename T> //Allows you to use GPU iterator with all data types __device__ detail::GpuGridRange<T> GpuGridRangeX(T count) { return detail::GpuGridRange<T>( /*begin*/blockIdx.x * blockDim.x + threadIdx.x, /*delta*/gridDim.x * blockDim.x, /*end*/count ); } template <typename T> //Allows you to use GPU iterator with all data types __device__ detail::GpuGridRange<T> GpuGridRangeY(T count) { return detail::GpuGridRange<T>( /*begin*/blockIdx.y * blockDim.y + threadIdx.y, /*delta*/gridDim.y * blockDim.y, /*end*/count ); } template <typename T> //Allows you to use GPU iterator with all data types __device__ detail::GpuGridRange<T> GpuGridRangeZ(T count) { return detail::GpuGridRange<T>( /*begin*/blockIdx.z * blockDim.z + threadIdx.z, /*delta*/gridDim.z * blockDim.z, /*end*/count ); } #define GPU_1D_KERN_LOOP(i, n) \ for (int i : ::GpuGridRangeX<int>(n)) #define GPU_AXIS_KERNEL_LOOP(i, n, axis) \ for (int i : 
::GpuGridRange##axis<int>(n)) /*End 1DKern definition*/

I use this for all my CUDA - Stolen from TF 1DKern.cu

I use this for all my CUDA - Stolen from TF 1DKern.cu

Who is online