4
\$\begingroup\$

I am faced with a design issue that has been discussed on SO several times, the most similar question being this one. Essentially, I want polymorphism for a CUDA kernel in the form of a "generic" kernel that can apply a "user-supplied" function to every element in an array.

In C++, one could achieve this design by using abstract base classes. Here's a minimal example:

#include<iostream>
#include<vector>
class AbstractFunction{
 public:
 // Polymorphic base: derived classes supply the pointwise function via
 // EvalFunction; operator() maps it over an entire vector.
 // Fix: a virtual destructor is required so that deleting a derived object
 // through an AbstractFunction* is well-defined.
 virtual ~AbstractFunction() = default;
 virtual double EvalFunction(double) const = 0;
 virtual std::vector<double> operator()(std::vector<double>) const;
};
// Apply EvalFunction element-wise; returns a new vector of the same length
// with y[i] == EvalFunction(x[i]).
std::vector<double> AbstractFunction::operator()(std::vector<double> x) const{
 std::vector<double> y;
 y.reserve(x.size()); // avoid repeated reallocation while appending
 for(auto it=x.begin();it!=x.end();++it) 
 y.push_back(EvalFunction(*it));
 return y;
}
// Concrete implementation: the squaring function EvalFunction(v) = v*v.
class ConcreteFunction : public AbstractFunction{
 public:
 double EvalFunction(double value) const {
  return value * value;
 }
}; 
int main()
{
 // Evaluate the squaring functor on a two-element input and print each result.
 ConcreteFunction f;
 std::vector<double> x{1.0, 2.0};
 const std::vector<double> y = f(x);
 for(const double& value : y)
  std::cout << value << std::endl;
 return 0;
}

As it turns out, this design is nearly impossible to port to CUDA, because virtual dispatch requires the object to be constructed in the memory space where its virtual methods are called — an object constructed on the host carries a host vtable pointer that is invalid on the device. Fine. After some digging (this question and this question were very useful), I have come up with the following design, which seems to work quite well, but I'd like to hear some critiques:

#include<cstddef>
#include<iostream>
#include<new>
// The Managed class allows us to use CUDA unified memory: any class derived
// from it has its `new`-ed instances placed in managed memory, addressable
// from both host and device.
// See http://devblogs.nvidia.com/parallelforall/unified-memory-in-cuda-6/
class Managed {
public:
 // Allocate the object in unified memory.
 // Fix: the original ignored the cudaMallocManaged return code and could
 // return an indeterminate pointer; we now throw std::bad_alloc on failure,
 // matching the standard operator new contract.
 void *operator new(size_t len) {
  void *ptr = nullptr;
  if (cudaMallocManaged(&ptr, len) != cudaSuccess)
   throw std::bad_alloc();
  cudaDeviceSynchronize(); // make the allocation safe to touch from the host
  return ptr;
 }
 void operator delete(void *ptr) {
  // Wait for any in-flight kernels that may still reference the object
  // before releasing its memory.
  cudaDeviceSynchronize();
  cudaFree(ptr);
 }
};
// Evaluate function on GPU: y[i] = (*f)(x[i]) for i in [0, N).
// Expects a 1-D launch covering at least N threads; `f` must point to
// device-accessible (e.g. managed) memory, since it is dereferenced on the
// device.
template<class FunType>
__global__ void EvalKernelGPU(double* x,double* y,int N,FunType* f)
{
 unsigned int tId = blockIdx.x*blockDim.x + threadIdx.x;
 // Fix: with a plain `tId < N`, a negative N would be implicitly converted
 // to a huge unsigned value and every thread would read out of bounds.
 if(N > 0 && tId < static_cast<unsigned int>(N))
  y[tId] = (*f)(x[tId]);
}
//Evaluator class: "policy-based" template design.
// Owns its functor only when it default-constructs one; a functor pointer
// passed in by the caller remains the caller's responsibility to free.
template<class FunType>
class Evaluator : public FunType
{
 public: 
 Evaluator() : f(new FunType), owns_f(true) {}
 Evaluator(FunType* fun_in) : f(fun_in), owns_f(false) {}
 // Fix: the default-constructed functor used to leak (no destructor).
 ~Evaluator() { if (owns_f) delete f; }
 // Evaluate the functor element-wise over x[0..N) on the GPU.
 // Returns a cudaMallocManaged buffer of length N that the CALLER must
 // release with cudaFree, or nullptr if the allocation fails.
 double* EvalFunGPU(double* x,int N){
  double* y = nullptr;
  if (cudaMallocManaged(&y,N*sizeof(double)) != cudaSuccess)
   return nullptr; // fix: the original dereferenced an unchecked allocation
  dim3 block(512,1);
  dim3 grid((N+block.x-1)/block.x,1); // ceil-div so the grid covers all N
  EvalKernelGPU<FunType><<<grid,block>>>(x,y,N,f);
  // NOTE(review): launch-configuration errors are still silently dropped;
  // consider propagating the result of cudaGetLastError() to the caller.
  cudaGetLastError();
  cudaDeviceSynchronize(); // block until results are visible to the host
  return y;
 }
 private: 
 FunType* f;  // functor evaluated on the device (must be device-accessible)
 bool owns_f; // true when this Evaluator allocated f itself
};
//"User-defined" special function, to be evaluated.
// Inherits Managed so `new SpecialFun` lives in unified memory, which lets
// the kernel dereference the functor pointer on the device.
struct SpecialFun : public Managed
{
 // Squaring function; callable from both host and device code.
 __device__ __host__ double operator()(double x){return x*x;}
};
int main()
{
 // Square two values on the GPU via the generic Evaluator.
 Evaluator<SpecialFun> E;
 double* x = nullptr;
 if (cudaMallocManaged(&x,2*sizeof(double)) != cudaSuccess) {
  // fix: the original dereferenced x without checking the allocation
  std::cerr<<"cudaMallocManaged failed"<<std::endl;
  return 1;
 }
 x[0] = 1.0; x[1] = 2.0;
 double* y = E.EvalFunGPU(x,2);
 if (y == nullptr) {
  cudaFree(x);
  return 1;
 }
 std::cout<<"y[0] = "<<y[0]<<"\ny[1] = "<<y[1]<<std::endl;
 // fix: both managed buffers leaked in the original
 cudaFree(x);
 cudaFree(y);
 return 0;
}

I have some obvious improvements lined up, but I wanted to get input on my basic design before building it out. Thanks for your time!

asked Apr 1, 2016 at 19:37
\$\endgroup\$

1 Answer 1

2
\$\begingroup\$

Is there a good reason against using the Thrust library? It ships with the CUDA toolkit (no extra dependency when you already build with nvcc) and provides exactly that kind of functionality:

#include <iostream>
#include <thrust/transform.h>
// Two-element input/output buffers in unified (managed) memory:
// writable from the host, readable from the device, no explicit copies.
__device__ __managed__ float x[2], y[2];
// Generic squaring functor, callable from host or device code.
template <typename T> struct square {
 __host__ __device__ T operator()(const T& x) const {
  return x * x;
 }
};
int main(int argc, char const *argv[]) {
 x[0] = 1; x[1] = 2;
 // Element-wise y[i] = square(x[i]) over the two-element range [x, x+2).
 // NOTE(review): with raw pointers Thrust picks an execution backend by
 // iterator type — confirm it dispatches where you expect before relying
 // on this for GPU execution.
 thrust::transform(x, x + 2, y, square<float>());
 std::cout << x[0] << "**2 = " << y[0] << "\n";
 std::cout << x[1] << "**2 = " << y[1];
 return 0;
}
answered May 6, 2016 at 15:44
\$\endgroup\$
1
  • \$\begingroup\$ I think I tried this a few times and just couldn't get it to work for some reason. I'll take another look though, thanks! \$\endgroup\$ Commented May 6, 2016 at 17:45

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.