//--- By default some GPUs do not support doubles.
//--- The cl_khr_fp64 directive is used to enable work with doubles.
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
//+------------------------------------------------------------------+
//| OpenCL kernel for Sum of vectors                                 |
//+------------------------------------------------------------------+
//| ToVect                                                           |
//| Packs up to four consecutive elements of `array`, beginning at   |
//| index `start`, into a double4. Lanes whose index would be at or  |
//| beyond `size` are filled with 0. If `start >= size` the whole    |
//| vector is zero and `array` is not read.                          |
//+------------------------------------------------------------------+
double4 ToVect(__global double *array, int start, int size)
{
    double4 result = (double4)(0, 0, 0, 0);

    if (start < size) {
        // size - start is the number of valid elements left in the array.
        switch (size - start) {
        case 1:
            result = (double4)(array[start], 0, 0, 0);
            break;
        case 2:
            result = (double4)(array[start], array[start + 1], 0, 0);
            break;
        case 3:
            result = (double4)(array[start], array[start + 1],
                               array[start + 2], 0);
            break;
        default:
            // Four or more elements remain: load a full vector.
            result = (double4)(array[start], array[start + 1],
                               array[start + 2], array[start + 3]);
            break;
        }
    }
    return result;
}

//+------------------------------------------------------------------+
//| SumVectors                                                       |
//| Element-wise sum of source1 (size1 elements) and source2 (size2  |
//| elements) written to result. Each work-item handles the four     |
//| consecutive elements starting at get_global_id(0) * 4. The       |
//| shorter input contributes 0 for positions past its end, so       |
//| max(size1, size2) elements are produced in total.                |
//| NOTE(review): result is assumed to hold at least                 |
//| max(size1, size2) elements - confirm against the host code.      |
//+------------------------------------------------------------------+
__kernel void SumVectors(__global double *source1,
                         __global double *source2,
                         __global double *result,
                         int size1,
                         int size2)
{
    int i = get_global_id(0) * 4;

    // Number of output elements from index i to the end of the longer input.
    int remaining = max(size1, size2) - i;

    // A work-item that starts at or past the end has nothing to produce.
    // Without this guard a zero or negative count would reach the `default`
    // branch below and store four elements outside the output range.
    if (remaining <= 0)
        return;

    double4 x = ToVect(source1, i, size1);
    double4 y = ToVect(source2, i, size2);
    double4 z = x + y;

    switch (remaining) {
    case 3:
        result[i + 2] = z.s2;
        /* fallthrough */
    case 2:
        result[i + 1] = z.s1;
        /* fallthrough */
    case 1:
        result[i] = z.s0;
        break;
    default:
        // Four or more elements remain: store the whole vector.
        result[i + 3] = z.s3;
        result[i + 2] = z.s2;
        result[i + 1] = z.s1;
        result[i] = z.s0;
        break;
    }
}
//+------------------------------------------------------------------+