57 lines
1.8 KiB
Common Lisp
57 lines
1.8 KiB
Common Lisp
//--- By default, some GPUs do not support double-precision values.
//--- The cl_khr_fp64 extension pragma below enables the use of doubles.
|
|
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
|
|
//+------------------------------------------------------------------+
|
|
//| OpenCL kernel for Sum of vectors |
|
|
//+------------------------------------------------------------------+
|
|
//--- Packs up to four consecutive doubles from `array`, beginning at index
//--- `start`, into a double4. Only indices below `size` are read; lanes that
//--- would fall at or beyond `size` are left at 0.
//---   array : source buffer in global memory
//---   start : index of the first element to load
//---   size  : exclusive upper bound on readable indices
//--- Returns the packed vector (all zeros when start >= size).
double4 ToVect(__global double *array, int start, int size)
  {
//--- nothing readable at or past the end: every lane stays zero
   if(start >= size)
      return (double4)(0, 0, 0, 0);
//--- at least one element is available from here on
   const int available = size - start;
   double4 packed = (double4)(0, 0, 0, 0);
   packed.s0 = array[start];
   if(available > 1)
      packed.s1 = array[start + 1];
   if(available > 2)
      packed.s2 = array[start + 2];
   if(available > 3)
      packed.s3 = array[start + 3];
   return packed;
  }
|
|
//---
|
|
//--- Element-wise sum of two vectors; each work-item handles four
//--- consecutive elements starting at index get_global_id(0) * 4.
//---   source1, source2 : input buffers; only indices below size1 / size2
//---                      are read, the shorter input is treated as
//---                      zero-padded (see ToVect)
//---   result           : output buffer; indices up to
//---                      max(size1, size2) - 1 are written, so it must
//---                      hold at least that many elements
//---   size1, size2     : element counts of source1 and source2
__kernel void SumVectors(__global double *source1,
                         __global double *source2,
                         __global double *result,
                         int size1, int size2)
  {
//--- first element owned by this work-item
   int i = get_global_id(0) * 4;
//--- number of output elements left from i to the end of the longer input
   int remaining = max(size1, size2) - i;
//--- work-items at or past the end (e.g. a padded global work size) have
//--- nothing to do; without this guard they would fall into the default
//--- branch below and write four elements out of bounds
   if(remaining <= 0)
      return;
//---
   double4 x = ToVect(source1, i, size1);
   double4 y = ToVect(source2, i, size2);
   double4 z = x + y;
//--- store only the lanes that map to valid output indices
   switch(remaining)
     {
      case 3:
         result[i + 2] = z.s2;
         /* fallthrough */
      case 2:
         result[i + 1] = z.s1;
         /* fallthrough */
      case 1:
         result[i] = z.s0;
         break;
      default:
         //--- four or more elements remain: store the full vector
         result[i + 3] = z.s3;
         result[i + 2] = z.s2;
         result[i + 1] = z.s1;
         result[i] = z.s0;
         break;
     }
  }
|
|
//+------------------------------------------------------------------+
|