NeuroNetworksBook/Include/algotrading/sum_vect_ocl.cl
super.admin 4a9222852c convert
2025-05-30 16:12:34 +02:00

57 lines
1.8 KiB
Common Lisp

//--- some GPUs do not support double precision by default;
//--- the cl_khr_fp64 extension directive enables working with doubles
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
//+------------------------------------------------------------------+
//| OpenCL kernel for Sum of vectors |
//+------------------------------------------------------------------+
//--- Loads up to four consecutive elements of 'array', beginning at index
//--- 'start', into a double4. Lanes whose index would be at or beyond 'size'
//--- are left at zero; if 'start' is not below 'size' the whole vector is zero.
//---   array - buffer to read from
//---   start - index of the element placed in lane s0
//---   size  - number of valid elements in 'array'
double4 ToVect(__global double *array, int start, int size)
{
   double4 lanes = (double4)(0, 0, 0, 0);
//--- nothing to read at or past the end of the buffer
   if(start >= size)
      return lanes;
//--- enter at the highest lane that is still in range and fall through
//--- to the lower ones, so exactly the available elements are loaded
   switch(size - start)
   {
      default:
         lanes.s3 = array[start + 3];
         // fallthrough
      case 3:
         lanes.s2 = array[start + 2];
         // fallthrough
      case 2:
         lanes.s1 = array[start + 1];
         // fallthrough
      case 1:
         lanes.s0 = array[start];
         break;
   }
   return lanes;
}
//---
//--- Element-wise sum of two vectors, four elements per work-item.
//--- Work-item g handles indices [4*g, 4*g + 3]. The inputs may differ in
//--- length: ToVect supplies zeros for positions beyond the end of the
//--- shorter one, so the output covers max(size1, size2) elements.
//---   source1 - first input buffer
//---   source2 - second input buffer
//---   result  - output buffer; elements up to max(size1, size2) are written
//---   size1   - number of elements in source1
//---   size2   - number of elements in source2
__kernel void SumVectors(__global double *source1,
                         __global double *source2,
                         __global double *result,
                         int size1, int size2)
{
   int i = get_global_id(0) * 4;
   int remaining = max(size1, size2) - i;
//--- Work-items that start at or past the end of the output have nothing
//--- to do. Without this guard a zero or negative 'remaining' would reach
//--- the default branch below and write four elements out of bounds
//--- (e.g. when the global work size is rounded up, or both sizes are 0).
   if(remaining <= 0)
      return;
   double4 x = ToVect(source1, i, size1);
   double4 y = ToVect(source2, i, size2);
   double4 z = x + y;
//--- store only the lanes that fall inside the output; the tail cases
//--- deliberately fall through from the highest valid lane down to lane 0
   switch(remaining)
   {
      case 3:
         result[i + 2] = z.s2;
         // fallthrough
      case 2:
         result[i + 1] = z.s1;
         // fallthrough
      case 1:
         result[i] = z.s0;
         break;
      default:
         //--- four or more elements remain: store the full vector
         result[i + 3] = z.s3;
         result[i + 2] = z.s2;
         result[i + 1] = z.s1;
         result[i] = z.s0;
         break;
   }
}
//+------------------------------------------------------------------+