Warrior_EA/AI/NetworkCL.cl
super.admin 0a527b0cf9 convert
2025-05-30 16:35:54 +02:00

573 lines
No EOL
22 KiB
OpenCL C

//--- cl_khr_fp64 directive is used to enable work with doubles
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
//+------------------------------------------------------------------+
//| Fully connected layer forward pass.                              |
//| One work item per output neuron: weighted sum of the inputs plus |
//| bias, followed by the selected activation.                       |
//| matrix_w layout: (inputs+1) weights per neuron, bias last.       |
//| activation: 0=tanh, 1=sigmoid, 2=LeakyReLU(0.01), other=linear.  |
//+------------------------------------------------------------------+
__kernel void FeedForward(__global double *matrix_w,
                          __global double *matrix_i,
                          __global double *matrix_o,
                          int inputs, int activation)
  {
   int neuron=get_global_id(0);
   int base=(inputs+1)*neuron;            // first weight of this neuron
   double acc=0.0;
//--- walk the weight row four elements at a time; the trailing partial
//--- vector is zero padded and the bias input is the constant 1
   for(int k=0; k<=inputs; k+=4)
     {
      double4 x,w;
      int rem=inputs-k;                   // elements left before the bias
      if(rem>=4)
        {
         x=(double4)(matrix_i[k],matrix_i[k+1],matrix_i[k+2],matrix_i[k+3]);
         w=(double4)(matrix_w[base+k],matrix_w[base+k+1],matrix_w[base+k+2],matrix_w[base+k+3]);
        }
      else if(rem==3)
        {
         x=(double4)(matrix_i[k],matrix_i[k+1],matrix_i[k+2],1);
         w=(double4)(matrix_w[base+k],matrix_w[base+k+1],matrix_w[base+k+2],matrix_w[base+k+3]);
        }
      else if(rem==2)
        {
         x=(double4)(matrix_i[k],matrix_i[k+1],1,0);
         w=(double4)(matrix_w[base+k],matrix_w[base+k+1],matrix_w[base+k+2],0);
        }
      else if(rem==1)
        {
         x=(double4)(matrix_i[k],1,0,0);
         w=(double4)(matrix_w[base+k],matrix_w[base+k+1],0,0);
        }
      else                                // rem==0: only the bias is left
        {
         x=(double4)(1,0,0,0);
         w=(double4)(matrix_w[base+k],0,0,0);
        }
      acc+=dot(x,w);
     }
//--- activation
   if(activation==0)
      acc=tanh(acc);
   else if(activation==1)
      acc=1/(1+exp(-clamp(acc,-50.0,50.0)));   // clamp avoids exp overflow
   else if(activation==2 && acc<0)
      acc*=0.01;
   matrix_o[neuron]=acc;
  }
//+------------------------------------------------------------------+
//| Output layer error gradient.                                     |
//| One work item per neuron: delta = (target - output) scaled by    |
//| the activation derivative evaluated at the stored output.        |
//| activation: 0=tanh, 1=sigmoid, 2=LeakyReLU(0.01), other=linear.  |
//+------------------------------------------------------------------+
__kernel void CalcOutputGradient(__global double *matrix_t,
                                 __global double *matrix_o,
                                 __global double *matrix_ig,
                                 int activation)
  {
   int n=get_global_id(0);
   double out=matrix_o[n];
   double grad;
   if(activation==0)
     {
      //--- tanh: clamp target to [-1,1]; derivative 1-out^2, with the
      //--- saturated output nudged off +/-1 so the gradient never dies
      grad=clamp(matrix_t[n],-1.0,1.0)-out;
      grad=grad*(1-pow(out==1 || out==-1 ? 0.99999999 : out,2));
     }
   else if(activation==1)
     {
      //--- sigmoid: clamp target to [0,1]; derivative out*(1-out),
      //--- floored away from zero at saturation
      grad=clamp(matrix_t[n],0.0,1.0)-out;
      grad=grad*(out==0 || out==1 ? 0.00000001 : (out*(1-out)));
     }
   else if(activation==2)
      grad=(matrix_t[n]-out)*(out>=0 ? 1.0 : 0.01);
   else
      grad=matrix_t[n]-out;
   matrix_ig[n]=grad;
  }
//+------------------------------------------------------------------+
//| Hidden layer error gradient.                                     |
//| One work item per neuron of the current layer (i). Accumulates   |
//| grad_k * w[k][i] over all next-layer neurons k, then scales by   |
//| the activation derivative at this neuron's stored output.        |
//| BUG FIX: the weight connecting output k to input i lives at      |
//| k*(inputs+1)+i — each output row holds inputs+1 weights with the |
//| bias last, exactly as FeedForward writes them. The old code      |
//| indexed k*inputs+i and read consecutive addresses +1..+3 for     |
//| consecutive outputs, mixing weights of unrelated connections.    |
//| activation: 0=tanh, 1=sigmoid, 2=LeakyReLU(0.01), other=linear.  |
//+------------------------------------------------------------------+
__kernel void CalcHiddenGradient(__global double *matrix_w,
                                 __global double *matrix_g,
                                 __global double *matrix_o,
                                 __global double *matrix_ig,
                                 int outputs, int activation)
  {
   int i=get_global_id(0);
   int inputs=get_global_size(0);
   int row=inputs+1;                      // weights per output neuron
   double sum=0;
   double out=matrix_o[i];
   double4 grad, weight;
   for(int k=0; k<outputs; k+=4)
     {
      switch(outputs-k)
        {
         case 1:
            grad=(double4)(matrix_g[k],0,0,0);
            weight=(double4)(matrix_w[k*row+i],0,0,0);
            break;
         case 2:
            grad=(double4)(matrix_g[k],matrix_g[k+1],0,0);
            weight=(double4)(matrix_w[k*row+i],matrix_w[(k+1)*row+i],0,0);
            break;
         case 3:
            grad=(double4)(matrix_g[k],matrix_g[k+1],matrix_g[k+2],0);
            weight=(double4)(matrix_w[k*row+i],matrix_w[(k+1)*row+i],matrix_w[(k+2)*row+i],0);
            break;
         default:
            grad=(double4)(matrix_g[k],matrix_g[k+1],matrix_g[k+2],matrix_g[k+3]);
            weight=(double4)(matrix_w[k*row+i],matrix_w[(k+1)*row+i],matrix_w[(k+2)*row+i],matrix_w[(k+3)*row+i]);
            break;
        }
      sum+=dot(grad,weight);
     }
//--- scale by the activation derivative (same scheme as the output layer)
   switch(activation)
     {
      case 0:
         sum=clamp(sum+out,-1.0,1.0)-out;
         sum=sum*(1-pow(out==1 || out==-1 ? 0.99999999 : out,2));
         break;
      case 1:
         sum=clamp(sum+out,0.0,1.0)-out;
         sum=sum*(out==0 || out==1 ? 0.00000001 : (out*(1-out)));
         break;
      case 2:
         if(out<0)
            sum*=0.01;
         break;
      default:
         break;
     }
   matrix_ig[i]=sum;
  }
//+------------------------------------------------------------------+
//| SGD-with-momentum weight update for a fully connected layer.     |
//| 2-D range: dim0 = output neuron, dim1 = weight column;           |
//| column j==inputs is the bias, whose "input" is the constant 1.   |
//| delta = lr * gradient * input + momentum * previous delta.       |
//+------------------------------------------------------------------+
__kernel void UpdateWeightsMomentum(__global double *matrix_w,
                                    __global double *matrix_g,
                                    __global double *matrix_i,
                                    __global double *matrix_dw,
                                    int inputs, double learning_rates, double momentum)
  {
   int neuron=get_global_id(0);
   int col=get_global_id(1);
   int w=neuron*(inputs+1)+col;
   double in=(col<inputs ? matrix_i[col] : 1);   // bias input is 1
   double delta=learning_rates*matrix_g[neuron]*in + momentum*matrix_dw[w];
   matrix_dw[w]=delta;
   matrix_w[w]+=delta;
  }
//+------------------------------------------------------------------+
//| Adam weight update for a fully connected layer.                  |
//| 2-D range: dim0 = output neuron (i), dim1 = group of 4 weight    |
//| columns (j). Per weight: m = b1*m+(1-b1)*g; v = b2*v+(1-b2)*g^2; |
//| w += l*m/sqrt(v). l is expected to carry the bias-correction     |
//| factors computed host-side.                                      |
//| BUG FIX: both remainder switches keyed on inputs+1-j*4, shifted  |
//| by one against the actual number of remaining weights (case 0    |
//| must mean "only the bias left", as in FeedForward; the old       |
//| case 0 even addressed one element past the row). Additionally    |
//| the inputs were read at matrix_i[j..] although the weight offset |
//| is j*4 — both now use the j*4 column base.                       |
//+------------------------------------------------------------------+
__kernel void UpdateWeightsAdam(__global double *matrix_w,
                                __global const double *matrix_g,
                                __global const double *matrix_i,
                                __global double *matrix_m,
                                __global double *matrix_v,
                                const int inputs, const double l, const double b1, const double b2)
  {
   const int i=get_global_id(0);
   const int j=get_global_id(1);
   const int col=j*4;                      // first weight column handled here
   const int wi=i*(inputs+1)+col;
   double4 m, v, weight, inp;
//--- gather up to four weights; the trailing partial vector is zero
//--- padded and the bias position gets input 1 (mirrors FeedForward)
   switch(inputs-col)
     {
      case 0:
         inp=(double4)(1,0,0,0);
         weight=(double4)(matrix_w[wi],0,0,0);
         m=(double4)(matrix_m[wi],0,0,0);
         v=(double4)(matrix_v[wi],0,0,0);
         break;
      case 1:
         inp=(double4)(matrix_i[col],1,0,0);
         weight=(double4)(matrix_w[wi],matrix_w[wi+1],0,0);
         m=(double4)(matrix_m[wi],matrix_m[wi+1],0,0);
         v=(double4)(matrix_v[wi],matrix_v[wi+1],0,0);
         break;
      case 2:
         inp=(double4)(matrix_i[col],matrix_i[col+1],1,0);
         weight=(double4)(matrix_w[wi],matrix_w[wi+1],matrix_w[wi+2],0);
         m=(double4)(matrix_m[wi],matrix_m[wi+1],matrix_m[wi+2],0);
         v=(double4)(matrix_v[wi],matrix_v[wi+1],matrix_v[wi+2],0);
         break;
      case 3:
         inp=(double4)(matrix_i[col],matrix_i[col+1],matrix_i[col+2],1);
         weight=(double4)(matrix_w[wi],matrix_w[wi+1],matrix_w[wi+2],matrix_w[wi+3]);
         m=(double4)(matrix_m[wi],matrix_m[wi+1],matrix_m[wi+2],matrix_m[wi+3]);
         v=(double4)(matrix_v[wi],matrix_v[wi+1],matrix_v[wi+2],matrix_v[wi+3]);
         break;
      default:
         inp=(double4)(matrix_i[col],matrix_i[col+1],matrix_i[col+2],matrix_i[col+3]);
         weight=(double4)(matrix_w[wi],matrix_w[wi+1],matrix_w[wi+2],matrix_w[wi+3]);
         m=(double4)(matrix_m[wi],matrix_m[wi+1],matrix_m[wi+2],matrix_m[wi+3]);
         v=(double4)(matrix_v[wi],matrix_v[wi+1],matrix_v[wi+2],matrix_v[wi+3]);
         break;
     }
   double4 g=(double4)(matrix_g[i])*inp;
   double4 mt=b1*m+(1-b1)*g;
   double4 vt=b2*v+(1-b2)*pow(g,2);
   double4 delta=l*mt/(vt>0 ? sqrt(vt) : l*10);   // guard against div by zero
//--- scatter back only the lanes that exist (fall-through intended)
   switch(inputs-col)
     {
      case 2:
         matrix_w[wi+2]+=delta.s2;
         matrix_m[wi+2]=mt.s2;
         matrix_v[wi+2]=vt.s2;
      /* fallthrough */
      case 1:
         matrix_w[wi+1]+=delta.s1;
         matrix_m[wi+1]=mt.s1;
         matrix_v[wi+1]=vt.s1;
      /* fallthrough */
      case 0:
         matrix_w[wi]+=delta.s0;
         matrix_m[wi]=mt.s0;
         matrix_v[wi]=vt.s0;
         break;
      default:
         matrix_w[wi]+=delta.s0;
         matrix_m[wi]=mt.s0;
         matrix_v[wi]=vt.s0;
         matrix_w[wi+1]+=delta.s1;
         matrix_m[wi+1]=mt.s1;
         matrix_v[wi+1]=vt.s1;
         matrix_w[wi+2]+=delta.s2;
         matrix_m[wi+2]=mt.s2;
         matrix_v[wi+2]=vt.s2;
         matrix_w[wi+3]+=delta.s3;
         matrix_m[wi+3]=mt.s3;
         matrix_v[wi+3]=vt.s3;
         break;
     }
  }
//+------------------------------------------------------------------+
//| Max pooling forward pass.                                        |
//| One work item per output element: maximum of a window of         |
//| `window` input values starting at i*step, clipped at `inputs`.   |
//| BUG FIX: the window was scanned in matrix_o (the output buffer)  |
//| instead of matrix_i, so the kernel pooled its own stale output   |
//| in place and the input buffer was never read.                    |
//+------------------------------------------------------------------+
__kernel void FeedForwardProof(__global double *matrix_i,
                               __global double *matrix_o,
                               int inputs, int window, int step)
  {
   int i=get_global_id(0);
   int pos=i*step;
   double result=matrix_i[pos];
   for(int k=1; k<window; k++)
     {
      int shift=pos+k;
      if(shift>=inputs)
         break;
      result=max(result,matrix_i[shift]);
     }
   matrix_o[i]=result;
  }
//+------------------------------------------------------------------+
//| Max pooling input gradient.                                      |
//| One work item per input element: an input collects the gradient  |
//| of every pooling window it won, i.e. every output cell whose     |
//| stored maximum equals this input's value.                        |
//+------------------------------------------------------------------+
__kernel void CalcInputGradientProof(__global double *matrix_i,
                                     __global double *matrix_g,
                                     __global double *matrix_o,
                                     __global double *matrix_ig,
                                     int outputs, int window, int step)
  {
   int i=get_global_id(0);
   double value=matrix_i[i];
//--- range of output cells whose window covers input i
   int first=i-window+step;
   first=(first-first%step)/step;
   int last=(i-i%step)/step+1;
   if(first<0)
      first=0;
   if(last>outputs)
      last=outputs;
   double grad=0;
   for(int out=first; out<last; out++)
     {
      //--- exact-value match identifies the argmax of the window
      if(matrix_o[out]==value)
         grad+=matrix_g[out];
     }
   matrix_ig[i]=grad;
  }
//+------------------------------------------------------------------+
//| Convolutional layer forward pass.                                |
//| One work item per window position (i). For each of window_out    |
//| filters: weighted sum over window_in inputs starting at i*step   |
//| plus bias, then the selected activation.                         |
//| matrix_w layout: (window_in+1) weights per filter, bias last.    |
//| activation: 0=tanh, 1=sigmoid, 2=LeakyReLU(0.01), other=linear.  |
//| BUG FIX: the accumulator was declared outside the filter loop,   |
//| so every filter after the first summed on top of the previous    |
//| filter's already-activated result; it is now reset per filter.   |
//+------------------------------------------------------------------+
__kernel void FeedForwardConv(__global double *matrix_w,
                              __global double *matrix_i,
                              __global double *matrix_o,
                              int inputs, int step,
                              int window_in, int window_out,
                              uint activation)
  {
   int i=get_global_id(0);
   int w_in=window_in;
   int w_out=window_out;
   double4 inp, weight;
   int shift_out=w_out*i;
   int shift_in=step*i;
   for(int out=0; out<w_out; out++)
     {
      double sum=0.0;                     // fresh accumulator per filter
      int shift=(w_in+1)*out;
      //--- clip the window at the end of the input buffer
      //--- NOTE(review): when clipped (stop<w_in) the element at k==stop is
      //--- treated as the bias but multiplied by matrix_w[shift+stop], not
      //--- the bias weight at shift+w_in — confirm clipped windows occur
      int stop=(w_in<=(inputs-shift_in) ? w_in : (inputs-shift_in));
      for(int k=0; k<=stop; k=k+4)
        {
         switch(stop-k)
           {
            case 0:
               inp=(double4)(1,0,0,0);
               weight=(double4)(matrix_w[shift+k],0,0,0);
               break;
            case 1:
               inp=(double4)(matrix_i[shift_in+k],1,0,0);
               weight=(double4)(matrix_w[shift+k],matrix_w[shift+k+1],0,0);
               break;
            case 2:
               inp=(double4)(matrix_i[shift_in+k],matrix_i[shift_in+k+1],1,0);
               weight=(double4)(matrix_w[shift+k],matrix_w[shift+k+1],matrix_w[shift+k+2],0);
               break;
            case 3:
               inp=(double4)(matrix_i[shift_in+k],matrix_i[shift_in+k+1],matrix_i[shift_in+k+2],1);
               weight=(double4)(matrix_w[shift+k],matrix_w[shift+k+1],matrix_w[shift+k+2],matrix_w[shift+k+3]);
               break;
            default:
               inp=(double4)(matrix_i[shift_in+k],matrix_i[shift_in+k+1],matrix_i[shift_in+k+2],matrix_i[shift_in+k+3]);
               weight=(double4)(matrix_w[shift+k],matrix_w[shift+k+1],matrix_w[shift+k+2],matrix_w[shift+k+3]);
               break;
           }
         sum+=dot(inp,weight);
        }
      switch(activation)
        {
         case 0:
            sum=tanh(sum);
            break;
         case 1:
            sum=1/(1+exp(-clamp(sum,-50.0,50.0)));
            break;
         case 2:
            if(sum<0)
               sum*=0.01;
            break;
         default:
            break;
        }
      matrix_o[out+shift_out]=sum;
     }
  }
//+------------------------------------------------------------------+
//| Convolutional layer input gradient.                              |
//| One work item per input element (i). Sums, over every window     |
//| position k covering input i and every filter h, the output       |
//| gradient times the matching kernel weight, then scales by the    |
//| activation derivative at this input's forward output.            |
//| BUG FIXES: (1) the filter loop advanced by 4 (h+=4) although the |
//| body is scalar, silently skipping three of every four filters;   |
//| (2) the clamp assigned stop=outputs (raw buffer length) instead  |
//| of the number of window positions outputs/window_out; (3) k      |
//| could start negative for the first inputs, reading matrix_g out  |
//| of bounds — clamped with max(0,start) as in the pooling kernel.  |
//+------------------------------------------------------------------+
__kernel void CalcHiddenGradientConv(__global double *matrix_w,
                                     __global double *matrix_g,
                                     __global double *matrix_o,
                                     __global double *matrix_ig,
                                     int outputs, int step,
                                     int window_in, int window_out,
                                     uint activation)
  {
   int i=get_global_id(0);
   int inputs=get_global_size(0);
   double sum=0;
   double out=matrix_o[i];
//--- window positions whose receptive field covers input i
   int start=i-window_in+step;
   start=(start-start%step)/step;
   int stop=(i-i%step)/step+1;
   if(stop>(outputs/window_out))
      stop=outputs/window_out;
   for(int h=0; h<window_out; h++)
     {
      for(int k=max(0,start); k<stop; k++)
        {
         int shift_w=(stop-k-1)*step+i%step+h*(window_in+1);
         sum+=matrix_g[k*window_out+h]*matrix_w[shift_w];
        }
     }
//--- activation derivative (same scheme as the dense layers)
   switch(activation)
     {
      case 0:
         sum=clamp(sum+out,-1.0,1.0)-out;
         sum=sum*(1-pow(out==1 || out==-1 ? 0.99999999 : out,2));
         break;
      case 1:
         sum=clamp(sum+out,0.0,1.0)-out;
         sum=sum*(out==0 || out==1 ? 0.00000001 : (out*(1-out)));
         break;
      case 2:
         if(out<0)
            sum*=0.01;
         break;
      default:
         break;
     }
   matrix_ig[i]=sum;
  }
//+------------------------------------------------------------------+
//| SGD-with-momentum weight update for a convolutional layer.       |
//| One work item per weight (i). shift = column inside the filter   |
//| (shift==window_in is the bias), shift_out = filter index.        |
//| Accumulates the gradient over all window positions, then         |
//| delta = lr*grad + momentum*previous delta.                       |
//| BUG FIX: the bounds guard tested shift+t*window_in although the  |
//| input is read at shift+t*step — with step<window_in it read past |
//| the buffer, with step>window_in it dropped valid terms.          |
//| NOTE(review): the window-count formula here differs from         |
//| UpdateWeightsConvAdam ((inputs-window_in) vs                     |
//| (inputs-window_in+step)) — confirm which matches the host-side   |
//| output size.                                                     |
//+------------------------------------------------------------------+
__kernel void UpdateWeightsConvMomentum(__global double *matrix_w,
                                        __global double *matrix_g,
                                        __global double *matrix_i,
                                        __global double *matrix_dw,
                                        int inputs, double learning_rates, double momentum,
                                        int window_in, int window_out, int step)
  {
   const int i=get_global_id(0);
   const int shift=i%(window_in+1);
   const int shift_out=(i-shift)/(window_in+1);
   int total=(inputs-window_in)%step;
   total=(inputs-window_in-total)/step+(total>0 ? 1 : 0);
   double grad=0;
   for(int t=0; t<total; t++)
     {
      //--- bias weight (shift==window_in) always contributes input 1
      if(shift!=window_in && (shift+t*step)>=inputs)
         break;
      grad+=matrix_g[t*window_out+shift_out]*(shift==window_in ? 1 : matrix_i[shift+t*step]);
     }
   double delta=learning_rates*grad + momentum*matrix_dw[i];
   matrix_dw[i]=delta;
   matrix_w[i]+=delta;
  }
//+------------------------------------------------------------------+
//| Adam weight update for a convolutional layer.                    |
//| One work item per weight (i). shift = column inside the filter   |
//| (shift==window_in is the bias), shift_out = filter index.        |
//| Accumulates the gradient over all window positions, then applies |
//| the Adam moment updates; l is expected to carry the bias         |
//| correction computed host-side.                                   |
//| BUG FIX: the bounds guard tested shift+t*window_in although the  |
//| input is read at shift+t*step — with step<window_in it read past |
//| the buffer, with step>window_in it dropped valid terms.          |
//+------------------------------------------------------------------+
__kernel void UpdateWeightsConvAdam(__global double *matrix_w,
                                    __global const double *matrix_g,
                                    __global const double *matrix_i,
                                    __global double *matrix_m,
                                    __global double *matrix_v,
                                    const int inputs, const double l, const double b1, const double b2,
                                    int window_in, int window_out, int step)
  {
   const int i=get_global_id(0);
   const int shift=i%(window_in+1);
   const int shift_out=(i-shift)/(window_in+1);
   int total=(inputs-(window_in-step))%step;
   total=(inputs-(window_in-step)-total)/step+(total>0 ? 1 : 0);
   double grad=0;
   for(int t=0; t<total; t++)
     {
      //--- bias weight (shift==window_in) always contributes input 1
      if(shift!=window_in && (shift+t*step)>=inputs)
         break;
      grad+=matrix_g[t*window_out+shift_out]*(shift==window_in ? 1 : matrix_i[shift+t*step]);
     }
   double mt=b1*matrix_m[i]+(1-b1)*grad;
   double vt=b2*matrix_v[i]+(1-b2)*pow(grad,2);
   double delta=l*mt/(vt>0 ? sqrt(vt) : l*10);   // guard against div by zero
   matrix_w[i]+=delta;
   matrix_m[i]=mt;
   matrix_v[i]=vt;
  }
//+------------------------------------------------------------------+
//| Self-attention score matrix (softmax of scaled Q.K).             |
//| One work item per query row q: score[q][k] =                     |
//| softmax_k(q.k / koef) over all key rows.                         |
//| BUG FIX: exp() was applied to the raw scaled product; a large    |
//| product overflowed to +inf and turned the whole row into NaN.    |
//| The row maximum is now subtracted first — the standard           |
//| numerically stable softmax, mathematically identical result.     |
//+------------------------------------------------------------------+
__kernel void AttentionScore(__global double *querys,
                             __global double *keys,
                             __global double *score,
                             int dimension)
  {
   int q=get_global_id(0);
   int shift_q=q*dimension;
   int units=get_global_size(0);
   int shift_s=q*units;
   double koef=sqrt((double)(units*dimension));
   if(koef<1)
      koef=1;
//--- pass 1: raw scaled dot products, tracking the row maximum
   double m=-DBL_MAX;
   for(int k=0; k<units; k++)
     {
      double result=0;
      int shift_k=k*dimension;
      for(int i=0; i<dimension; i++)
         result+=(querys[shift_q+i]*keys[shift_k+i]);
      result/=koef;
      score[shift_s+k]=result;
      m=max(m,result);
     }
//--- pass 2: exponentiate the shifted scores and normalize
   double sum=0;
   for(int k=0; k<units; k++)
     {
      double e=exp(score[shift_s+k]-m);
      score[shift_s+k]=e;
      sum+=e;
     }
   for(int k=0; k<units; k++)
      score[shift_s+k]/=sum;
  }
//+------------------------------------------------------------------+
//| Attention output with residual connection.                       |
//| 2-D range: dim0 = sequence position u, dim1 = feature d.         |
//| out[u][d] = sum_i score[u][i]*values[i][d] + inputs[u][d].       |
//+------------------------------------------------------------------+
__kernel void AttentionOut(__global double *scores,
                           __global double *values,
                           __global double *inputs,
                           __global double *out)
  {
   int u=get_global_id(0);
   int d=get_global_id(1);
   int units=get_global_size(0);
   int dimension=get_global_size(1);
   int idx=u*dimension+d;
   int score_row=u*units;
   double acc=0;
   for(int i=0; i<units; i++)
      acc+=scores[score_row+i]*values[i*dimension+d];
   out[idx]=acc+inputs[idx];        // residual add
  }
//+------------------------------------------------------------------+
//| Element-wise sum of two buffers.                                 |
//| One work item handles `dimension` consecutive elements.          |
//+------------------------------------------------------------------+
__kernel void SumMatrix(__global double *matrix1,
                        __global double *matrix2,
                        __global double *matrix_out,
                        int dimension)
  {
   const int base=get_global_id(0)*dimension;
   for(int k=0; k<dimension; k++)
     {
      int idx=base+k;
      matrix_out[idx]=matrix1[idx]+matrix2[idx];
     }
  }
//+------------------------------------------------------------------+
//| Attention backward pass: from the gradient of the attention      |
//| output, accumulate gradients for the values (vg), querys (qg)    |
//| and keys (kg) buffers.                                           |
//| 2-D range: dim0 = sequence position u, dim1 = feature d.         |
//+------------------------------------------------------------------+
__kernel void AttentionIsideGradients(__global double *querys,__global double *querys_g,
__global double *keys,__global double *keys_g,
__global double *values,__global double *values_g,
__global double *scores,
__global double *gradient)
{
int u=get_global_id(0);
int d=get_global_id(1);
int units=get_global_size(0);
int dimension=get_global_size(1);
// Score scaling factor. NOTE(review): the forward AttentionScore kernel
// divides by sqrt(units*dimension), but this uses sqrt(units) only —
// confirm which normalization is intended.
double koef=sqrt((double)(units));
if(koef<1)
koef=1;
double vg=0;
double qg=0;
double kg=0;
for(int iu=0;iu<units;iu++)
{
// NOTE(review): the /2 appears to split the incoming gradient between the
// attention path and the residual path (AttentionOut adds inputs) — verify
// against the host-side gradient wiring.
double g=gradient[iu*dimension+d]/2;
double sc=scores[iu*units+u];
// values gradient: d(out[iu][d])/d(values[u][d]) = score[iu][u]
vg+=sc*g;
//---
double sqg=0;
double skg=0;
for(int id=0;id<dimension;id++)
{
sqg+=values[iu*dimension+id]*gradient[u*dimension+id]/2;
skg+=values[u*dimension+id]*gradient[iu*dimension+id]/2;
}
// querys/keys gradients use only the diagonal softmax derivative s*(1-s),
// floored at 0.0001 when saturated — presumably an intentional
// approximation of the full softmax Jacobian; verify.
qg+=(scores[u*units+iu]==0 || scores[u*units+iu]==1 ? 0.0001 : scores[u*units+iu]*(1-scores[u*units+iu]))*sqg*keys[iu*dimension+d]/koef;
//---
kg+=(scores[iu*units+u]==0 || scores[iu*units+u]==1 ? 0.0001 : scores[iu*units+u]*(1-scores[iu*units+u]))*skg*querys[iu*dimension+d]/koef;
}
int shift=u*dimension+d;
values_g[shift]=vg;
querys_g[shift]=qg;
keys_g[shift]=kg;
}
//+------------------------------------------------------------------+
//| Per-row normalization to zero mean and unit standard deviation.  |
//| One work item per row of `dimension` elements, in place.         |
//+------------------------------------------------------------------+
__kernel void Normalize(__global double *buffer,
                        int dimension)
  {
   int row=get_global_id(0);
   int base=row*dimension;
//--- mean
   double mean=0;
   for(int i=0; i<dimension; i++)
      mean+=buffer[base+i];
   mean/=dimension;
//--- standard deviation
   double variance=0;
   for(int i=0; i<dimension; i++)
      variance+=pow(buffer[base+i]-mean,2);
   variance=sqrt(variance/dimension);
//--- a constant row (stddev 0) is only mean-centered
   double denom=(variance==0 ? 1 : variance);
   for(int i=0; i<dimension; i++)
      buffer[base+i]=(buffer[base+i]-mean)/denom;
  }
//+------------------------------------------------------------------+