//--- cl_khr_fp64 directive is used to enable work with doubles #pragma OPENCL EXTENSION cl_khr_fp64 : enable
// NOTE(review): this file arrived collapsed onto a few physical lines, and the
// `#pragma OPENCL EXTENSION cl_khr_fp64 : enable` directive is trapped inside
// the `//---` comment above, so it is currently inert — restore it to its own
// line when recovering the file. Several spans of code are also missing: the
// damage pattern (text vanishing between a `<` followed by a letter and the
// next `>`) looks like an HTML-tag stripper ate parts of the source. Each
// corrupted region is marked below; recover it from the original file rather
// than guessing. The code has been re-wrapped onto separate lines for
// readability only — no code tokens were altered.
//+------------------------------------------------------------------+
//| FeedForward: fully connected layer forward pass.                 |
//| One work-item per output neuron. matrix_w holds (inputs+1)       |
//| weights per neuron — the extra trailing weight multiplies a      |
//| constant bias input of 1 (see the padding in the switch below).  |
//| Inputs are consumed 4 at a time through double4/dot().           |
//| activation: 0 = tanh, 1 = sigmoid (argument clamped to +-50 to   |
//| avoid exp overflow), 2 = leaky ReLU (slope 0.01), else identity. |
//+------------------------------------------------------------------+
__kernel void FeedForward(__global double *matrix_w,
                          __global double *matrix_i,
                          __global double *matrix_o,
                          int inputs,
                          int activation)
  {
   int i=get_global_id(0);        // neuron handled by this work-item
   double sum=0.0;
   double4 inp, weight;
   int shift=(inputs+1)*i;        // start of this neuron's weight row (incl. bias weight)
   // Stride through the inputs 4 at a time; when fewer than 4 values remain,
   // the switch pads the vector with the bias constant 1 and zeros so that
   // dot() still produces the correct partial sum.
   for(int k=0; k<=inputs; k=k+4)
     {
      switch(inputs-k)
        {
         case 0:
            inp=(double4)(1,0,0,0);
            weight=(double4)(matrix_w[shift+k],0,0,0);
            break;
         case 1:
            inp=(double4)(matrix_i[k],1,0,0);
            weight=(double4)(matrix_w[shift+k],matrix_w[shift+k+1],0,0);
            break;
         case 2:
            inp=(double4)(matrix_i[k],matrix_i[k+1],1,0);
            weight=(double4)(matrix_w[shift+k],matrix_w[shift+k+1],matrix_w[shift+k+2],0);
            break;
         case 3:
            inp=(double4)(matrix_i[k],matrix_i[k+1],matrix_i[k+2],1);
            weight=(double4)(matrix_w[shift+k],matrix_w[shift+k+1],matrix_w[shift+k+2],matrix_w[shift+k+3]);
            break;
         default:
            inp=(double4)(matrix_i[k],matrix_i[k+1],matrix_i[k+2],matrix_i[k+3]);
            weight=(double4)(matrix_w[shift+k],matrix_w[shift+k+1],matrix_w[shift+k+2],matrix_w[shift+k+3]);
            break;
        }
      sum+=dot(inp,weight);
     }
   switch(activation)
     {
      case 0: sum=tanh(sum); break;
      case 1: sum=1/(1+exp(-clamp(sum,-50.0,50.0))); break;
      case 2: if(sum<0) sum*=0.01; break;     // leaky ReLU
      default: break;
     }
   matrix_o[i]=sum;
  }
//+------------------------------------------------------------------+
//| CalcOutputGradient: error gradient at the output layer.          |
//| One work-item per output neuron. temp = (target - output)        |
//| multiplied by the derivative of the activation that produced     |
//| the output. Targets are clamped into the activation's range;     |
//| the tiny constants substitute for a zero derivative at the       |
//| saturation points so learning does not stall completely.         |
//| activation codes match FeedForward: 0 tanh, 1 sigmoid,           |
//| 2 leaky ReLU, else identity.                                     |
//+------------------------------------------------------------------+
__kernel void CalcOutputGradient(__global double *matrix_t,
                                 __global double *matrix_o,
                                 __global double *matrix_ig,
                                 int activation)
  {
   int i=get_global_id(0);
   double temp=0;
   double out=matrix_o[i];
   switch(activation)
     {
      case 0:
         // tanh: derivative 1 - out^2; avoid exactly-saturated out=+-1
         temp=clamp(matrix_t[i],-1.0,1.0)-out;
         temp=temp*(1-pow(out==1 || out==-1 ? 0.99999999 : out,2));
         break;
      case 1:
         // sigmoid: derivative out*(1-out); avoid exactly 0 or 1
         temp=clamp(matrix_t[i],0.0,1.0)-out;
         temp=temp*(out==0 || out==1 ? 0.00000001 : (out*(1-out)));
         break;
      case 2:
         // leaky ReLU: derivative 1 for out>=0, 0.01 otherwise
         temp=(matrix_t[i]-out)*(out>=0 ? 1.0 : 0.01);
         break;
      default:
         temp=(matrix_t[i]-out);
         break;
     }
   matrix_ig[i]=temp;
  }
//+------------------------------------------------------------------+
//| CalcHiddenGradient: back-propagate gradients to a hidden layer.  |
//| One work-item per hidden neuron (inputs = global size).          |
//| NOTE(review): CORRUPTED below. The stripper ate everything from  |
//| the loop bound of `for(int k=0;k...` up to an expression that    |
//| clearly belongs to a different kernel — the surviving            |
//| `...0 ? sqrt(vt) : l*10); switch(inputs+1-j*4)` and the          |
//| matrix_m/matrix_v/mt/vt/delta/wi names read like the tail of an  |
//| Adam weight-update kernel (presumably UpdateWeightsAdam, with an |
//| entire momentum-update kernel possibly lost in between — TODO    |
//| confirm against the original file). Do not compile as-is.        |
//+------------------------------------------------------------------+
__kernel void CalcHiddenGradient(__global double *matrix_w,
                                 __global double *matrix_g,
                                 __global double *matrix_o,
                                 __global double *matrix_ig,
                                 int outputs,
                                 int activation)
  {
   int i=get_global_id(0);
   int inputs=get_global_size(0);
   double sum=0;
   double out=matrix_o[i];
   double4 grad, weight;
   // NOTE(review): corrupted span starts here — loop bound and a large block
   // of code (likely the rest of this kernel plus one or more weight-update
   // kernels) are missing. The tokens below are kept verbatim as found.
   for(int k=0;k0 ? sqrt(vt) : l*10);
   // NOTE(review): from here down this is the fall-through tail of what looks
   // like an Adam update: delta/mt/vt are double4 lanes written back per
   // remaining weight; cases 2->1->0 intentionally fall through to store a
   // partial final vector, default stores all four lanes.
   switch(inputs+1-j*4)
     {
      case 2: matrix_w[wi+2]+=delta.s2; matrix_m[wi+2]=mt.s2; matrix_v[wi+2]=vt.s2;
      case 1: matrix_w[wi+1]+=delta.s1; matrix_m[wi+1]=mt.s1; matrix_v[wi+1]=vt.s1;
      case 0: matrix_w[wi]+=delta.s0; matrix_m[wi]=mt.s0; matrix_v[wi]=vt.s0; break;
      default:
         matrix_w[wi]+=delta.s0;   matrix_m[wi]=mt.s0;   matrix_v[wi]=vt.s0;
         matrix_w[wi+1]+=delta.s1; matrix_m[wi+1]=mt.s1; matrix_v[wi+1]=vt.s1;
         matrix_w[wi+2]+=delta.s2; matrix_m[wi+2]=mt.s2; matrix_v[wi+2]=vt.s2;
         matrix_w[wi+3]+=delta.s3; matrix_m[wi+3]=mt.s3; matrix_v[wi+3]=vt.s3;
         break;
     }
  };
//+------------------------------------------------------------------+
//| FeedForwardProof: max-pooling style forward pass (one work-item  |
//| per pooled output; pos = i*step is the window start).            |
//| NOTE(review): CORRUPTED — the loop header/body lost a span       |
//| (`k` bound, the `shift` declaration, and the `if(shift>` test    |
//| merged into `k=inputs) break;`). Also note the surviving text    |
//| reads matrix_o both as source and destination while matrix_i is  |
//| unused — possibly an artifact of the corruption; verify against  |
//| the original.                                                    |
//+------------------------------------------------------------------+
__kernel void FeedForwardProof(__global double *matrix_i,
                               __global double *matrix_o,
                               int inputs,
                               int window,
                               int step)
  {
   int i=get_global_id(0);
   int pos=i*step;
   double result=matrix_o[pos];
   // NOTE(review): corrupted loop kept verbatim.
   for(int k=1; k=inputs) break; result=max(result,matrix_o[shift]); }
   matrix_o[i]=result;
  }
//+------------------------------------------------------------------+
//| CalcInputGradientProof: gradient routing for the pooling layer   |
//| (each input receives gradient from the pooled outputs whose      |
//| windows cover it, when it was the maximum — TODO confirm, body   |
//| is corrupted below).                                             |
//+------------------------------------------------------------------+
__kernel void CalcInputGradientProof(__global double *matrix_i,
                                     __global double *matrix_g,
                                     __global double *matrix_o,
                                     __global double *matrix_ig,
                                     int outputs,
                                     int window,
                                     int step)
  {
   int i=get_global_id(0);
   double prev_gradient=0;
   double value=matrix_i[i];
   int start=i-window+step;
   start=(start-start%step)/step;   // first pooled output whose window covers input i
   int stop=(i-i%step)/step+1;      // one past the last covering output
   // NOTE(review): corrupted span starts here — the stripper fused the rest
   // of this kernel with what appears to be one or more convolution kernels
   // and the tail of a momentum-based conv weight update (learning_rates /
   // momentum / matrix_dw names below belong to that later kernel — TODO
   // confirm against the original). Tokens kept verbatim.
   for(int out=max(0,start); out(outputs/window_out)) stop=outputs;
   for(int h=0;h0 ? 1 : 0);
   double grad=0;
   for(int t=0;t=inputs) break; grad+=matrix_g[t*window_out+shift_out]*(shift==window_in ? 1 : matrix_i[shift+t*step]); }
   double delta=learning_rates*grad + momentum*matrix_dw[i];
   matrix_dw[i]=delta;
   matrix_w[i]+=delta;
  };
//+------------------------------------------------------------------+
//| UpdateWeightsConvAdam: Adam update for convolution-layer         |
//| weights. One work-item per weight; each of the window_out        |
//| filters has window_in weights plus one bias slot (shift ==       |
//| window_in selects the bias, whose input is the constant 1).      |
//| The gradient is accumulated over every position the filter       |
//| visits; `total` counts those positions from inputs/window/step.  |
//| delta = l*mt/sqrt(vt) with l*10 as the divisor when vt==0        |
//| (guards the division before the running variance warms up).     |
//| NOTE(review): the inner loop below lost a span (its bound and an |
//| if-condition merged into `t=inputs) break;`) — recover it before |
//| compiling.                                                       |
//+------------------------------------------------------------------+
__kernel void UpdateWeightsConvAdam(__global double *matrix_w,
                                    __global const double *matrix_g,
                                    __global const double *matrix_i,
                                    __global double *matrix_m,
                                    __global double *matrix_v,
                                    const int inputs,
                                    const double l,
                                    const double b1,
                                    const double b2,
                                    int window_in,
                                    int window_out,
                                    int step)
  {
   const int i=get_global_id(0);
   const int shift=i%(window_in+1);            // position inside the filter (window_in == bias)
   const int shift_out=(i-shift)/(window_in+1); // which filter this weight belongs to
   int total=(inputs-(window_in-step))%step;
   total=(inputs-(window_in-step)-total)/step+(total>0 ? 1 : 0);
   double grad=0;
   // NOTE(review): corrupted loop kept verbatim.
   for(int t=0;t=inputs) break; grad+=matrix_g[t*window_out+shift_out]*(shift==window_in ? 1 : matrix_i[shift+t*step]); }
   double mt=b1*matrix_m[i]+(1-b1)*grad;        // first-moment (momentum) estimate
   double vt=b2*matrix_v[i]+(1-b2)*pow(grad,2); // second-moment (variance) estimate
   double delta=l*mt/(vt>0 ? sqrt(vt) : l*10);
   matrix_w[i]+=delta;
   matrix_m[i]=mt;
   matrix_v[i]=vt;
  };
//+------------------------------------------------------------------+
//| AttentionScore: attention score computation (querys x keys).     |
//| One work-item per query row; koef = sqrt(units*dimension) is the |
//| scaling divisor, floored at 1.                                   |
//| NOTE(review): TRUNCATED — this SOURCE chunk ends inside the      |
//| kernel's first loop; the remainder of the kernel (and possibly   |
//| of the file) is outside the visible region.                      |
//+------------------------------------------------------------------+
__kernel void AttentionScore(__global double *querys,
                             __global double *keys,
                             __global double *score,
                             int dimension)
  {
   int q=get_global_id(0);
   int shift_q=q*dimension;
   int units=get_global_size(0);
   int shift_s=q*units;
   double koef=sqrt((double)(units*dimension));
   if(koef<1) koef=1;
   double sum=0;
   for(int k=0;k