NeuroNetworksBook/Include/realization/neurongpt.mqh

920 lines
70 KiB
MQL5
Raw Permalink Normal View History

2025-05-30 16:12:34 +02:00
//+------------------------------------------------------------------+
//| NeuronGPT.mqh |
//| Copyright 2021, MetaQuotes Ltd. |
//| https://www.mql5.com |
//+------------------------------------------------------------------+
#property copyright "Copyright 2021, MetaQuotes Ltd."
#property link "https://www.mql5.com"
//+------------------------------------------------------------------+
//| Include libraries                                                |
//+------------------------------------------------------------------+
#ifndef ArrayLayers
#include "arraylayers.mqh"
#endif
//+------------------------------------------------------------------+
//| Class CNeuronGPT                                                 |
//| Purpose: class implementing a GPT block. It stacks m_iLayers     |
//| internal layers, each made of a multi-head attention stage       |
//| followed by a two-layer feed-forward stage.                      |
//+------------------------------------------------------------------+
class CNeuronGPT : public CNeuronBase
{
protected:
//--- collections of internal neural layers; Init() adds one entry per GPT layer
CArrayLayers *m_cQuerys; // layers with 3 * m_iKeysSize * m_iHeads outputs; FeedForward() splits the output in 3 parts
CArrayLayers *m_cKeys; // key buffers reshaped to m_iUnits x (m_iKeysSize * m_iHeads)
CArrayLayers *m_cValues; // value buffers reshaped to m_iUnits x (m_iKeysSize * m_iHeads)
CArrayLayers *m_cScores; // attention scores reshaped to m_iHeads x m_iUnits
CBufferDouble *m_cScoreTemp; // buffer bound to the scores_temp argument of the GPTScoreGradients kernel
CArrayLayers *m_cAttentionOut; // attention results of all heads (m_iKeysSize * m_iHeads elements)
CArrayLayers *m_cW0; // layers mapping the attention result back to m_iWindow elements
CArrayLayers *m_cFF1; // first feed-forward layers (4 * m_iWindow outputs, ACT_SWISH)
CArrayLayers *m_cFF2; // second feed-forward layers (m_iWindow outputs, no activation)
//---
int m_iLayers; // number of internal layers (desc.layers)
int m_iWindow; // size of one input element (desc.window)
int m_iUnits; // number of rows kept in the key/value buffers (desc.count)
int m_iKeysSize; // size of one key/query/value vector per head (desc.window_out)
int m_iHeads; // number of attention heads (desc.step)
double m_dStd[][2]; // per layer: [0] std. deviation after the attention residual, [1] after the feed-forward residual
int m_iCurrentPosition; // row of the key/value buffers written by FeedForward(); wraps to 0 at m_iUnits
//--- makes sure a collection object exists, creating it when the pointer is invalid
bool CheckArrayLayers(CArrayLayers *&layers);
public:
CNeuronGPT(void);
~CNeuronGPT(void);
//--- initialization, OpenCL binding, forward/backward passes and weight update
virtual bool Init(CLayerDescription *desc);
virtual bool SetOpenCL(CMyOpenCL *opencl);
virtual bool FeedForward(CNeuronBase *prevLayer);
virtual bool CalcHiddenGradient(CNeuronBase *prevLayer);
virtual bool CalcDeltaWeights(CNeuronBase *prevLayer);
virtual bool UpdateWeights(int batch_size, double learningRate,
double &Beta[], double &Lambda[]);
//--- read-only access to the block dimensions
virtual int GetUnits(void) const { return m_iUnits; }
virtual int GetLayers(void) const { return m_iLayers; }
//--- methods for working with files
virtual bool Save(const int file_handle);
virtual bool Load(const int file_handle);
//--- method of identifying the object
virtual int Type(void) const { return(defNeuronGPT); }
};
//+------------------------------------------------------------------+
//| Class constructor                                                |
//| Sets the default dimensions (8 heads, everything else 0) and     |
//| creates the empty containers that Init()/Load() fill later.      |
//+------------------------------------------------------------------+
CNeuronGPT::CNeuronGPT(void) : m_iHeads(8),
   m_iWindow(0),
   m_iKeysSize(0),
   m_iUnits(0),
   m_iLayers(0),
   m_iCurrentPosition(0)
  {
   m_cQuerys = new CArrayLayers();
   m_cKeys = new CArrayLayers();
   m_cValues = new CArrayLayers();
   m_cScores = new CArrayLayers();
//--- the score scratch buffer is dereferenced by CalcHiddenGradient() and
//--- handled by SetOpenCL() and the destructor, so it must exist like the
//--- other members
   m_cScoreTemp = new CBufferDouble();
   m_cAttentionOut = new CArrayLayers();
   m_cW0 = new CArrayLayers();
   m_cFF1 = new CArrayLayers();
   m_cFF2 = new CArrayLayers();
  }
//+------------------------------------------------------------------+
//| Class destructor                                                 |
//| Releases every container owned by the block. Only pointers that  |
//| CheckPointer() reports as dynamic are deleted, which matches the |
//| pointer validation used by the rest of the class.                |
//+------------------------------------------------------------------+
CNeuronGPT::~CNeuronGPT(void)
  {
   if(CheckPointer(m_cQuerys) == POINTER_DYNAMIC)
      delete m_cQuerys;
   if(CheckPointer(m_cKeys) == POINTER_DYNAMIC)
      delete m_cKeys;
   if(CheckPointer(m_cValues) == POINTER_DYNAMIC)
      delete m_cValues;
   if(CheckPointer(m_cScores) == POINTER_DYNAMIC)
      delete m_cScores;
   if(CheckPointer(m_cScoreTemp) == POINTER_DYNAMIC)
      delete m_cScoreTemp;
   if(CheckPointer(m_cAttentionOut) == POINTER_DYNAMIC)
      delete m_cAttentionOut;
   if(CheckPointer(m_cW0) == POINTER_DYNAMIC)
      delete m_cW0;
   if(CheckPointer(m_cFF1) == POINTER_DYNAMIC)
      delete m_cFF1;
   if(CheckPointer(m_cFF2) == POINTER_DYNAMIC)
      delete m_cFF2;
   m_iLayers = 0;
  }
//+------------------------------------------------------------------+
//| Class initialization method                                      |
//| desc - layer description: type must equal Type(); count is the   |
//|        number of stored positions, window the element size,      |
//|        window_out the key size, step the number of heads and     |
//|        layers the number of internal layers (all must be > 0).   |
//| Returns true when every internal object was created.             |
//| NOTE: desc.count, desc.window and desc.window_out are            |
//|       overwritten before the parent class is initialized.        |
//+------------------------------------------------------------------+
bool CNeuronGPT::Init(CLayerDescription *desc)
  {
//--- check the source data
   if(!desc || desc.type != Type() || desc.count <= 0 || desc.window <= 0 || desc.window_out <= 0 || desc.step <= 0 || desc.layers <= 0)
      return false;
//--- save the constants
   m_iWindow = desc.window;
   m_iUnits = desc.count;
   m_iKeysSize = desc.window_out;
   m_iHeads = desc.step;
   m_iLayers = desc.layers;
//--- ArrayResize() reports failure with -1, which a plain "!" test misses
   if(ArrayResize(m_dStd, m_iLayers) <= 0)
      return false;
//--- call the initialization method of the parent class
   desc.count *= m_iWindow;
   desc.window_out = 1;
   desc.window = 0;
   if(!CNeuronBase::Init(desc))
      return false;
//--- make sure the collections that store the internal layers exist
   if(!CheckArrayLayers(m_cQuerys))
      return false;
   if(!CheckArrayLayers(m_cKeys))
      return false;
   if(!CheckArrayLayers(m_cValues))
      return false;
   if(!CheckArrayLayers(m_cScores))
      return false;
   if(!CheckArrayLayers(m_cAttentionOut))
      return false;
   if(!CheckArrayLayers(m_cW0))
      return false;
   if(!CheckArrayLayers(m_cFF1))
      return false;
   if(!CheckArrayLayers(m_cFF2))
      return false;
//--- create the objects of every internal layer
   for(int layer = 0; layer < m_iLayers; layer++)
     {
      //--- description reused (and modified) for all objects of this layer
      CLayerDescription *temp = new CLayerDescription();
      if(!temp)
         return false;
      temp.type = defNeuronBase;
      temp.window = m_iWindow;
      temp.count = (int)(3 * m_iKeysSize * m_iHeads);
      temp.activation = ACT_None;
      temp.activation_params[0] = 1;
      temp.activation_params[1] = 0;
      temp.optimization = desc.optimization;
      //--- initialize Querys (query, key and value are produced by one layer)
      CNeuronBase *Querys = new CNeuronBase();
      if(!Querys)
        {
         delete temp;
         return false;
        }
      if(!Querys.Init(temp))
        {
         delete Querys;
         delete temp;
         return false;
        }
      if(!m_cQuerys.Add(Querys))
        {
         delete Querys;
         delete temp;
         return false;
        }
      //--- initialize Keys (window = 0: the object is used as storage)
      CNeuronBase *Keys = new CNeuronBase();
      if(!Keys)
        {
         delete temp;
         return false;
        }
      temp.window = 0;
      temp.count = (int)(m_iUnits * m_iKeysSize * m_iHeads);
      if(!Keys.Init(temp))
        {
         delete Keys;
         delete temp;
         return false;
        }
      if(!Keys.GetOutputs().m_mMatrix.Reshape(m_iUnits, m_iKeysSize * m_iHeads))
        {
         //--- the object is not owned by a collection yet, release it here
         delete Keys;
         delete temp;
         return false;
        }
      if(!m_cKeys.Add(Keys))
        {
         delete Keys;
         delete temp;
         return false;
        }
      //--- initialize Values (same description as Keys)
      CNeuronBase *Values = new CNeuronBase();
      if(!Values)
        {
         delete temp;
         return false;
        }
      if(!Values.Init(temp))
        {
         delete Values;
         delete temp;
         return false;
        }
      if(!Values.GetOutputs().m_mMatrix.Reshape(m_iUnits, m_iKeysSize * m_iHeads))
        {
         delete Values;
         delete temp;
         return false;
        }
      if(!m_cValues.Add(Values))
        {
         delete Values;
         delete temp;
         return false;
        }
      //--- initialize Scores
      CNeuronBase *Scores = new CNeuronBase();
      if(CheckPointer(Scores) == POINTER_INVALID)
        {
         delete temp;
         return false;
        }
      temp.count = (int)(m_iUnits * m_iHeads);
      if(!Scores.Init(temp))
        {
         delete Scores;
         delete temp;
         return false;
        }
      if(!Scores.GetOutputs().m_mMatrix.Reshape(m_iHeads, m_iUnits))
        {
         delete Scores;
         delete temp;
         return false;
        }
      if(!m_cScores.Add(Scores))
        {
         delete Scores;
         delete temp;
         return false;
        }
      //--- initialize AttentionOut
      CNeuronBase *AttentionOut = new CNeuronBase();
      if(CheckPointer(AttentionOut) == POINTER_INVALID)
        {
         delete temp;
         return false;
        }
      temp.count = (int)(m_iKeysSize * m_iHeads);
      if(!AttentionOut.Init(temp))
        {
         delete AttentionOut;
         delete temp;
         return false;
        }
      if(!m_cAttentionOut.Add(AttentionOut))
        {
         delete AttentionOut;
         delete temp;
         return false;
        }
      //--- initialize W0 (input: attention result, output: m_iWindow elements)
      CNeuronBase *W0 = new CNeuronBase();
      if(CheckPointer(W0) == POINTER_INVALID)
        {
         delete temp;
         return false;
        }
      temp.window = temp.count;
      temp.count = m_iWindow;
      temp.activation = ACT_None;
      temp.activation_params[0] = 1;
      temp.activation_params[1] = 0;
      if(!W0.Init(temp))
        {
         delete W0;
         delete temp;
         return false;
        }
      if(!m_cW0.Add(W0))
        {
         delete W0;
         delete temp;
         return false;
        }
      //--- initialize FF1; validate the new object itself, not the collection
      CNeuronBase *FF1 = new CNeuronBase();
      if(CheckPointer(FF1) == POINTER_INVALID)
        {
         delete temp;
         return false;
        }
      temp.window = m_iWindow;
      temp.count = temp.window * 4;
      temp.activation = ACT_SWISH;
      temp.activation_params[0] = 1;
      temp.activation_params[1] = 0;
      if(!FF1.Init(temp))
        {
         delete FF1;
         delete temp;
         return false;
        }
      if(!m_cFF1.Add(FF1))
        {
         delete FF1;
         delete temp;
         return false;
        }
      //--- initialize FF2
      CNeuronBase *FF2 = new CNeuronBase();
      if(CheckPointer(FF2) == POINTER_INVALID)
        {
         delete temp;
         return false;
        }
      temp.window = temp.count;
      temp.count = m_iWindow;
      temp.activation = ACT_None;
      temp.activation_params[0] = 1;
      temp.activation_params[1] = 0;
      if(!FF2.Init(temp))
        {
         delete FF2;
         delete temp;
         return false;
        }
      if(!m_cFF2.Add(FF2))
        {
         delete FF2;
         delete temp;
         return false;
        }
      delete temp;
     }
//--- to avoid copying buffers, the block's own output and gradient buffers
//--- are replaced with those of the last FF2 layer
   if(m_cFF2.Total() < m_iLayers)
      return false;
   if(CheckPointer(m_cOutputs) != POINTER_INVALID)
      delete m_cOutputs;
   CNeuronBase *last = m_cFF2.At(m_iLayers - 1);
   if(CheckPointer(last) == POINTER_INVALID)
      return false;
   m_cOutputs = last.GetOutputs();
   if(CheckPointer(m_cGradients) != POINTER_INVALID)
      delete m_cGradients;
   m_cGradients = last.GetGradients();
//--- scratch buffer handed to the GPTScoreGradients kernel; without it
//--- CalcHiddenGradient() would dereference an invalid pointer
//--- NOTE(review): sized like one Scores buffer (heads x units) - confirm
//--- against the kernel source
   if(CheckPointer(m_cScoreTemp) == POINTER_INVALID)
     {
      m_cScoreTemp = new CBufferDouble();
      if(CheckPointer(m_cScoreTemp) == POINTER_INVALID)
         return false;
     }
   if(!m_cScoreTemp.m_mMatrix.Init(m_iHeads, m_iUnits))
      return false;
   m_cScoreTemp.m_mMatrix.Fill(0);
//--- the result is ignored on purpose: SetOpenCL() returns false whenever no
//--- OpenCL context is attached
   SetOpenCL(m_cOpenCL);
//---
   return true;
  }
//+------------------------------------------------------------------+
//| Passes the OpenCL context pointer to all internal objects        |
//| opencl - context handed to the parent class; the internal        |
//|          objects then receive m_cOpenCL.                         |
//| Returns true when m_cOpenCL is a valid pointer afterwards.       |
//+------------------------------------------------------------------+
bool CNeuronGPT::SetOpenCL(CMyOpenCL *opencl)
  {
//--- the parent class goes first; everything below reads m_cOpenCL
   CNeuronBase::SetOpenCL(opencl);
//--- attention part
   if(CheckPointer(m_cQuerys) != POINTER_INVALID)
     {
      m_cQuerys.SetOpencl(m_cOpenCL);
     }
   if(CheckPointer(m_cKeys) != POINTER_INVALID)
     {
      m_cKeys.SetOpencl(m_cOpenCL);
     }
   if(CheckPointer(m_cValues) != POINTER_INVALID)
     {
      m_cValues.SetOpencl(m_cOpenCL);
     }
   if(CheckPointer(m_cScores) != POINTER_INVALID)
     {
      m_cScores.SetOpencl(m_cOpenCL);
     }
//--- the scratch buffer is a plain data buffer, so it is created in the context
   if(CheckPointer(m_cScoreTemp) != POINTER_INVALID)
     {
      m_cScoreTemp.BufferCreate(m_cOpenCL);
     }
   if(CheckPointer(m_cAttentionOut) != POINTER_INVALID)
     {
      m_cAttentionOut.SetOpencl(m_cOpenCL);
     }
   if(CheckPointer(m_cW0) != POINTER_INVALID)
     {
      m_cW0.SetOpencl(m_cOpenCL);
     }
//--- feed-forward part
   if(CheckPointer(m_cFF1) != POINTER_INVALID)
     {
      m_cFF1.SetOpencl(m_cOpenCL);
     }
   if(CheckPointer(m_cFF2) != POINTER_INVALID)
     {
      m_cFF2.SetOpencl(m_cOpenCL);
     }
//--- report whether a usable context is attached
   const bool has_context = (CheckPointer(m_cOpenCL) != POINTER_INVALID);
   return has_context;
  }
//+------------------------------------------------------------------+
//| Feed-forward method                                              |
//| prevLayer - layer whose outputs are the input of the block.      |
//| For every internal layer: computes query/key/value, stores key   |
//| and value in row m_iCurrentPosition of the Keys/Values buffers,  |
//| runs multi-head attention (CPU matrices or the OpenCL kernel),   |
//| applies W0 with a residual sum and normalization, then FF1/FF2   |
//| with a second residual sum and normalization.                    |
//| Returns false as soon as any object is missing or a step fails.  |
//+------------------------------------------------------------------+
bool CNeuronGPT::FeedForward(CNeuronBase *prevLayer)
  {
//--- check that all objects are valid
   if(CheckPointer(prevLayer) == POINTER_INVALID ||
      CheckPointer(prevLayer.GetOutputs()) == POINTER_INVALID ||
      CheckPointer(m_cQuerys) == POINTER_INVALID ||
      CheckPointer(m_cValues) == POINTER_INVALID ||
      CheckPointer(m_cKeys) == POINTER_INVALID ||
      CheckPointer(m_cScores) == POINTER_INVALID ||
      CheckPointer(m_cAttentionOut) == POINTER_INVALID ||
      CheckPointer(m_cW0) == POINTER_INVALID ||
      CheckPointer(m_cFF1) == POINTER_INVALID ||
      CheckPointer(m_cFF2) == POINTER_INVALID)
      return false;
//--- advance the index of the current row in the data stack (wraps at m_iUnits)
   m_iCurrentPosition++;
   if(m_iCurrentPosition >= m_iUnits)
      m_iCurrentPosition = 0;
//--- iterate over all internal layers
   CNeuronBase *prevL = prevLayer;
   for(int layer = 0; layer < m_iLayers; layer++)
     {
      CNeuronBase *Querys = m_cQuerys.At(layer);
      if(CheckPointer(Querys) == POINTER_INVALID ||
         !Querys.FeedForward(prevL))
         return false;
      CNeuronBase *Keys = m_cKeys.At(layer);
      if(CheckPointer(Keys) == POINTER_INVALID)
         return false;
      CNeuronBase *Values = m_cValues.At(layer);
      if(CheckPointer(Values) == POINTER_INVALID)
         return false;
      //--- array[0] = query, array[1] = key, array[2] = value of the new element
      MATRIX array[];
      if(!Querys.GetOutputs().m_mMatrix.Vsplit(3, array))
         return false;
      if(!Keys.GetOutputs().m_mMatrix.Row(array[1].Row(0), m_iCurrentPosition))
         return false;
      if(!Values.GetOutputs().m_mMatrix.Row(array[2].Row(0), m_iCurrentPosition))
         return false;
      //--- get Scores
      CNeuronBase *Scores = m_cScores.At(layer);
      if(CheckPointer(Scores) == POINTER_INVALID)
         return false;
      //--- get AttentionOut
      CNeuronBase *AttentionOut = m_cAttentionOut.At(layer);
      if(CheckPointer(AttentionOut) == POINTER_INVALID)
         return false;
      //--- branch of the algorithm by computing device
      if(CheckPointer(m_cOpenCL) == POINTER_INVALID)
        {
         MATRIX out;
         if(!out.Init(m_iHeads, m_iKeysSize))
            return false;
         MATRIX array_keys[], array_values[];
         MATRIX array_querys[];
         MATRIX keys = Keys.GetOutputs().m_mMatrix;
         MATRIX values = Values.GetOutputs().m_mMatrix;
         if(!array[0].Vsplit(m_iHeads, array_querys))
            return false;
         if(!keys.Reshape(m_iUnits, m_iHeads * m_iKeysSize))
            return false;
         if(!keys.Vsplit(m_iHeads, array_keys))
            return false;
         if(!values.Reshape(m_iUnits, m_iHeads * m_iKeysSize))
            return false;
         if(!values.Vsplit(m_iHeads, array_values))
            return false;
         //--- determine Scores
         for(int head = 0; head < m_iHeads; head++)
           {
            //--- (1 x key_size) * (key_size x units) -> (1 x units)
            MATRIX score = array_querys[head].MatMul(array_keys[head].Transpose()) / sqrt(m_iKeysSize);
            for(int s = 0; s < m_iUnits; s++)
               score[0, s] = MathExp(score[0, s]);
            //--- normalize Scores
            score = score / (score.Sum() + 1e-8);
            if(!Scores.GetOutputs().m_mMatrix.Row(score.Row(0), head))
               return false;
            //--- (1 x units) * (units x key_size) -> (1 x key_size); the values
            //--- matrix must NOT be transposed here, otherwise the inner
            //--- dimensions do not match
            MATRIX o = score.MatMul(array_values[head]);
            if(!o.Reshape(1, m_iKeysSize) ||
               !out.Row(o.Row(0), head))
               return false;
           }
         if(!out.Reshape(1, m_iHeads * m_iKeysSize))
            return false;
         AttentionOut.GetOutputs().m_mMatrix = out;
        }
      else // OpenCL block
        {
         //--- check the data buffers
         if(Querys.GetOutputs().GetIndex() < 0)
            return false;
         if(Keys.GetOutputs().GetIndex() < 0)
            return false;
         if(Values.GetOutputs().GetIndex() < 0)
            return false;
         if(Scores.GetOutputs().GetIndex() < 0)
            return false;
         if(AttentionOut.GetOutputs().GetIndex() < 0)
            return false;
         //--- pass the parameters to the kernel
         if(!m_cOpenCL.SetArgumentBuffer(def_k_GPTFeedForward, def_gptff_keys, Keys.GetOutputs().GetIndex()))
            return false;
         if(!m_cOpenCL.SetArgumentBuffer(def_k_GPTFeedForward, def_gptff_outputs, AttentionOut.GetOutputs().GetIndex()))
            return false;
         if(!m_cOpenCL.SetArgumentBuffer(def_k_GPTFeedForward, def_gptff_querys, Querys.GetOutputs().GetIndex()))
            return false;
         if(!m_cOpenCL.SetArgumentBuffer(def_k_GPTFeedForward, def_gptff_scores, Scores.GetOutputs().GetIndex()))
            return false;
         if(!m_cOpenCL.SetArgumentBuffer(def_k_GPTFeedForward, def_gptff_values, Values.GetOutputs().GetIndex()))
            return false;
         if(!m_cOpenCL.SetArgument(def_k_GPTFeedForward, def_gptff_key_size, m_iKeysSize))
            return false;
         if(!m_cOpenCL.SetArgument(def_k_GPTFeedForward, def_gptff_units, m_iUnits))
            return false;
         //--- put the kernel into the execution queue (one work item per head)
         int off_set[] = {0};
         int NDRange[] = {m_iHeads};
         if(!m_cOpenCL.Execute(def_k_GPTFeedForward, 1, off_set, NDRange))
            return false;
         //--- read the results of the operations
         if(!AttentionOut.GetOutputs().BufferRead())
            return false;
        }
      //--- weighted output of all attention heads
      CNeuronBase *W0 = m_cW0.At(layer);
      if(CheckPointer(W0) == POINTER_INVALID ||
         !W0.FeedForward(AttentionOut))
         return false;
      //--- sum with the source data and normalize
      W0.GetOutputs().m_mMatrix += prevL.GetOutputs().m_mMatrix;
      double mean = W0.GetOutputs().m_mMatrix.Mean();
      m_dStd[layer][0] = W0.GetOutputs().m_mMatrix.Std();
      W0.GetOutputs().m_mMatrix = W0.GetOutputs().m_mMatrix - mean;
      //--- a zero deviation means all elements are equal: keep the centered
      //--- zeros instead of dividing by zero (CalcHiddenGradient() skips the
      //--- scaling for a zero deviation as well)
      if(m_dStd[layer][0] != 0)
         W0.GetOutputs().m_mMatrix = W0.GetOutputs().m_mMatrix / m_dStd[layer][0];
      if(m_cOpenCL && !W0.GetOutputs().BufferWrite())
         return false;
      //--- forward pass of the Feed Forward block
      CNeuronBase *FF1 = m_cFF1.At(layer);
      if(CheckPointer(FF1) == POINTER_INVALID ||
         !FF1.FeedForward(W0))
         return false;
      CNeuronBase *FF2 = m_cFF2.At(layer);
      if(CheckPointer(FF2) == POINTER_INVALID ||
         !FF2.FeedForward(FF1))
         return false;
      //--- sum with the attention output and normalize
      CBufferDouble *prev = FF2.GetOutputs();
      prev.m_mMatrix += W0.GetOutputs().m_mMatrix;
      mean = prev.m_mMatrix.Mean();
      m_dStd[layer][1] = prev.m_mMatrix.Std();
      prev.m_mMatrix = prev.m_mMatrix - mean;
      if(m_dStd[layer][1] != 0)
         prev.m_mMatrix = prev.m_mMatrix / m_dStd[layer][1];
      if(m_cOpenCL && !prev.BufferWrite())
         return false;
      //--- the output of this layer feeds the next one
      prevL = FF2;
     }
//---
   return true;
  }
//+------------------------------------------------------------------+
//| Method propagating the gradient through the hidden layers        |
//| prevLayer - layer that receives the gradient of internal layer 0.|
//| Walks the internal layers in reverse order: FF2 -> FF1 -> W0 ->  |
//| attention -> Querys -> previous layer, scaling the gradients by  |
//| the deviations stored in m_dStd during the forward pass.         |
//| Returns false when an object is missing or a step fails.         |
//| NOTE(review): in the CPU branch the attention gradient math is   |
//| commented out, so that branch looks unfinished.                  |
//+------------------------------------------------------------------+
bool CNeuronGPT::CalcHiddenGradient(CNeuronBase *prevLayer)
  {
//--- check that all objects are valid
   if(CheckPointer(prevLayer) == POINTER_INVALID ||
      CheckPointer(m_cOutputs) == POINTER_INVALID ||
      CheckPointer(m_cGradients) == POINTER_INVALID ||
      CheckPointer(m_cScores) == POINTER_INVALID ||
      CheckPointer(m_cFF2) == POINTER_INVALID ||
      CheckPointer(m_cFF1) == POINTER_INVALID ||
      CheckPointer(m_cW0) == POINTER_INVALID ||
      CheckPointer(m_cAttentionOut) == POINTER_INVALID ||
      CheckPointer(m_cQuerys) == POINTER_INVALID ||
      CheckPointer(m_cKeys) == POINTER_INVALID ||
      CheckPointer(m_cValues) == POINTER_INVALID ||
      m_cOutputs.Total() != m_cGradients.Total())
      return false;
//--- iterate over all internal layers in reverse order
   for(int layer = m_iLayers - 1; layer >= 0; layer--)
     {
      CNeuronBase *FF2 = m_cFF2.At(layer);
      if(CheckPointer(FF2) == POINTER_INVALID)
         return false;
      CBufferDouble *Gradients = FF2.GetGradients();
      if(CheckPointer(Gradients) == POINTER_INVALID)
         return false;
      //--- undo the normalization of the feed-forward residual
      if(m_dStd[layer][1] != 0 && Gradients.Scaling(1 / m_dStd[layer][1]) <= 0)
         return false;
      //--- pass the gradient through the Feed Forward block
      CNeuronBase *FF1 = m_cFF1.At(layer);
      if(CheckPointer(FF1) == POINTER_INVALID ||
         !FF2.CalcHiddenGradient(FF1))
         return false;
      CNeuronBase *W0 = m_cW0.At(layer);
      if(CheckPointer(W0) == POINTER_INVALID ||
         !FF1.CalcHiddenGradient(W0))
         return false;
      //--- add the gradient that bypassed the block through the residual link
      CBufferDouble *attention_grad = W0.GetGradients();
      if(CheckPointer(attention_grad) == POINTER_INVALID ||
         !attention_grad.SumArray(Gradients))
         return false;
      //--- get Scores
      CNeuronBase *Scores = m_cScores.At(layer);
      if(CheckPointer(Scores) == POINTER_INVALID)
         return false;
      //--- undo the normalization of the attention residual
      if(m_dStd[layer][0] != 0 && attention_grad.Scaling(1 / m_dStd[layer][0]) <= 0)
         return false;
      //--- distribute the error gradient to the attention heads
      CNeuronBase *AttentionOut = m_cAttentionOut.At(layer);
      if(CheckPointer(AttentionOut) == POINTER_INVALID ||
         !W0.CalcHiddenGradient(AttentionOut))
         return false;
      //--- get pointers to the Querys, Keys, Values objects
      CNeuronBase *Querys = m_cQuerys.At(layer);
      if(CheckPointer(Querys) == POINTER_INVALID)
         return false;
      CNeuronBase *Keys = m_cKeys.At(layer);
      if(CheckPointer(Keys) == POINTER_INVALID)
         return false;
      CNeuronBase *Values = m_cValues.At(layer);
      if(CheckPointer(Values) == POINTER_INVALID)
         return false;
      //--- branch of the algorithm by computing device
      attention_grad = AttentionOut.GetGradients();
      if(CheckPointer(attention_grad) == POINTER_INVALID)
         return false;
      if(CheckPointer(m_cOpenCL) == POINTER_INVALID)
        {
         MATRIX gradients[];
         if(!attention_grad.m_mMatrix.Vsplit(m_iHeads, gradients))
            return false;
         //--- gradient distribution to Values
         if(!Querys.GetGradients().m_mMatrix.Reshape(3, m_iHeads * m_iKeysSize))
            return false;
         MATRIX values[];
         if(!Values.GetOutputs().m_mMatrix.Vsplit(m_iHeads, values))
            return false;
         MATRIX querys_gard;
         MATRIX keys_gard;
         MATRIX values_gard;
         if(!querys_gard.Init(m_iHeads, m_iKeysSize) ||
            !keys_gard.Init(m_iHeads, m_iKeysSize) ||
            !values_gard.Init(m_iHeads, m_iKeysSize))
            return false;
         for(int head = 0; head < m_iHeads; head++)
           {
            //if(!values_gard.Row((gradients[head]*Scores[head, m_iCurrentPosition]).Row(0), head))
            //   return false;
            //--- gradient distribution to Querys and Keys
            //MATRIX score_grad = gradients[head].MatMul(values[head].Transpose());
            ////---
            //VECTOR temp;
            //temp.Init(m_iUnits);
            //VECTOR s = Scores.Row(head);
            //for(int c = 0; c < m_iUnits; c++)
            //   temp[c]=((int)(c==m_iCurrentPosition) - s);
            //s = s.MatMul(temp);
            //if(!gradients[head].Row(s * gradients[head].Row(r) / sqrt(m_iKeysSize), r))
            //   return false;
            //temp = gradients[head].MatMul(keys[head]);
            //if(!temp.Reshape(1, m_iUnits * m_iKeysSize) ||
            //   !querys_grad.Row(temp.Row(0), head))
            //   return false;
            //temp = gradients[head].Transpose().MatMul(querys[head]);
            //if(!temp.Reshape(1, m_iUnits * m_iKeysSize) ||
            //   !keys_grad.Row(temp.Row(0), head))
            //   return false;
           }
         //if(!Querys.GetGradients().UpdateArray(2 * m_iKeysSize * m_iHeads, values, 0, m_iKeysSize * m_iHeads))
         //   return false;
         //if(!Querys.GetGradients().UpdateArray(0, querys_grad, 0, 2 * keys_total))
         //   return false;
        }
      else // OpenCL block
        {
         //--- check the data buffers
         if(Values.GetOutputs().GetIndex() < 0)
            return false;
         if(Querys.GetGradients().GetIndex() < 0)
            return false;
         if(Scores.GetOutputs().GetIndex() < 0)
            return false;
         if(attention_grad.GetIndex() < 0)
            return false;
         if(Scores.GetGradients().GetIndex() < 0)
            return false;
         //--- the scratch buffer must exist before it is dereferenced
         if(CheckPointer(m_cScoreTemp) == POINTER_INVALID ||
            m_cScoreTemp.GetIndex() < 0)
            return false;
         //--- pass the parameters to the kernel
         if(!m_cOpenCL.SetArgumentBuffer(def_k_GPTScoreGradients, def_gptscr_outputs_grad, attention_grad.GetIndex()))
            return false;
         if(!m_cOpenCL.SetArgumentBuffer(def_k_GPTScoreGradients, def_gptscr_scores, Scores.GetOutputs().GetIndex()))
            return false;
         if(!m_cOpenCL.SetArgumentBuffer(def_k_GPTScoreGradients, def_gptscr_scores_grad, Scores.GetGradients().GetIndex()))
            return false;
         if(!m_cOpenCL.SetArgumentBuffer(def_k_GPTScoreGradients, def_gptscr_scores_temp, m_cScoreTemp.GetIndex()))
            return false;
         if(!m_cOpenCL.SetArgumentBuffer(def_k_GPTScoreGradients, def_gptscr_values, Values.GetOutputs().GetIndex()))
            return false;
         if(!m_cOpenCL.SetArgumentBuffer(def_k_GPTScoreGradients, def_gptscr_values_grad, Querys.GetGradients().GetIndex()))
            return false;
         if(!m_cOpenCL.SetArgument(def_k_GPTScoreGradients, def_gptscr_window, m_iKeysSize))
            return false;
         if(!m_cOpenCL.SetArgument(def_k_GPTScoreGradients, def_gptscr_units, m_iUnits))
            return false;
         if(!m_cOpenCL.SetArgument(def_k_GPTScoreGradients, def_gptscr_current, m_iCurrentPosition))
            return false;
         //--- put the kernel into the execution queue (one work item per head)
         int off_set[] = {0};
         int NDRange[] = {m_iHeads};
         if(!m_cOpenCL.Execute(def_k_GPTScoreGradients, 1, off_set, NDRange))
            return false;
         //--- load the results
         if(!Querys.GetGradients().BufferRead())
            return false;
         //--- second kernel: gradients of Querys and Keys
         if(Querys.GetOutputs().GetIndex() < 0)
            return false;
         if(Keys.GetOutputs().GetIndex() < 0)
            return false;
         if(!m_cOpenCL.SetArgumentBuffer(def_k_GPTHiddenGradients, def_gpthgr_keys, Keys.GetOutputs().GetIndex()))
            return false;
         if(!m_cOpenCL.SetArgumentBuffer(def_k_GPTHiddenGradients, def_gpthgr_querys, Querys.GetOutputs().GetIndex()))
            return false;
         if(!m_cOpenCL.SetArgumentBuffer(def_k_GPTHiddenGradients, def_gpthgr_querys_grad, Querys.GetGradients().GetIndex()))
            return false;
         if(!m_cOpenCL.SetArgumentBuffer(def_k_GPTHiddenGradients, def_gpthgr_scores_grad, Scores.GetGradients().GetIndex()))
            return false;
         if(!m_cOpenCL.SetArgument(def_k_GPTHiddenGradients, def_gpthgr_key_size, m_iKeysSize))
            return false;
         if(!m_cOpenCL.SetArgument(def_k_GPTHiddenGradients, def_gpthgr_units, m_iUnits))
            return false;
         if(!m_cOpenCL.SetArgument(def_k_GPTHiddenGradients, def_gpthgr_current, m_iCurrentPosition))
            return false;
         if(!m_cOpenCL.Execute(def_k_GPTHiddenGradients, 1, off_set, NDRange))
            return false;
         //--- load the results
         if(!Querys.GetGradients().BufferRead())
            return false;
        }
      //--- transfer the error gradient to the previous layer
      CNeuronBase *prevL = (layer == 0 ? prevLayer : m_cFF2.At(layer - 1));
      if(CheckPointer(prevL) == POINTER_INVALID ||
         !Querys.CalcHiddenGradient(prevL))
         return false;
      //--- add the gradient that reached W0 through the residual link
      if(CheckPointer(prevL.GetGradients()) == POINTER_INVALID ||
         !prevL.GetGradients().SumArray(W0.GetGradients()))
         return false;
     }
//---
   return true;
  }
//+------------------------------------------------------------------+
//| Method propagating the error gradients to the weight matrices    |
//| prevLayer - layer that feeds internal layer 0.                   |
//| For every internal layer calls CalcDeltaWeights() of FF2, FF1,   |
//| W0 and Querys, each with the object that produced its input.     |
//| Returns false when an object is missing or a call fails.         |
//+------------------------------------------------------------------+
bool CNeuronGPT::CalcDeltaWeights(CNeuronBase *prevLayer)
  {
//--- check that all collections are valid
   if(CheckPointer(m_cFF2) == POINTER_INVALID)
      return false;
   if(CheckPointer(m_cFF1) == POINTER_INVALID)
      return false;
   if(CheckPointer(m_cW0) == POINTER_INVALID)
      return false;
   if(CheckPointer(m_cAttentionOut) == POINTER_INVALID)
      return false;
   if(CheckPointer(m_cQuerys) == POINTER_INVALID)
      return false;
//--- in a loop, call the same method of every internal object; each pointer
//--- is validated before use, as UpdateWeights() does
   for(int layer = 0; layer < m_iLayers; layer++)
     {
      CNeuronBase *temp = m_cFF2.At(layer);
      if(CheckPointer(temp) == POINTER_INVALID ||
         !temp.CalcDeltaWeights(m_cFF1.At(layer)))
         return false;
      temp = m_cFF1.At(layer);
      if(CheckPointer(temp) == POINTER_INVALID ||
         !temp.CalcDeltaWeights(m_cW0.At(layer)))
         return false;
      temp = m_cW0.At(layer);
      if(CheckPointer(temp) == POINTER_INVALID ||
         !temp.CalcDeltaWeights(m_cAttentionOut.At(layer)))
         return false;
      temp = m_cQuerys.At(layer);
      if(CheckPointer(temp) == POINTER_INVALID)
         return false;
      //--- the first internal layer is fed by the external previous layer,
      //--- every other one by the FF2 object of the layer before it
      CNeuronBase *prevL = (layer == 0 ? prevLayer : m_cFF2.At(layer - 1));
      if(CheckPointer(prevL) == POINTER_INVALID ||
         !temp.CalcDeltaWeights(prevL))
         return false;
     }
//---
   return true;
  }
//+------------------------------------------------------------------+
//| Method updating the parameters of the weight matrices            |
//| The arguments are forwarded unchanged to UpdateWeights() of the  |
//| FF2, FF1, W0 and Querys objects of every internal layer.         |
//| Returns false when an object is missing or an update fails.      |
//+------------------------------------------------------------------+
bool CNeuronGPT::UpdateWeights(int batch_size, double learningRate, double &Beta[], double &Lambda[])
  {
//--- collections that hold trainable objects, in the order they are updated
   CArrayLayers *stages[4];
   stages[0] = m_cFF2;
   stages[1] = m_cFF1;
   stages[2] = m_cW0;
   stages[3] = m_cQuerys;
//--- control block
   for(int stage = 0; stage < 4; stage++)
     {
      if(CheckPointer(stages[stage]) == POINTER_INVALID)
         return false;
     }
//--- call the same method of every internal object, layer by layer
   for(int layer = 0; layer < m_iLayers; layer++)
     {
      for(int stage = 0; stage < 4; stage++)
        {
         CNeuronBase *neuron = stages[stage].At(layer);
         if(CheckPointer(neuron) == POINTER_INVALID)
            return false;
         if(!neuron.UpdateWeights(batch_size, learningRate, Beta, Lambda))
            return false;
        }
     }
//---
   return true;
  }
//+------------------------------------------------------------------+
//| Method saving the class elements to a file                       |
//| file_handle - handle of the file to write to.                    |
//| Writes the parent class data, six integer constants and then     |
//| every collection of internal layers.                             |
//| Returns false on the first failed write or missing collection.   |
//+------------------------------------------------------------------+
bool CNeuronGPT::Save(const int file_handle)
  {
//--- call the method of the parent class
   if(!CNeuronBase::Save(file_handle))
      return false;
//--- save the constants; Load() reads them back in the same order
   int constants[6];
   constants[0] = m_iLayers;
   constants[1] = m_iWindow;
   constants[2] = m_iKeysSize;
   constants[3] = m_iHeads;
   constants[4] = m_iUnits;
   constants[5] = m_iCurrentPosition;
   for(int i = 0; i < 6; i++)
     {
      if(FileWriteInteger(file_handle, constants[i]) <= 0)
         return false;
     }
//--- call the same method of all collections of internal layers
   CArrayLayers *collections[8];
   collections[0] = m_cQuerys;
   collections[1] = m_cKeys;
   collections[2] = m_cValues;
   collections[3] = m_cScores;
   collections[4] = m_cAttentionOut;
   collections[5] = m_cW0;
   collections[6] = m_cFF1;
   collections[7] = m_cFF2;
   for(int i = 0; i < 8; i++)
     {
      if(CheckPointer(collections[i]) == POINTER_INVALID)
         return false;
      if(!collections[i].Save(file_handle))
         return false;
     }
//---
   return true;
  }
//+------------------------------------------------------------------+
//| Method restoring the class from a file                           |
//| file_handle - handle of the file to read from.                   |
//| Reads the parent class data, the six integer constants written   |
//| by Save() and every collection of internal layers, then points   |
//| the block's output/gradient buffers at the last FF2 layer.       |
//| Returns false on invalid data or the first failed read.          |
//+------------------------------------------------------------------+
bool CNeuronGPT::Load(const int file_handle)
  {
//--- call the method of the parent class
   if(!CNeuronBase::Load(file_handle))
      return false;
//--- read the constants from the file
   m_iLayers = FileReadInteger(file_handle);
   m_iWindow = FileReadInteger(file_handle);
   m_iKeysSize = FileReadInteger(file_handle);
   m_iHeads = FileReadInteger(file_handle);
   m_iUnits = FileReadInteger(file_handle);
   m_iCurrentPosition = FileReadInteger(file_handle);
//--- reject values that Init() would not have accepted; they are used as
//--- matrix dimensions and loop bounds afterwards
   if(m_iLayers <= 0 || m_iWindow <= 0 || m_iKeysSize <= 0 ||
      m_iHeads <= 0 || m_iUnits <= 0 ||
      m_iCurrentPosition < 0 || m_iCurrentPosition >= m_iUnits)
      return false;
   if(ArrayResize(m_dStd, m_iLayers) <= 0)
      return false;
//--- call the same method of all collections of internal layers
   if(!CheckArrayLayers(m_cQuerys) ||
      !m_cQuerys.Load(file_handle))
      return false;
   if(!CheckArrayLayers(m_cKeys) ||
      !m_cKeys.Load(file_handle))
      return false;
   if(!CheckArrayLayers(m_cValues) ||
      !m_cValues.Load(file_handle))
      return false;
   if(!CheckArrayLayers(m_cScores) ||
      !m_cScores.Load(file_handle))
      return false;
   if(!CheckArrayLayers(m_cAttentionOut) ||
      !m_cAttentionOut.Load(file_handle))
      return false;
   if(!CheckArrayLayers(m_cW0) ||
      !m_cW0.Load(file_handle))
      return false;
   if(!CheckArrayLayers(m_cFF1) ||
      !m_cFF1.Load(file_handle))
      return false;
   if(!CheckArrayLayers(m_cFF2) ||
      !m_cFF2.Load(file_handle))
      return false;
//--- replace the data buffers with those of the last FF2 layer to avoid
//--- unnecessary copying
   CNeuronBase *last = m_cFF2.At(m_cFF2.Total() - 1);
   if(CheckPointer(last) == POINTER_INVALID)
      return false;
   if(CheckPointer(m_cOutputs) != POINTER_INVALID)
      delete m_cOutputs;
   m_cOutputs = last.GetOutputs();
   if(CheckPointer(m_cGradients) != POINTER_INVALID)
      delete m_cGradients;
   m_cGradients = last.GetGradients();
//--- the score scratch buffer is not stored in the file; recreate it so that
//--- CalcHiddenGradient() does not dereference an invalid pointer
//--- NOTE(review): sized like one Scores buffer (heads x units) - confirm
//--- against the kernel source
   if(CheckPointer(m_cScoreTemp) == POINTER_INVALID)
     {
      m_cScoreTemp = new CBufferDouble();
      if(CheckPointer(m_cScoreTemp) == POINTER_INVALID)
         return false;
     }
   if(!m_cScoreTemp.m_mMatrix.Init(m_iHeads, m_iUnits))
      return false;
   m_cScoreTemp.m_mMatrix.Fill(0);
//---
   return true;
  }
//+------------------------------------------------------------------+
//| Method checking the pointer to a collection of neural layers     |
//| layers - pointer passed by reference; when it is invalid a new   |
//|          CArrayLayers object is created and stored in it.        |
//| Returns true when the pointer is valid on exit.                  |
//+------------------------------------------------------------------+
bool CNeuronGPT::CheckArrayLayers(CArrayLayers *&layers)
  {
//--- nothing to do for an existing collection
   if(CheckPointer(layers) != POINTER_INVALID)
      return true;
//--- otherwise create one and report whether the allocation succeeded
   layers = new CArrayLayers();
   const bool created = (CheckPointer(layers) != POINTER_INVALID);
   return created;
  }
//+------------------------------------------------------------------+