Article-22714-Volatility-Mo.../Include/slsqp_article/Regression/OLS.mqh

407 lines
14 KiB
MQL5
Raw Permalink Normal View History

2026-06-03 20:14:05 +02:00
//+------------------------------------------------------------------+
//| OLS.mqh |
//| Copyright 2023, MetaQuotes Ltd. |
//| https://www.mql5.com |
//+------------------------------------------------------------------+
#property copyright "Copyright 2023, MetaQuotes Ltd."
#property link "https://www.mql5.com"
//+------------------------------------------------------------------+
//| matrix multiplication |
//+------------------------------------------------------------------+
template<typename T>
matrix<T> matmul(const matrix<T>& matrix_a, const matrix<T>& matrix_b)
{
matrix<T> matrix_c = matrix<T>::Zeros(0,0);
if(matrix_a.Cols()!=matrix_b.Rows())
return(matrix_c);
ulong M=matrix_a.Rows();
ulong K=matrix_a.Cols();
ulong N=matrix_b.Cols();
matrix_c=matrix<T>::Zeros(M,N);
for(ulong m=0; m<M; ++m)
for(ulong k=0; k<K; ++k)
for(ulong n=0; n<N; ++n)
matrix_c[m][n]+=matrix_a[m][k]*matrix_b[k][n];
return(matrix_c);
}
//+------------------------------------------------------------------+
//| Matrix multiply with vector |
//+------------------------------------------------------------------+
template<typename T>
vector<T> matmul(const matrix<T>& matrix_a, const vector<T>& vector_b)
{
matrix<T> matrix_b = matrix<T>::Zeros(vector_b.Size(),1);
matrix_b.Col(vector_b,0);
matrix<T>out = matmul(matrix_a,matrix_b);
return out.Col(0);
}
//+------------------------------------------------------------------+
//| Ordinary least squares class |
//+------------------------------------------------------------------+
class OLS
{
private:
bool m_const_prepended; //check column for constant in design matrix
matrix m_exog, //design matrix
m_pinv, //pseudo-inverse of matrix
m_cov_params, //covariance of matrix
m_m_error, //error matrix
m_norm_cov_params; //normalized covariance matrix
vector m_endog, //dependent variables
m_weights, //weights
m_singularvalues, //singular values of solution
m_params, //coefficients of regression model(solution)
m_tvalues, //test statistics of model
m_bse, //standard errors of model
m_const_cols, //mark constant columns in design matrix
m_resid; //residuals of model
ulong m_obs, //number of observations
m_model_dof, //degrees of freedom of model
m_resid_dof, //degrees of freedom of residuals
m_kconstant, //number of constants
m_rank; //rank of design matrix
double m_aic, //Akiake information criteria
m_bic, //Bayesian information criteria
m_scale, //scale of model
m_llf, //loglikelihood of model
m_sse, //sum of squared errors
m_rsqe, //r-squared of model
m_centeredtss, //centered sum of squares
m_uncenteredtss; //uncentered sum of squares
uint m_error; //error flag
// private methods
ulong countconstants(void);
void scale(void);
void sse(void);
void rsqe(void);
void centeredtss(void);
void uncenteredtss(void);
void aic(void);
void bic(void);
void bse(void);
void llf(void);
void tvalues(void);
void covariance_matrix(void);
public:
//constructor
OLS(void);
//destructor
~OLS(void);
//public methods
bool Fit(vector &y_vars,matrix &x_vars);
double Predict(vector &inputs);
double Predict(double _input);
//get properties of OLS model
ulong ModelDOF(void) { if(m_error) return 0; else return m_model_dof;}
ulong ResidDOF(void) { if(m_error) return 0; else return m_resid_dof;}
double Scale(void) { if(m_error) return EMPTY_VALUE; else return m_scale; }
double Aic(void) { if(m_error) return EMPTY_VALUE; else return m_aic; }
double Bic(void) { if(m_error) return EMPTY_VALUE; else return m_bic; }
double Sse(void) { if(m_error) return EMPTY_VALUE; else return m_sse; }
double Rsqe(void) { if(m_error) return EMPTY_VALUE; else return m_rsqe; }
double C_tss(void) { if(m_error) return EMPTY_VALUE; else return m_centeredtss;}
double Loglikelihood(void) { if(m_error) return EMPTY_VALUE; return m_llf; }
vector Tvalues(void) { if(m_error) return m_m_error.Col(0); return m_tvalues; }
vector Residuals(void) { if(m_error) return m_m_error.Col(0); return m_resid; }
vector ModelParameters(void) { if(m_error) return m_m_error.Col(0); return m_params; }
vector Bse(void) { if(m_error) return m_m_error.Col(0); return m_bse; }
matrix CovarianceMatrix(void) { if(m_error) return m_m_error; return m_cov_params; }
};
//+------------------------------------------------------------------+
//| constructor |
//+------------------------------------------------------------------+
OLS::OLS(void)
{
m_kconstant = 0;
m_obs = 0;
m_model_dof = 0;
m_resid_dof = 0;
m_kconstant = 0;
m_rank = 0;
m_aic = 0;
m_bic = 0;
m_scale = 0;
m_llf = 0;
m_error = 1;
m_m_error.Resize(2,2);
m_m_error.Fill(EMPTY_VALUE);
}
//+------------------------------------------------------------------+
//|Destructor |
//+------------------------------------------------------------------+
OLS::~OLS(void)
{
}
//+------------------------------------------------------------------+
//| Fit data |
//+------------------------------------------------------------------+
bool OLS::Fit(vector &y_vars,matrix &x_vars)
{
//---re-initialize internal properties
m_endog = y_vars;
//---
m_exog = x_vars;
//---
m_kconstant = countconstants();
//---
m_error = 0;
//---
m_model_dof = m_resid_dof = m_rank = m_obs= 0;
//---
m_aic = m_bic= m_llf=0;
//---
m_obs=m_exog.Rows();
//---
m_weights = vector::Ones(m_obs);
//---
m_rank = m_exog.Rank();
//---
m_model_dof = m_rank - m_kconstant;
//---
m_resid_dof = m_obs - m_rank;
//---
if(y_vars.Size()!=x_vars.Rows())
{
Print(__FUNCTION__," ",__LINE__," Error Invalid inputs Rows in x_vars not equal to length of y_vars");
m_error = 1;
return false;
}
//---
matrix U,V;
//---
::ResetLastError();
//--- SVD operation
if(!m_exog.SVD(U,V,m_singularvalues))
{
Print(__FUNCTION__," ",__LINE__," SVD operation failed : ",::GetLastError());
m_error = 1;
return false;
}
//--- Compute the pseudo-inverse of a matrix by the Moore-Penrose method
m_pinv = m_exog.PInv();
//--- transpose m_pinv
matrix pinv_t = m_pinv.Transpose();
//--- normalize covariance matrix
m_norm_cov_params = matmul(m_pinv,pinv_t);
//---
matrix diag;
if(!diag.Diag(m_singularvalues))
{
Print(__FUNCTION__," ",__LINE__," Diag operation failed : ",::GetLastError());
m_error = 1;
return false;
}
//--- rank of diag
m_rank = diag.Rank();
//--- estimate parameters of the model
m_params = matmul(m_pinv,m_endog);
//--- caluclate its degrees of freedom
m_model_dof = m_rank - m_kconstant;
//--- as well as those of the residuals
m_resid_dof = m_obs - m_rank;
//---get the residuals
m_resid = m_endog - matmul(m_exog,m_params);
//---compute scale of model
scale();
//--- compute covariance matrix
covariance_matrix();
//--- compute standard errors
bse();
//--- compute sum of squared errors
sse();
//--- compute loglikelihood function
llf();
//--- compute centered sum of squares
centeredtss();
//--- compute uncentered sum of squares
uncenteredtss();
//--- compute r-squared
rsqe();
//--- compute AIC
aic();
//--- compute BIC
bic();
//--- compute tvalues
tvalues();
//---
return true;
}
//+------------------------------------------------------------------+
//| Predict next value based on model parameters |
//+------------------------------------------------------------------+
double OLS::Predict(vector &inputs)
{
//---
if(m_error)
{
Print("Invalid model");
return EMPTY_VALUE;
}
//---
if(inputs.Size()!=(m_params.Size()-m_kconstant))
{
Print("invalid inputs: supplied vector does not match size of model parameters");
return EMPTY_VALUE;
}
//---
double prediction = 0;
//---
for(ulong i = 0,k = 0;i<m_const_cols.Size(); i++)
{
if(!m_const_cols[i])
prediction+=(m_params[i]*inputs[k++]);
else
prediction+=(m_params[i]);
}
//---
return prediction;
}
//+------------------------------------------------------------------+
//| Predict next value based on model parameters |
//+------------------------------------------------------------------+
double OLS::Predict(double _input)
{
//---
if(m_error || (m_params.Size()-m_kconstant)>1)
{
if(m_error)
Print("Invalid model");
else
Print("invalid inputs: insufficient number of predictors supplied");
return EMPTY_VALUE;
}
//---
double prediction = 0;
//---
if(m_kconstant)
prediction=(m_const_cols[0])?m_params[0]+(m_params[1]*_input):m_params[1]+(m_params[0]*_input);
else
prediction=m_params[0]*_input;
//---
return prediction;
}
//+------------------------------------------------------------------+
//|Count the number of constants in RHS matrix |
//+------------------------------------------------------------------+
ulong OLS::countconstants(void)
{
ulong count = 0;
vector temp;
m_const_cols = vector::Zeros(m_exog.Cols());
for(ulong i =0; i<m_exog.Cols(); i++)
{
temp = m_exog.Col(i);
if((temp.Max()-temp.Min()) == 0.0 && temp.Max() == 1.0)
{
count++;
m_const_cols[i]=1.0;
}
}
return count;
}
//+------------------------------------------------------------------+
//|scale factor for the covariance matrix |
//+------------------------------------------------------------------+
void OLS::scale(void)
{
m_scale = m_resid.Dot(m_resid)/double(m_resid_dof);
}
//+------------------------------------------------------------------+
//| the variance/covariance matrix |
//+------------------------------------------------------------------+
void OLS::covariance_matrix(void)
{
//---
m_cov_params=m_norm_cov_params*m_scale;
}
//+------------------------------------------------------------------+
//| standard errors of the parameter estimates |
//+------------------------------------------------------------------+
void OLS::bse(void)
{
m_bse=m_cov_params.Diag();
m_bse=MathSqrt(m_bse);
}
//+------------------------------------------------------------------+
//| sum of squared errors |
//+------------------------------------------------------------------+
void OLS::sse(void)
{
vector sqresid=MathPow(m_resid,2);
m_sse = sqresid.Sum();
}
//+------------------------------------------------------------------+
//|likelihood function for the OLS model |
//+------------------------------------------------------------------+
void OLS::llf(void)
{
double obs2=double(m_obs)/2.0;
m_llf = -obs2*log(2.0*M_PI) - obs2*log(m_sse / double(m_obs)) - obs2;
//m_llf = -1*obs2*MathLog(2.0*M_PI*m_scale) - m_sse/(2.0*m_scale);
}
//+------------------------------------------------------------------+
//| Akaike's information criteria |
//+------------------------------------------------------------------+
void OLS::aic(void)
{
double params = double(m_model_dof)+double(m_kconstant);
m_aic = -2*m_llf+2*params;
}
//+------------------------------------------------------------------+
//|Bayes' information criteria |
//+------------------------------------------------------------------+
void OLS::bic(void)
{
double params = double(m_model_dof)+double(m_kconstant);
m_bic = -2*m_llf+MathLog(m_obs)*params;
}
//+------------------------------------------------------------------+
//|t-statistic for a given parameter estimate |
//+------------------------------------------------------------------+
void OLS::tvalues(void)
{
m_tvalues = m_params/m_bse;
}
//+------------------------------------------------------------------+
//|total sum of squares centered about the mean |
//+------------------------------------------------------------------+
void OLS::centeredtss(void)
{
vector centered_endog = m_endog - m_endog.Mean();
//---
m_centeredtss = centered_endog.Dot(centered_endog);
}
//+------------------------------------------------------------------+
//| The sum of the squared values of the endogenous response variable|
//+------------------------------------------------------------------+
void OLS::uncenteredtss(void)
{
m_uncenteredtss = m_endog.Dot(m_endog);
}
//+------------------------------------------------------------------+
//|R-squared of the model |
//+------------------------------------------------------------------+
void OLS::rsqe(void)
{
m_rsqe = (m_kconstant)? 1 - m_sse/m_centeredtss: 1 - m_sse/m_uncenteredtss;
}
//+------------------------------------------------------------------+