511 lines
No EOL
31 KiB
MQL5
511 lines
No EOL
31 KiB
MQL5
//+------------------------------------------------------------------+
|
|
//| Correlations.mqh |
|
|
//| Copyright 2025, Leo. |
|
|
//| https://www.mql5.com/es/users/nique_372/news |
|
|
//+------------------------------------------------------------------+
|
|
#property copyright "Copyright 2025, Leo."
|
|
#property link "https://www.mql5.com/es/users/nique_372/news"
|
|
#property strict
|
|
|
|
#ifndef AIFEATUREEVAL_CORRELATIONS_MQH
|
|
#define AIFEATUREEVAL_CORRELATIONS_MQH
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| Include |
|
|
//+------------------------------------------------------------------+
|
|
#include <Math\\Stat\\Math.mqh>
|
|
#include <Graphics\Graphic.mqh>
|
|
#include <TSN\\MQLArticles\\Utils\\File.mqh>
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| Estructuras \ Defines \ Enums |
|
|
//+------------------------------------------------------------------+
|
|
//---
|
|
//--- Scratch record used while ranking correlations: it carries the signed
//--- coefficient, its magnitude and the position it came from.
struct pack(sizeof(double)) CorrData
{
   double origValue;   // signed coefficient
   double absValue;    // magnitude of the coefficient
   int    index;       // position of the coefficient in its source array

   //--- Default constructor: zeroed record
   CorrData()
   {
      origValue = 0.0;
      absValue  = 0.0;
      index     = 0;
   }

   //--- Copy constructor: member-wise duplicate of other
   CorrData(const CorrData& other)
      : origValue(other.origValue), absValue(other.absValue), index(other.index) {}
};
|
|
|
|
//---
|
|
//--- One entry of the ranked result: a correlation value together with
//--- the index of the column it belongs to.
struct pack(sizeof(double)) CorrelationSort
{
   double value_correlation;   // correlation value stored for the column
   int    index_col;           // index of the column
};
|
|
|
|
//---
|
|
//--- A named data column: its label plus its series of values.
struct CorrelationAr
{
   string cols_name;   // column label
   double values[];    // column data
};
|
|
|
|
//---
|
|
//--- Sentinel for the output-column index: the CSV loader replaces it with the last column of the file
#define CORR_SALIDA_ULTIMO_ELEMENTO -1
|
|
|
|
//--- Correlation methods that can be selected.
//--- The numeric values are used as indexes into the dispatch table of
//--- GetRelationFunction(), so they must stay contiguous and start at 0.
enum ENUM_CORRELATION_FUNCTION
{
   F_CORRELATION_PEARSON  = 0,
   F_CORRELATION_SPEARMAN = 1,
   F_CORRELATION_KENDALL  = 2
};
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| Funciones |
|
|
//+------------------------------------------------------------------+
|
|
//--- Pointer type shared by the correlation routines: receives two series and returns a double
typedef double (*funcion_correlacion)(const double &x[], const double &y[]);
|
|
|
|
//+------------------------------------------------------------------+
|
|
//--- Pearson correlation between x and y.
//--- Returns 0 when the inputs fail VerifyCorrelation(), when the library
//--- routine reports failure, or when the result is not a valid number, so
//--- callers never receive an uninitialized or non-finite coefficient.
double CorrPearson(const double &x[], const double &y[])
{
   if(!VerifyCorrelation(x, y))
      return 0.0;

   double r = 0.0;
   if(!MathCorrelationPearson(x, y, r))
   {
      FastLog(FUNCION_ACTUAL, ERROR_TEXT, "MathCorrelationPearson no pudo calcular la correlacion");
      return 0.0;
   }

   //--- a non-finite coefficient would break any later comparison-based ranking
   return MathIsValidNumber(r) ? r : 0.0;
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
//--- Spearman rank correlation between x and y.
//--- Returns 0 when the inputs fail VerifyCorrelation(), when the library
//--- routine reports failure, or when the result is not a valid number, so
//--- callers never receive an uninitialized or non-finite coefficient.
double CorrSpearman(const double &x[], const double &y[])
{
   if(!VerifyCorrelation(x, y))
      return 0.0;

   double r = 0.0;
   if(!MathCorrelationSpearman(x, y, r))
   {
      FastLog(FUNCION_ACTUAL, ERROR_TEXT, "MathCorrelationSpearman no pudo calcular la correlacion");
      return 0.0;
   }

   //--- a non-finite coefficient would break any later comparison-based ranking
   return MathIsValidNumber(r) ? r : 0.0;
}
|
|
//+------------------------------------------------------------------+
|
|
//--- Kendall correlation between x and y.
//--- Returns 0 when the inputs fail VerifyCorrelation(), when the library
//--- routine reports failure, or when the result is not a valid number, so
//--- callers never receive an uninitialized or non-finite coefficient.
double CorrKendall(const double &x[], const double &y[])
{
   if(!VerifyCorrelation(x, y))
      return 0.0;

   double r = 0.0;
   if(!MathCorrelationKendall(x, y, r))
   {
      FastLog(FUNCION_ACTUAL, ERROR_TEXT, "MathCorrelationKendall no pudo calcular la correlacion");
      return 0.0;
   }

   //--- a non-finite coefficient would break any later comparison-based ranking
   return MathIsValidNumber(r) ? r : 0.0;
}
|
|
|
|
|
|
|
|
//+------------------------------------------------------------------+
|
|
//--- Checks that two series can be correlated: both must be non-empty and
//--- of equal length. The first failed check is logged and false is
//--- returned; true means all checks passed.
bool VerifyCorrelation(const double & x[], const double &y[])
{
   bool valid = false;

   if(x.Size() < 1)
      FastLog(FUNCION_ACTUAL, ERROR_TEXT, "El tamaño del array x es invalido");
   else if(y.Size() < 1)
      FastLog(FUNCION_ACTUAL, ERROR_TEXT, "El tamaño del array y es invalido");
   else if(x.Size() != y.Size())
      FastLog(FUNCION_ACTUAL, ERROR_TEXT, StringFormat("El tamño del array x = %u y el tamaño del array y = %u, no coinciden", x.Size(), y.Size()));
   else
      valid = true;

   return valid;
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
//--- Maps a correlation mode to the routine that implements it.
//--- The enum value is used as an index into the table below. A value
//--- outside the table (for example an arbitrary integer cast to the enum)
//--- is logged and falls back to Pearson instead of indexing out of range.
funcion_correlacion GetRelationFunction(ENUM_CORRELATION_FUNCTION mode)
{
   const static funcion_correlacion funciones[3] = {CorrPearson, CorrSpearman, CorrKendall};

   const int idx = int(mode);
   if(idx < 0 || idx >= 3)
   {
      FastLog(FUNCION_ACTUAL, ERROR_TEXT, StringFormat("Modo de correlacion invalido = %d, se usara Pearson", idx));
      return funciones[0];
   }

   return funciones[idx];
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
//--- Computes the correlation of a set of data columns against a target
//--- series, ranks the columns and can plot selected columns together
//--- with the target.
class CCorrelations : public CLoggerBase
{
private:
   //--- Target
   double               target_data[];          // target series
   string               target_data_col_name;   // label of the target series

   //--- Data
   double               arr_out_correlation[];  // correlation of each column
   CorrelationAr        correlation_matrix[];   // data and name of each column (also used for plotting)
   CorrelationSort      correlations_sort[];    // ranking filled by Run()

   //--- Correlation routine in use
   funcion_correlacion  func;

   //--- Plots
   CGraphic             graph;

public:
                        CCorrelations(void);
                       ~CCorrelations(void)
   {
      Clear();
   }

   //--- Main setters
   bool                 Set(string file_name, bool common_flag, ushort csv_separator, bool csv_tiene_cabezera, int salida_col_index = CORR_SALIDA_ULTIMO_ELEMENTO);
   bool                 Set(matrix &mtx, vector &out);
   bool                 Set(CorrelationAr &data[], double &outputs[]);

   //--- Extra setters
   void                 CorrelationFuction(ENUM_CORRELATION_FUNCTION new_value)
   {
      this.func = GetRelationFunction(new_value);
   }

   //--- General functions
   void                 GetBestCorrelations(int &indexes_cols[], int num_bests);
   void                 Run();

   //--- Cleanup
   void                 Clear();

   //--- Plot
   void                 TargetColName(string new_value)
   {
      this.target_data_col_name = new_value;
   }
   void                 PlotAnalisis(string prefix, string file_folder, int width, int height, color clr_out, int &idxs_column[], bool scale_1_0, double min_val = 0.0, double max_val = 1.0);
};
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
//--- Builds an empty instance. The target label starts as "Target" and
//--- the correlation routine defaults to Pearson, so the Set() overloads
//--- never call through a null function pointer when CorrelationFuction()
//--- has not been invoked.
CCorrelations::CCorrelations(void)
   : func(NULL), target_data_col_name("Target")
{
   func = GetRelationFunction(F_CORRELATION_PEARSON);
   Clear();
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
//--- Fills indexes_cols with the column indexes of the last num_bests
//--- entries of the ranking built by Run(), in the order they are stored.
//--- Each selected entry is also printed to the log.
//---   indexes_cols : output, emptied before being filled
//---   num_bests    : how many columns are requested
//--- Logs an error when more columns are requested than exist, when the
//--- request is not positive, or when the ranking holds fewer entries than
//--- requested (typically because Run() has not been executed). Ranked
//--- entries pointing outside the stored columns are skipped.
void CCorrelations::GetBestCorrelations(int &indexes_cols[], int num_bests)
{
   const int available = (int)correlation_matrix.Size();
   if(num_bests > available)
   {
      LogError(StringFormat("El numero de correlaciones esperadas = %d, es mayor a las disponibles = %u", num_bests, correlation_matrix.Size()), FUNCION_ACTUAL);
      return;
   }

   ArrayResize(indexes_cols, 0);

   if(num_bests < 1)
   {
      LogError(StringFormat("El numero de correlaciones esperadas = %d, es invalido", num_bests), FUNCION_ACTUAL);
      return;
   }

   //--- the ranking, not the column store, is what gets read below
   const int ranked = ArraySize(correlations_sort);
   if(ranked < num_bests)
      LogError(StringFormat("El ranking tiene %d elementos y se pidieron %d, verifique que Run() fue ejecutado", ranked, num_bests), FUNCION_ACTUAL);

   const int start = MathMax(ranked - num_bests, 0);
   for(int i = start; i < ranked; i++)
   {
      const int col = correlations_sort[i].index_col;
      if(col < 0 || col >= available)
      {
         LogError(StringFormat("El indice de columna = %d del ranking esta fuera de rango", col), FUNCION_ACTUAL);
         continue;
      }

      Print("Correlation: ", correlations_sort[i].value_correlation);
      Print("Col name: ", this.correlation_matrix[col].cols_name);
      AddArrayNoVerification(indexes_cols, col, 0);
   }
}
|
|
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
//--- Draws the target series and the requested columns on a CGraphic
//--- object, takes a chart screenshot and destroys the graphic.
//---   prefix       : prepended to the graphic object name and the file name
//---   file_folder  : concatenated as-is in front of the file name
//---   width/height : size passed to the graphic and to the screenshot
//---   clr_out      : colour of the target curve
//---   idxs_column  : indexes into the stored columns to plot
//---   scale_1_0    : when true every series is rescaled to [min_val, max_val]
//---                  with ScaleArrayCustom() before being plotted
//--- All column indexes are validated before anything is drawn; failures
//--- to create the graphic or to save the screenshot are logged.
void CCorrelations::PlotAnalisis(string prefix, string file_folder, int width, int height, color clr_out, int &idxs_column[], bool scale_1_0, double min_val = 0.0, double max_val = 1.0)
{
   if(idxs_column.Size() < 1)
   {
      LogError(StringFormat("El tamaño de las columnas a plotear = %u, es invalido", idxs_column.Size()), FUNCION_ACTUAL);
      return;
   }

   //--- reject out-of-range columns before touching the chart
   const int requested  = ArraySize(idxs_column);
   const int total_cols = ArraySize(correlation_matrix);
   for(int i = 0; i < requested; i++)
   {
      if(idxs_column[i] < 0 || idxs_column[i] >= total_cols)
      {
         LogError(StringFormat("El indice de columna = %d esta fuera de rango, columnas disponibles = %d", idxs_column[i], total_cols), FUNCION_ACTUAL);
         return;
      }
   }

   //---
   const string file_name_corr = file_folder + prefix + "_graph_correlation.png";
   const string name_grap_corr = prefix + "graph_corr";

   //--- Target curve
   if(!graph.Create(0, name_grap_corr, 0, 0, 0, width, height))
   {
      LogError(StringFormat("No se pudo crear el grafico = %s", name_grap_corr), FUNCION_ACTUAL);
      return;
   }

   if(scale_1_0)
   {
      double scaled_target[];
      ArrayCopy(scaled_target, this.target_data);
      ScaleArrayCustom(scaled_target, ArraySize(scaled_target), min_val, max_val);
      graph.CurveAdd(scaled_target, clr_out, CURVE_POINTS_AND_LINES, target_data_col_name);
   }
   else
      graph.CurveAdd(this.target_data, clr_out, CURVE_POINTS_AND_LINES, target_data_col_name);

   //--- Remaining curves
   for(int i = 0; i < requested; i++)
   {
      const int col = idxs_column[i];

      if(scale_1_0)
      {
         double scaled_col[];
         ArrayCopy(scaled_col, correlation_matrix[col].values);
         ScaleArrayCustom(scaled_col, ArraySize(scaled_col), min_val, max_val);
         graph.CurveAdd(scaled_col, CURVE_POINTS_AND_LINES, correlation_matrix[col].cols_name);
      }
      else
         graph.CurveAdd(correlation_matrix[col].values, CURVE_POINTS_AND_LINES, correlation_matrix[col].cols_name);
   }

   //--- Axes
   graph.XAxis().Name("Numero de datos");
   graph.XAxis().NameSize(12);
   graph.YAxis().Name("Value");
   graph.YAxis().NameSize(12);
   graph.YAxis().ValuesWidth(15);
   graph.CurvePlotAll();
   graph.Update();

   //--- Screenshot: only attempted when CHART_SHOW could be switched off; the flag is always restored
   bool saved = false;
   if(ChartSetInteger(0, CHART_SHOW, false))
      saved = ChartScreenShot(0, file_name_corr, width, height);
   ChartSetInteger(0, CHART_SHOW, true);

   if(!saved)
      LogError(StringFormat("No se pudo guardar la captura = %s", file_name_corr), FUNCION_ACTUAL);

   Sleep(2000);
   graph.Destroy();
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
//--- Builds the ranking of columns from the stored correlations.
//--- Entries are ordered by ascending absolute correlation, so the
//--- strongest correlations end up at the end of correlations_sort.
//--- A coefficient that is not a valid number is ranked as magnitude 0,
//--- because comparisons against NaN are always false and would leave the
//--- ordering undefined. The stored value itself is kept unchanged.
void CCorrelations::Run()
{
   const int size = ArraySize(arr_out_correlation);
   if(size < 1)
   {
      LogError(StringFormat("El tamaño del array de correlaciones = %u, es invalido", arr_out_correlation.Size()), FUNCION_ACTUAL);
      return;
   }

   //--- working copy: signed value, sort key and source position
   CorrData corrArray[];
   if(ArrayResize(corrArray, size) != size)
   {
      LogError(StringFormat("No se pudo reservar memoria para %d correlaciones", size), FUNCION_ACTUAL);
      return;
   }

   for(int i = 0; i < size; i++)
   {
      const double coef = arr_out_correlation[i];
      corrArray[i].origValue = coef;
      corrArray[i].absValue  = MathIsValidNumber(coef) ? MathAbs(coef) : 0.0;
      corrArray[i].index     = i;
   }

   //--- exchange sort, ascending by magnitude
   for(int i = 0; i < size - 1; i++)
   {
      for(int j = i + 1; j < size; j++)
      {
         if(corrArray[i].absValue > corrArray[j].absValue)
         {
            CorrData temp = corrArray[i];
            corrArray[i] = corrArray[j];
            corrArray[j] = temp;
         }
      }
   }

   //--- publish the ranking; the size is known, so allocate once
   if(ArrayResize(this.correlations_sort, size) != size)
   {
      ArrayResize(this.correlations_sort, 0);
      LogError(StringFormat("No se pudo reservar memoria para %d correlaciones", size), FUNCION_ACTUAL);
      return;
   }

   for(int i = 0; i < size; i++)
   {
      correlations_sort[i].index_col         = corrArray[i].index;
      correlations_sort[i].value_correlation = corrArray[i].origValue;
   }
}
|
|
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
//--- Loads the data from a matrix (one feature per column) and a target
//--- vector, and computes the correlation of every column with the target.
//--- Columns are named after their index.
//---   mtx : data matrix; its row count must equal out.Size()
//---   out : target series
//--- Returns false when no correlation routine is set or the sizes differ.
//--- The internal arrays are sized by the number of COLUMNS: one slot per
//--- feature, which is what the loop below indexes.
bool CCorrelations::Set(matrix &mtx, vector &out)
{
   if(func == NULL)
   {
      LogError("No se ha definido la funcion de correlacion", FUNCION_ACTUAL);
      return false;
   }

   if(mtx.Rows() != out.Size())
   {
      LogError(StringFormat("Las filas de la matriz de data = %I64u, es diferente al tamaño del vector de target = %I64u", mtx.Rows(), out.Size()), FUNCION_ACTUAL);
      return false;
   }

   //--- drop any columns left from a previous load before resizing
   const int cols = (int)mtx.Cols();
   ArrayFree(correlation_matrix);
   ArrayResize(correlation_matrix, cols);
   ArrayResize(arr_out_correlation, cols);
   VectorToDoubleArray(out, this.target_data);

   //---
   for(int i = 0; i < cols; i++)
   {
      double temp_arr[];
      VectorToDoubleArray(mtx.Col(i), temp_arr);
      arr_out_correlation[i] = func(temp_arr, this.target_data);
      ArrayCopy(correlation_matrix[i].values, temp_arr);
      correlation_matrix[i].cols_name = IntegerToString(i);
   }

   return true;
}
|
|
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
//--- Loads already-named columns and a target series, and computes the
//--- correlation of every column with the target.
//---   data    : columns (name + values) to evaluate
//---   outputs : target series
//--- Returns false when no correlation routine is set, true otherwise.
bool CCorrelations::Set(CorrelationAr &data[], double &outputs[])
{
   if(func == NULL)
   {
      LogError("No se ha definido la funcion de correlacion", FUNCION_ACTUAL);
      return false;
   }

   //---
   const int total = ArraySize(data);
   ArrayResize(correlation_matrix, total);
   ArrayResize(arr_out_correlation, total);

   //--- release first: ArrayCopy alone would keep the tail of a longer, previously loaded target
   ArrayFree(this.target_data);
   ArrayCopy(this.target_data, outputs);

   //---
   for(int i = 0; i < total; i++)
   {
      arr_out_correlation[i] = func(data[i].values, this.target_data);
      correlation_matrix[i] = data[i];
   }

   return true;
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
//--- Loads a CSV file through CSVToMatrix(), takes one column as the target
//--- and computes the correlation of every other column with it.
//---   file_name          : file to load
//---   common_flag        : forwarded to CSVToMatrix()
//---   csv_separator      : field separator, also used to split the header
//---   csv_tiene_cabezera : when true, column names come from the header;
//---                        otherwise each column is named after its index
//---   salida_col_index   : target column; CORR_SALIDA_ULTIMO_ELEMENTO
//---                        selects the last column
//--- Returns false when no correlation routine is set, the file cannot be
//--- loaded, it has fewer than two columns, the target index is out of
//--- range, or the header does not have one name per column. All
//--- validation happens before the object state is modified.
bool CCorrelations::Set(string file_name, bool common_flag, ushort csv_separator, bool csv_tiene_cabezera, int salida_col_index = -1)
{
   if(func == NULL)
   {
      LogError("No se ha definido la funcion de correlacion", FUNCION_ACTUAL);
      return false;
   }

   matrix mtx;
   string header;
   if(!CSVToMatrix(file_name, mtx, common_flag, csv_tiene_cabezera, csv_separator, header))
   {
      LogError(StringFormat("No se pudo cargar el archivo = %s", file_name), FUNCION_ACTUAL);
      return false;
   }

   //--- at least one feature column plus the target is required
   const int cols = (int)mtx.Cols();
   if(cols < 2)
   {
      LogError(StringFormat("El archivo = %s tiene %d columnas, se requieren al menos 2", file_name, cols), FUNCION_ACTUAL);
      return false;
   }

   //--- resolve and validate the target column
   if(salida_col_index == CORR_SALIDA_ULTIMO_ELEMENTO)
      salida_col_index = cols - 1;

   if(salida_col_index < 0 || salida_col_index >= cols)
   {
      LogError(StringFormat("El indice de la columna de salida = %d esta fuera de rango, columnas = %d", salida_col_index, cols), FUNCION_ACTUAL);
      return false;
   }

   //--- header names, one per column
   string names[];
   if(csv_tiene_cabezera)
   {
      const int s = StringSplit(header, csv_separator, names);
      if(s != cols)
      {
         LogError(StringFormat("El numero de columnas = %I64u es diferente al tamaño del heador = %d", mtx.Cols(), s), FUNCION_ACTUAL);
         return false;
      }
   }

   //--- target
   vector output = mtx.Col(salida_col_index);
   VectorToDoubleArray(output, this.target_data);

   //--- one slot per feature column (the target column is left out)
   ArrayFree(this.correlation_matrix);
   ArrayResize(this.correlation_matrix, cols - 1);
   ArrayResize(this.arr_out_correlation, cols - 1);

   //--- single pass over ALL columns: name, data and correlation
   int curr_idx = 0;
   for(int i = 0; i < cols; i++)
   {
      if(i == salida_col_index)
      {
         if(csv_tiene_cabezera)
            this.target_data_col_name = names[i];
         continue;
      }

      if(csv_tiene_cabezera)
         correlation_matrix[curr_idx].cols_name = names[i];
      else
         correlation_matrix[curr_idx].cols_name = IntegerToString(i);

      double arr[];
      VectorToDoubleArray(mtx.Col(i), arr);
      ArrayCopy(correlation_matrix[curr_idx].values, arr);

      this.arr_out_correlation[curr_idx] = func(arr, this.target_data);
      curr_idx++;
   }

   return true;
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
//--- Releases every internal array: target, columns, correlations and ranking.
void CCorrelations::Clear(void)
{
   ArrayFree(target_data);
   ArrayFree(correlation_matrix);
   ArrayFree(correlations_sort);
   ArrayFree(arr_out_correlation);
}
|
|
//+------------------------------------------------------------------+
|
|
//--- Linearly rescales the first `size` elements of arr, in place, so that
//--- their minimum maps to target_min and their maximum to target_max.
//---   arr        : series to rescale
//---   size       : number of leading elements to process; clamped to the
//---                real array length so the loops never read out of range
//---   target_min : lower bound of the output range
//---   target_max : upper bound of the output range
//--- Nothing is done when fewer than two elements are to be processed.
//--- When all processed values are equal they are set to the midpoint of
//--- the target range.
void ScaleArrayCustom(double &arr[], int size, double target_min = 0.0, double target_max = 1.0)
{
   const int available = ArraySize(arr);
   if(size > available)
      size = available;

   if(size <= 1)
      return;

   //--- range of the data
   double data_min = arr[0];
   double data_max = arr[0];
   for(int i = 1; i < size; i++)
   {
      if(arr[i] < data_min)
         data_min = arr[i];
      if(arr[i] > data_max)
         data_max = arr[i];
   }

   //--- constant data: no range to map, use the midpoint
   const double data_range = data_max - data_min;
   if(data_range == 0.0)
   {
      const double mid_point = (target_min + target_max) / 2.0;
      for(int i = 0; i < size; i++)
         arr[i] = mid_point;

      return;
   }

   //--- linear mapping
   const double target_range = target_max - target_min;
   for(int i = 0; i < size; i++)
   {
      const double normalized = (arr[i] - data_min) / data_range;
      arr[i] = target_min + (normalized * target_range);
   }
}
|
|
//+------------------------------------------------------------------+
|
|
#endif // AIFEATUREEVAL_CORRELATIONS_MQH |