AiFeatureEval/Src/Correlations.mqh
2026-04-25 21:50:46 -05:00

511 lines
No EOL
31 KiB
MQL5

//+------------------------------------------------------------------+
//| Correlations.mqh |
//| Copyright 2025, Leo. |
//| https://www.mql5.com/es/users/nique_372/news |
//+------------------------------------------------------------------+
#property copyright "Copyright 2025, Leo."
#property link "https://www.mql5.com/es/users/nique_372/news"
#property strict
#ifndef AIFEATUREEVAL_CORRELATIONS_MQH
#define AIFEATUREEVAL_CORRELATIONS_MQH
//+------------------------------------------------------------------+
//| Include |
//+------------------------------------------------------------------+
#include <Math\\Stat\\Math.mqh>
#include <Graphics\Graphic.mqh>
#include <TSN\\MQLArticles\\Utils\\File.mqh>
//+------------------------------------------------------------------+
//| Structures \ Defines \ Enums |
//+------------------------------------------------------------------+
//---
// Scratch record used while ranking correlations (see CCorrelations::Run):
// it keeps the signed coefficient, its absolute value (the sort key) and the
// position of the column it came from. Packed to sizeof(double) alignment.
struct pack(sizeof(double)) CorrData
{
double origValue; // signed correlation coefficient
double absValue; // absolute value of origValue, used as the sort key
int index; // position of the column in the correlation array
// Default constructor: zero-initialises every field.
CorrData()
: origValue(0.0), absValue(0.0), index(0) {}
// Copy constructor: delegates to struct assignment.
CorrData(const CorrData& other)
{
this = other;
}
};
//---
// One entry of the ranked result built by CCorrelations::Run: the signed
// correlation coefficient together with the index of the column it belongs to.
// Packed to sizeof(double) alignment.
struct pack(sizeof(double)) CorrelationSort
{
double value_correlation; // signed correlation coefficient of the column
int index_col; // index of the column inside the correlation matrix
};
//---
// A single named data column: the column label plus its values.
struct CorrelationAr
{
string cols_name; // column label (header text, or the column index as text)
double values[]; // values of the column
};
//---
// Sentinel accepted as the target-column argument of the CSV-based
// CCorrelations::Set: it is replaced there by the index of the last column.
#define CORR_SALIDA_ULTIMO_ELEMENTO -1
// Selects the correlation method. The numeric values are used directly as the
// index into the function table of GetRelationFunction
// (0 = CorrPearson, 1 = CorrSpearman, 2 = CorrKendall), so the order of the
// members must stay in sync with that table.
enum ENUM_CORRELATION_FUNCTION
{
F_CORRELATION_PEARSON = 0,
F_CORRELATION_SPEARMAN,
F_CORRELATION_KENDALL
};
//+------------------------------------------------------------------+
//| Funciones |
//+------------------------------------------------------------------+
// Pointer type shared by the correlation routines below: receives two series
// by reference and returns a double coefficient.
typedef double (*funcion_correlacion)(const double &x[], const double &y[]);
//+------------------------------------------------------------------+
// Pearson correlation coefficient between x and y.
//
// Parameters:
//   x, y - series to compare; they must be non-empty and of equal size
//          (checked by VerifyCorrelation, which logs the reason on failure).
// Returns:
//   The coefficient, or 0.0 when the inputs are rejected, when the library
//   routine reports failure, or when the result is not a valid number.
double CorrPearson(const double &x[], const double &y[])
{
   if(!VerifyCorrelation(x, y))
      return 0.0;
   // Initialise the output and honour the library's status flag so a failed
   // computation never leaks an undefined / NaN value into the ranking.
   double r = 0.0;
   if(!MathCorrelationPearson(x, y, r) || !MathIsValidNumber(r))
   {
      FastLog(FUNCION_ACTUAL, ERROR_TEXT, "No se pudo calcular la correlacion de Pearson");
      return 0.0;
   }
   return r;
}
//+------------------------------------------------------------------+
// Spearman rank correlation coefficient between x and y.
//
// Parameters:
//   x, y - series to compare; they must be non-empty and of equal size
//          (checked by VerifyCorrelation, which logs the reason on failure).
// Returns:
//   The coefficient, or 0.0 when the inputs are rejected, when the library
//   routine reports failure, or when the result is not a valid number.
double CorrSpearman(const double &x[], const double &y[])
{
   if(!VerifyCorrelation(x, y))
      return 0.0;
   // Initialise the output and honour the library's status flag so a failed
   // computation never leaks an undefined / NaN value into the ranking.
   double r = 0.0;
   if(!MathCorrelationSpearman(x, y, r) || !MathIsValidNumber(r))
   {
      FastLog(FUNCION_ACTUAL, ERROR_TEXT, "No se pudo calcular la correlacion de Spearman");
      return 0.0;
   }
   return r;
}
//+------------------------------------------------------------------+
// Kendall correlation coefficient between x and y.
//
// Parameters:
//   x, y - series to compare; they must be non-empty and of equal size
//          (checked by VerifyCorrelation, which logs the reason on failure).
// Returns:
//   The coefficient, or 0.0 when the inputs are rejected, when the library
//   routine reports failure, or when the result is not a valid number.
double CorrKendall(const double &x[], const double &y[])
{
   if(!VerifyCorrelation(x, y))
      return 0.0;
   // Initialise the output and honour the library's status flag so a failed
   // computation never leaks an undefined / NaN value into the ranking.
   double r = 0.0;
   if(!MathCorrelationKendall(x, y, r) || !MathIsValidNumber(r))
   {
      FastLog(FUNCION_ACTUAL, ERROR_TEXT, "No se pudo calcular la correlacion de Kendall");
      return 0.0;
   }
   return r;
}
//+------------------------------------------------------------------+
// Checks that two series can be correlated: both must hold at least one
// element and have the same number of elements. The first violated rule is
// logged through FastLog and false is returned; true means the pair is usable.
bool VerifyCorrelation(const double & x[], const double &y[])
{
   const uint size_x = x.Size();
   const uint size_y = y.Size();
   string problem = NULL;
   bool failed = true;
   //--- Rules are evaluated in order; only the first failing one is reported
   if(size_x < 1)
      problem = "El tamaño del array x es invalido";
   else if(size_y < 1)
      problem = "El tamaño del array y es invalido";
   else if(size_x != size_y)
      problem = StringFormat("El tamño del array x = %u y el tamaño del array y = %u, no coinciden", size_x, size_y);
   else
      failed = false;
   //---
   if(failed)
   {
      FastLog(FUNCION_ACTUAL, ERROR_TEXT, problem);
      return false;
   }
   return true;
}
//+------------------------------------------------------------------+
// Maps a correlation-method selector to the routine that implements it.
//
// Parameters:
//   mode - method selector; its numeric value is the index into the table
//          below (Pearson, Spearman, Kendall).
// Returns:
//   The matching function pointer. A value outside the enum range (for example
//   one produced by an integer cast) is logged and falls back to Pearson
//   instead of reading outside the table.
funcion_correlacion GetRelationFunction(ENUM_CORRELATION_FUNCTION mode)
{
   const static funcion_correlacion funciones[3] = {CorrPearson, CorrSpearman, CorrKendall};
   const int idx = int(mode);
   if(idx < 0 || idx >= ArraySize(funciones))
   {
      FastLog(FUNCION_ACTUAL, ERROR_TEXT, StringFormat("Modo de correlacion invalido = %d, se usara Pearson", idx));
      return funciones[0];
   }
   return funciones[idx];
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Computes the correlation of every feature column against a target series,
// ranks the columns by absolute correlation and can plot selected columns
// together with the target. Derives from CLoggerBase (declared elsewhere;
// presumably the source of LogError used by the methods - confirm).
//
// Usage order visible in this file: choose the method with CorrelationFuction,
// load data with one of the Set overloads (they compute the per-column
// correlations), call Run to build the ranking, then GetBestCorrelations
// and/or PlotAnalisis.
class CCorrelations : public CLoggerBase
{
private:
//--- Target
double target_data[]; // values of the target series
string target_data_col_name; // label used for the target curve in plots
//--- Data
double arr_out_correlation[]; // correlation of each column against the target
CorrelationAr correlation_matrix[]; // per-column values and name (kept for plotting)
CorrelationSort correlations_sort[]; // ranking filled by Run (ascending by absolute correlation)
//--- Correlation
funcion_correlacion func; // correlation routine applied by the Set overloads
//--- Plots
CGraphic graph; // canvas used by PlotAnalisis
public:
CCorrelations(void);
// Releases the internal arrays.
~CCorrelations(void) { Clear(); }
//--- Main setters: load the data and compute one correlation per column.
// From a CSV file; salida_col_index selects the target column
// (CORR_SALIDA_ULTIMO_ELEMENTO means the last one).
bool Set(string file_name, bool common_flag, ushort csv_separator, bool csv_tiene_cabezera, int salida_col_index = CORR_SALIDA_ULTIMO_ELEMENTO);
// From a matrix of feature columns and a target vector.
bool Set(matrix &mtx, vector &out);
// From already-named columns and a target array.
bool Set(CorrelationAr &data[], double &outputs[]);
//--- Extra setters
// Selects the correlation routine used by subsequent Set calls.
void CorrelationFuction(ENUM_CORRELATION_FUNCTION new_value) { this.func = GetRelationFunction(new_value); }
//--- General functions
// Writes into indexes_cols the column indices of the num_bests strongest correlations.
void GetBestCorrelations(int &indexes_cols[], int num_bests);
// Builds correlations_sort from arr_out_correlation.
void Run();
//--- Cleanup
// Frees every internal array.
void Clear();
//--- Plot
// Sets the label shown for the target curve.
void TargetColName(string new_value) { this.target_data_col_name = new_value; }
// Draws the target and the selected columns and saves a screenshot of the chart.
void PlotAnalisis(string prefix, string file_folder, int width, int height, color clr_out, int &idxs_column[], bool scale_1_0, double min_val = 0.0, double max_val = 1.0);
};
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Constructor: labels the target "Target", selects Pearson as the default
// correlation routine and starts with empty internal arrays.
// A default routine is installed so that calling Set() before
// CorrelationFuction() does not invoke a null function pointer.
CCorrelations::CCorrelations(void)
   : func(NULL), target_data_col_name("Target")
{
   this.func = GetRelationFunction(F_CORRELATION_PEARSON);
   Clear();
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Collects the column indices of the strongest correlations.
//
// Parameters:
//   indexes_cols - output; cleared and then filled with the column indices of
//                  the last num_bests entries of the ranking (the ranking is
//                  ascending by absolute correlation, so these are the
//                  strongest, weakest of them first).
//   num_bests    - how many columns to return; values <= 0 yield an empty result.
//
// Errors are logged and the function returns early when more columns are
// requested than exist, or when the ranking is empty because Run() has not
// been executed. Each selected entry is also printed to the journal.
void CCorrelations::GetBestCorrelations(int &indexes_cols[], int num_bests)
{
   const int total_cols = (int)correlation_matrix.Size();
   if(num_bests > total_cols)
   {
      LogError(StringFormat("El numero de correlaciones esperadas = %d, es mayor a las disponibles = %u", num_bests, correlation_matrix.Size()), FUNCION_ACTUAL);
      return;
   }
   ArrayResize(indexes_cols, 0);
   //--- The ranking only exists after Run(); report it instead of silently returning nothing
   const int total_sorted = ArraySize(correlations_sort);
   if(total_sorted < 1)
   {
      LogError("No hay correlaciones ordenadas, ejecute Run() antes de pedir las mejores", FUNCION_ACTUAL);
      return;
   }
   //--- Take the tail of the ascending ranking
   const int start = MathMax(total_sorted - num_bests, 0);
   for(int i = start; i < total_sorted; i++)
   {
      const int val = correlations_sort[i].index_col;
      // Guard against a ranking that no longer matches the loaded columns
      if(val < 0 || val >= total_cols)
      {
         LogError(StringFormat("El indice de columna = %d esta fuera de rango, columnas disponibles = %d", val, total_cols), FUNCION_ACTUAL);
         continue;
      }
      Print("Correlation: ", correlations_sort[i].value_correlation);
      Print("Col name: ", this.correlation_matrix[val].cols_name);
      AddArrayNoVerification(indexes_cols, val, 0);
   }
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Plots the target series together with the selected columns and saves a
// screenshot of the current chart.
//
// Parameters:
//   prefix       - prefix for the graphic object name and the image file name.
//   file_folder  - prepended as-is to the image file name (no separator is added).
//   width/height - size of the graphic and of the screenshot, in pixels.
//   clr_out      - colour of the target curve.
//   idxs_column  - indices (into the loaded columns) of the curves to draw.
//   scale_1_0    - when true every curve is rescaled to [min_val, max_val]
//                  on a copy, leaving the stored data untouched.
//   min_val/max_val - target range used when scale_1_0 is true.
//
// Problems (no columns requested, index out of range, graphic creation or
// screenshot failure) are reported through LogError.
void CCorrelations::PlotAnalisis(string prefix, string file_folder, int width, int height, color clr_out, int &idxs_column[], bool scale_1_0, double min_val = 0.0, double max_val = 1.0)
{
   if(idxs_column.Size() < 1)
   {
      LogError(StringFormat("El tamaño de las columnas a plotear = %u, es invalido", idxs_column.Size()), FUNCION_ACTUAL);
      return;
   }
   //--- Validate every requested column before touching the chart
   const int total_cols = ArraySize(correlation_matrix);
   const int total_idxs = ArraySize(idxs_column);
   for(int i = 0; i < total_idxs; i++)
   {
      if(idxs_column[i] < 0 || idxs_column[i] >= total_cols)
      {
         LogError(StringFormat("El indice de columna = %d esta fuera de rango, columnas disponibles = %d", idxs_column[i], total_cols), FUNCION_ACTUAL);
         return;
      }
   }
   //---
   string file_name_corr = file_folder + prefix + "_graph_correlation.png";
   string name_grap_corr = prefix + "graph_corr";
   //--- Create the canvas
   if(!graph.Create(0, name_grap_corr, 0, 0, 0, width, height))
   {
      LogError(StringFormat("No se pudo crear el grafico = %s", name_grap_corr), FUNCION_ACTUAL);
      return;
   }
   //--- Target curve
   if(scale_1_0)
   {
      double temp_arr[];
      ArrayCopy(temp_arr, this.target_data);
      ScaleArrayCustom(temp_arr, ArraySize(temp_arr), min_val, max_val);
      graph.CurveAdd(temp_arr, clr_out, CURVE_POINTS_AND_LINES, target_data_col_name);
   }
   else
      graph.CurveAdd(this.target_data, clr_out, CURVE_POINTS_AND_LINES, target_data_col_name);
   //--- Remaining curves
   for(int i = 0; i < total_idxs; i++)
   {
      const int col = idxs_column[i];
      if(scale_1_0)
      {
         double temp_arr[];
         ArrayCopy(temp_arr, correlation_matrix[col].values);
         ScaleArrayCustom(temp_arr, ArraySize(temp_arr), min_val, max_val);
         graph.CurveAdd(temp_arr, CURVE_POINTS_AND_LINES, correlation_matrix[col].cols_name);
      }
      else
         graph.CurveAdd(correlation_matrix[col].values, CURVE_POINTS_AND_LINES, correlation_matrix[col].cols_name);
   }
   //--- Axes
   graph.XAxis().Name("Numero de datos");
   graph.XAxis().NameSize(12);
   graph.YAxis().Name("Value");
   graph.YAxis().NameSize(12);
   graph.YAxis().ValuesWidth(15);
   graph.CurvePlotAll();
   graph.Update();
   //--- Screenshot is taken with CHART_SHOW switched off; the flag is always restored
   bool res = false;
   if(ChartSetInteger(0, CHART_SHOW, false))
      res = ChartScreenShot(0, file_name_corr, width, height);
   ChartSetInteger(0, CHART_SHOW, true);
   if(!res)
      LogError(StringFormat("No se pudo guardar la captura del grafico = %s", file_name_corr), FUNCION_ACTUAL);
   Sleep(2000);
   graph.Destroy();
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Builds the ranking of columns from the stored correlations.
// Every correlation is paired with its column index, the pairs are ordered
// ascending by absolute value with an in-place exchange sort, and the result
// replaces the contents of correlations_sort (signed value + column index).
// Logs an error and does nothing when no correlations are stored.
void CCorrelations::Run()
{
   const int total = (int)this.arr_out_correlation.Size();
   if(total < 1)
   {
      LogError(StringFormat("El tamaño del array de correlaciones = %u, es invalido", arr_out_correlation.Size()), FUNCION_ACTUAL);
      return;
   }
   //--- Pair every coefficient with its column position
   CorrData ranked[];
   ArrayResize(ranked, total);
   for(int k = 0; k < total; k++)
   {
      const double raw = arr_out_correlation[k];
      ranked[k].index = k;
      ranked[k].origValue = raw;
      ranked[k].absValue = fabs(raw);
   }
   //--- Exchange sort, ascending by absolute value
   for(int lo = 0; lo < total - 1; lo++)
   {
      for(int hi = lo + 1; hi < total; hi++)
      {
         // Swap only when the earlier entry is strictly greater
         if(!(ranked[lo].absValue > ranked[hi].absValue))
            continue;
         CorrData swap_tmp = ranked[lo];
         ranked[lo] = ranked[hi];
         ranked[hi] = swap_tmp;
      }
   }
   //--- Publish the ranking
   ArrayResize(this.correlations_sort, 0);
   for(int k = 0; k < total; k++)
   {
      CorrelationSort entry;
      entry.value_correlation = ranked[k].origValue;
      entry.index_col = ranked[k].index;
      AddArrayNoVerification(correlations_sort, entry, 0);
   }
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Loads the feature columns from a matrix and the target from a vector, and
// computes the correlation of every column against the target.
//
// Parameters:
//   mtx - data matrix; each column is one feature, each row one observation.
//   out - target values; its size must equal the number of rows of mtx.
// Returns:
//   true on success; false (after logging) when no correlation routine is
//   configured or when the row count and target size differ.
//
// Columns are named after their index. Any previous ranking is discarded
// because it no longer matches the loaded columns.
bool CCorrelations::Set(matrix &mtx, vector &out)
{
   if(func == NULL)
   {
      LogError("La funcion de correlacion no ha sido configurada", FUNCION_ACTUAL);
      return false;
   }
   if(mtx.Rows() != out.Size())
   {
      LogError(StringFormat("Las filas de la matriz de data = %I64u, es diferente al tamaño del vector de target = %I64u", mtx.Rows(), out.Size()), FUNCION_ACTUAL);
      return false;
   }
   //--- One slot per COLUMN (the target size is the row count, not the column count)
   const int total_cols = (int)mtx.Cols();
   ArrayFree(this.correlations_sort);
   ArrayResize(correlation_matrix, total_cols);
   ArrayResize(arr_out_correlation, total_cols);
   VectorToDoubleArray(out, this.target_data);
   //---
   for(int i = 0; i < total_cols; i++)
   {
      double temp_arr[];
      VectorToDoubleArray(mtx.Col(i), temp_arr);
      arr_out_correlation[i] = func(temp_arr, this.target_data);
      // Drop old contents first: ArrayCopy alone would keep a stale tail
      // when the slot previously held a longer column.
      ArrayFree(correlation_matrix[i].values);
      ArrayCopy(correlation_matrix[i].values, temp_arr);
      correlation_matrix[i].cols_name = IntegerToString(i);
   }
   return true;
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Loads already-named columns and a target array, and computes the
// correlation of every column against the target.
//
// Parameters:
//   data    - columns to analyse (name + values); copied into the object.
//   outputs - target values.
// Returns:
//   true on success; false (after logging) when no correlation routine is
//   configured. Size mismatches between a column and the target are reported
//   by the correlation routine, which then yields 0 for that column.
//
// Any previous ranking is discarded because it no longer matches the loaded
// columns.
bool CCorrelations::Set(CorrelationAr &data[], double &outputs[])
{
   if(func == NULL)
   {
      LogError("La funcion de correlacion no ha sido configurada", FUNCION_ACTUAL);
      return false;
   }
   //---
   const int total_cols = ArraySize(data);
   ArrayFree(this.correlations_sort);
   ArrayResize(correlation_matrix, total_cols);
   ArrayResize(arr_out_correlation, total_cols);
   // Free first: ArrayCopy alone would keep a stale tail when the previous
   // target was longer than the new one.
   ArrayFree(this.target_data);
   ArrayCopy(this.target_data, outputs);
   //---
   for(int i = 0; i < total_cols; i++)
   {
      arr_out_correlation[i] = func(data[i].values, this.target_data); // correlation of the column against the target
      correlation_matrix[i] = data[i];
   }
   return true;
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Loads a CSV file, takes one column as the target and computes the
// correlation of every other column against it.
//
// Parameters:
//   file_name          - CSV file to read (through CSVToMatrix).
//   common_flag        - forwarded to CSVToMatrix.
//   csv_separator      - field separator, also used to split the header line.
//   csv_tiene_cabezera - true when the file has a header line; column names
//                        (and the target name) are then taken from it,
//                        otherwise columns are named after their index.
//   salida_col_index   - index of the target column;
//                        CORR_SALIDA_ULTIMO_ELEMENTO selects the last column.
// Returns:
//   true on success; false (after logging) when no correlation routine is
//   configured, the file cannot be loaded, it has no columns, the target
//   index is out of range, or the header field count differs from the number
//   of columns. All validation happens before the object's state is changed.
//
// Any previous ranking is discarded because it no longer matches the loaded
// columns.
bool CCorrelations::Set(string file_name, bool common_flag, ushort csv_separator, bool csv_tiene_cabezera, int salida_col_index = -1)
{
   if(func == NULL)
   {
      LogError("La funcion de correlacion no ha sido configurada", FUNCION_ACTUAL);
      return false;
   }
   matrix mtx;
   string header;
   if(!CSVToMatrix(file_name, mtx, common_flag, csv_tiene_cabezera, csv_separator, header))
   {
      LogError(StringFormat("No se pudo cargar el archivo = %s", file_name), FUNCION_ACTUAL);
      return false;
   }
   //--- Dimensions
   const ulong cols = mtx.Cols();
   const int total_cols = (int)cols;
   if(total_cols < 1)
   {
      LogError(StringFormat("El archivo = %s no contiene columnas", file_name), FUNCION_ACTUAL);
      return false;
   }
   //--- Resolve and validate the target column
   if(salida_col_index == CORR_SALIDA_ULTIMO_ELEMENTO)
      salida_col_index = total_cols - 1;
   if(salida_col_index < 0 || salida_col_index >= total_cols)
   {
      LogError(StringFormat("El indice de la columna de salida = %d esta fuera de rango, columnas disponibles = %d", salida_col_index, total_cols), FUNCION_ACTUAL);
      return false;
   }
   //--- Header names (validated before any member is modified)
   string res[];
   if(csv_tiene_cabezera)
   {
      const int s = StringSplit(header, csv_separator, res);
      if(s != total_cols)
      {
         LogError(StringFormat("El numero de columnas = %I64u es diferente al tamaño del heador = %d", cols, s), FUNCION_ACTUAL);
         return false;
      }
   }
   //--- Target
   vector output = mtx.Col(salida_col_index);
   VectorToDoubleArray(output, this.target_data);
   //--- One slot per feature column (every column except the target)
   ArrayFree(this.correlations_sort);
   ArrayResize(this.correlation_matrix, total_cols - 1);
   ArrayResize(this.arr_out_correlation, total_cols - 1);
   //--- Names, values and correlation in a single pass over ALL columns
   int curr_idx = 0;
   for(int i = 0; i < total_cols; i++)
   {
      if(i == salida_col_index)
      {
         if(csv_tiene_cabezera)
            this.target_data_col_name = res[i];
         continue;
      }
      //---
      if(csv_tiene_cabezera)
         correlation_matrix[curr_idx].cols_name = res[i];
      else
         correlation_matrix[curr_idx].cols_name = IntegerToString(i);
      //---
      double arr[];
      VectorToDoubleArray(mtx.Col(i), arr);
      // Drop old contents first: ArrayCopy alone would keep a stale tail
      // when the slot previously held a longer column.
      ArrayFree(correlation_matrix[curr_idx].values);
      ArrayCopy(correlation_matrix[curr_idx].values, arr);
      //---
      this.arr_out_correlation[curr_idx] = func(arr, this.target_data);
      curr_idx++;
   }
   return true;
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Releases every internal buffer: target values, per-column data,
// the ranking and the per-column correlations.
void CCorrelations::Clear(void)
{
   ArrayFree(this.target_data);
   ArrayFree(this.correlation_matrix);
   ArrayFree(this.correlations_sort);
   ArrayFree(this.arr_out_correlation);
}
//+------------------------------------------------------------------+
// Linearly rescales the first `size` elements of arr, in place, so that the
// smallest value maps to target_min and the largest to target_max.
//
// Parameters:
//   arr        - values to rescale (modified in place).
//   size       - number of leading elements to process; nothing is done when
//                it is 1 or less.
//   target_min - lower bound of the output range (default 0.0).
//   target_max - upper bound of the output range (default 1.0).
//
// When all processed values are equal, each one is set to the midpoint of
// the target range.
void ScaleArrayCustom(double &arr[], int size, double target_min = 0.0, double target_max = 1.0)
{
   if(size < 2)
      return;
   //--- Find the extremes of the input
   double lowest = arr[0];
   double highest = arr[0];
   for(int k = 1; k < size; k++)
   {
      const double current = arr[k];
      if(current < lowest)
         lowest = current;
      if(current > highest)
         highest = current;
   }
   //--- Flat input: no range to stretch, use the middle of the target range
   const double input_span = highest - lowest;
   if(input_span == 0.0)
   {
      const double middle = (target_min + target_max) / 2.0;
      for(int k = 0; k < size; k++)
         arr[k] = middle;
      return;
   }
   //--- Normalise to [0, 1] and stretch to the target range
   const double output_span = target_max - target_min;
   for(int k = 0; k < size; k++)
   {
      const double unit = (arr[k] - lowest) / input_span;
      arr[k] = target_min + (unit * output_span);
   }
}
//+------------------------------------------------------------------+
#endif // AIFEATUREEVAL_CORRELATIONS_MQH