ScalerByLeo/Src/ScalerBase.mqh
Nique_372 0cc42ae7f5
2026-01-01 21:54:18 -05:00

1570 lines
97 KiB
MQL5

//+------------------------------------------------------------------+
//| ScalerBase.mqh |
//| Copyright 2025, Leo. |
//| https://www.mql5.com |
//+------------------------------------------------------------------+
#property copyright "Copyright 2025, Leo."
#property link "https://www.mql5.com/en/users/nique_372"
#property strict
#ifndef SCALERBYLEO_SCALER_BASE_MQH
#define SCALERBYLEO_SCALER_BASE_MQH
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
#include "..\\..\\MQLArticles\\Utils\\Basic.mqh"
#include "..\\..\\MQLArticles\\Utils\\File.mqh"
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
#define SCALER_BASE_FINAL_LINE -1
#define SCALER_BASE_MIN_VALUE 1e-10
//+------------------------------------------------------------------+
//| Scaler Base |
//+------------------------------------------------------------------+
//--- Abstract base class for column-wise scalers. It records which columns are
//--- scaled (an explicit range, or everything except the last N columns),
//--- offers helpers to extract / re-insert those columns, reads input from
//--- files or strings, and declares the fit / transform / inverse_transform
//--- interface that the concrete scalers implement.
class ScalerBase : public CLoggerBase
{
protected:
string file_name_out; // Parameter file name: prefix + prefix_file (assigned in save())
string prefix_file; // File-name suffix; assigned by the derived-class constructors
bool loaded_scaler; // Set to true by load(); the derived transform methods check it
bool use_custom; // Flag: true = explicit column range (custom), false = exclude trailing columns (excluded)
ulong start_col; // First column of the custom range
ulong count_cols; // Number of columns in the custom range
ulong excluyed_cols; // Number of trailing columns left out in excluded mode
// Persistence hooks implemented by each concrete scaler
virtual bool Save() = 0;
virtual bool Load(string prefix_name) = 0;
//--- Helper methods
// Validate the configured selection against the input dimensions
bool CheckSizeCustom(const matrix &mtx) const;
bool CheckSizeExcluded(const matrix &mtx) const;
bool CheckSizeCustom(const vector &v) const;
bool CheckSizeExcluded(const vector &v) const;
//--- Extract the selected columns / write scaled columns back into a copy of the original
matrix ExtractMatrixToScale(const matrix &X) const;
matrix ReconstructMatrix(const matrix &X_original, const matrix &X_scaled) const;
vector ExtractVectorToScale(const vector &X) const;
vector ReconstructVector(const vector &X_original, const vector &X_scaled) const;
public:
ScalerBase(void);
~ScalerBase(void) {}
//--- Column selection
inline void SetRangeEscaler(ulong start_col_, ulong count_col_); // Custom mode: count_col_ columns starting at start_col_
inline void SetRangeEscaler(ulong excluyed_cols_ = 1); // Simpler mode: the user decides how many columns, counted from the end, are excluded
//--- Persistence entry points (wrap Save() / Load())
inline bool save(string prefix_name);
inline bool load(string prefix_name);
//--- Transform
//- Fit
// Matrix
virtual matrix fit_transform(const matrix &X, bool save_data) = 0;
// Fits and transforms a file, rewriting it in place
bool fit_transform_file(const string &file_name, ushort sep, int extra_file_flags, bool save_data, uint file_code_page = CP_ACP, int start_line = 1);
// Reads a file, fits and transforms its contents, and returns the matrix
matrix fit_transform_by_file(const string& file_name, ushort sep, int file_flags, bool save_data, uint file_code_page = CP_ACP,
int start_line = 1, int end_line = SCALER_BASE_FINAL_LINE);
// Fits and transforms data passed as a string
matrix fit_transform_by_src(const string& src, ushort sep, bool save_data, int str_start_line = 1, int str_end_line = SCALER_BASE_FINAL_LINE); // Transforms a string
// Vector
virtual vector fit_transform(const vector &X) = 0; // No parameters are stored for vectors
//- Normal
virtual matrix transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;
virtual vector transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;
virtual matrix inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;
virtual vector inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;
// --- Common methods ---
// Returns the parameter file name last assigned to file_name_out
virtual inline string GetOutputFile() const final { return this.file_name_out; }
};
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Default state: excluded mode with one trailing column left out, no file name, nothing loaded.
ScalerBase::ScalerBase(void)
   : file_name_out(NULL),
     loaded_scaler(false),
     use_custom(false),
     start_col(0),
     count_cols(0),
     excluyed_cols(1)
  {
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Selects custom mode: count_col_ columns starting at start_col_ will be scaled.
inline void ScalerBase::SetRangeEscaler(ulong start_col_, ulong count_col_)
  {
   this.start_col  = start_col_;
   this.count_cols = count_col_;
   this.use_custom = true;
//--- Report the inclusive column range that was configured
   const ulong last_col = start_col_ + count_col_ - 1;
   const string msg = StringFormat("Configurado escalado CUSTOM: columnas %I64u a %I64u (%I64u columnas)", start_col_, last_col, count_col_);
   LogInfo(msg, FUNCION_ACTUAL);
  }
//+------------------------------------------------------------------+
// Selects excluded mode: the last excluyed_cols_ columns are left unscaled.
inline void ScalerBase::SetRangeEscaler(ulong excluyed_cols_ = 1)
  {
   this.use_custom    = false;
   this.excluyed_cols = excluyed_cols_;
   const string msg = StringFormat("Configurado escalado EXCLUDED: excluir últimas %I64u columnas", excluyed_cols_);
   LogInfo(msg, FUNCION_ACTUAL);
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Checks that the custom range [start_col, start_col + count_cols) fits inside the matrix columns.
// Logs an error and returns false when it does not.
bool ScalerBase::CheckSizeCustom(const matrix &mtx) const
  {
   const ulong total_cols = mtx.Cols();
//--- The first column must exist
   if(start_col >= total_cols)
     {
      LogError(StringFormat("Columna de inicio %I64u >= total columnas %I64u", start_col, total_cols), FUNCION_ACTUAL);
      return false;
     }
//--- The range must not run past the last column
   const ulong range_end = start_col + count_cols;
   if(range_end > total_cols)
     {
      LogError(StringFormat("Rango [%I64u:%I64u] excede columnas disponibles %I64u", start_col, range_end - 1, total_cols), FUNCION_ACTUAL);
      return false;
     }
   return true;
  }
//+------------------------------------------------------------------+
// Checks that the matrix has at least excluyed_cols columns.
// NOTE(review): the log text says ">=" but the check only rejects ">"; confirm which one is intended.
bool ScalerBase::CheckSizeExcluded(const matrix &mtx) const
  {
   const ulong total_cols = mtx.Cols();
   if(excluyed_cols <= total_cols)
      return true;
//--- More columns excluded than available
   LogError(StringFormat("Columnas a excluir %I64u >= total columnas %I64u", excluyed_cols, total_cols), FUNCION_ACTUAL);
   return false;
  }
//+------------------------------------------------------------------+
// Checks that the custom range [start_col, start_col + count_cols) fits inside the vector.
// Logs an error and returns false when it does not.
bool ScalerBase::CheckSizeCustom(const vector &v) const
  {
   const ulong total = v.Size();
//--- The first element must exist
   if(start_col >= total)
     {
      LogError(StringFormat("Columna de inicio %I64u >= tamaño total del vector%I64u", start_col, total), FUNCION_ACTUAL);
      return false;
     }
//--- The range must not run past the last element
   const ulong range_end = start_col + count_cols;
   if(range_end > total)
     {
      LogError(StringFormat("Rango [%I64u:%I64u] excede el tamaño del vector %I64u", start_col, range_end - 1, total), FUNCION_ACTUAL);
      return false;
     }
   return true;
  }
//+------------------------------------------------------------------+
// Checks that the vector has at least excluyed_cols elements.
// NOTE(review): the log text says ">=" but the check only rejects ">"; confirm which one is intended.
bool ScalerBase::CheckSizeExcluded(const vector &v) const
  {
   const ulong total = v.Size();
   if(excluyed_cols <= total)
      return true;
//--- More elements excluded than available
   LogError(StringFormat("Columnas a excluir %I64u >= tamaño del vector: %I64u", excluyed_cols, total), FUNCION_ACTUAL);
   return false;
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Returns the sub-matrix holding only the columns selected for scaling.
//  - Custom mode: columns [start_col, start_col + count_cols); X is returned
//    unchanged when it already has exactly count_cols columns.
//  - Excluded mode: every column except the last excluyed_cols; X is returned
//    unchanged when nothing is excluded.
// The selection is validated against X first: not every caller checks sizes
// beforehand, and an unchecked "X.Cols() - excluyed_cols" wraps around (ulong)
// while an unchecked custom range reads past the last column. On an invalid
// selection the size check logs the error and an empty matrix is returned.
matrix ScalerBase::ExtractMatrixToScale(const matrix &X) const
  {
   matrix result;
   if(use_custom)
     {
      //--- Input already contains just the selected columns
      if(X.Cols() == count_cols)
         return X;
      if(!CheckSizeCustom(X))
         return result;
      result.Init(X.Rows(), count_cols);
      for(ulong row = 0; row < X.Rows(); row++)
         for(ulong col = 0; col < count_cols; col++)
            result[row][col] = X[row][start_col + col];
     }
   else
     {
      //--- Nothing excluded: the whole matrix is scaled
      if(excluyed_cols == 0)
         return X;
      if(!CheckSizeExcluded(X))
         return result;
      const ulong cols_to_scale = X.Cols() - excluyed_cols;
      result.Init(X.Rows(), cols_to_scale);
      for(ulong row = 0; row < X.Rows(); row++)
         for(ulong col = 0; col < cols_to_scale; col++)
            result[row][col] = X[row][col];
     }
   return result;
  }
//+------------------------------------------------------------------+
// Writes the scaled columns back into a copy of the original matrix.
// When both matrices have the same shape, X_scaled is returned as is.
matrix ScalerBase::ReconstructMatrix(const matrix &X_original, const matrix &X_scaled) const
  {
   const bool same_shape = (X_original.Rows() == X_scaled.Rows() && X_original.Cols() == X_scaled.Cols());
   if(same_shape)
      return X_scaled;
//--- Start from a full copy and overwrite only the scaled block
   matrix rebuilt = X_original;
   const ulong total_rows = X_original.Rows();
//--- Custom mode writes count_cols columns at start_col; excluded mode writes the leading columns
   const ulong offset = use_custom ? start_col : 0;
   const ulong width  = use_custom ? count_cols : X_scaled.Cols();
   for(ulong r = 0; r < total_rows; r++)
     {
      for(ulong c = 0; c < width; c++)
         rebuilt[r][offset + c] = X_scaled[r][c];
     }
   return rebuilt;
  }
//+------------------------------------------------------------------+
// Returns the sub-vector holding only the elements selected for scaling.
//  - Custom mode: elements [start_col, start_col + count_cols); X is returned
//    unchanged when it already has exactly count_cols elements.
//  - Excluded mode: every element except the last excluyed_cols; X is returned
//    unchanged when nothing is excluded.
// The selection is validated against X first: not every caller checks sizes
// beforehand, and an unchecked "X.Size() - excluyed_cols" wraps around (ulong)
// while an unchecked custom range reads past the end. On an invalid selection
// the size check logs the error and an empty vector is returned.
vector ScalerBase::ExtractVectorToScale(const vector &X) const
  {
   vector result;
   if(use_custom)
     {
      //--- Input already contains just the selected elements
      if(X.Size() == count_cols)
         return X;
      if(!CheckSizeCustom(X))
         return result;
      //--- Copy the configured range
      result.Resize(count_cols);
      for(ulong i = 0; i < count_cols; i++)
         result[i] = X[start_col + i];
     }
   else
     {
      //--- Nothing excluded: the whole vector is scaled
      if(excluyed_cols == 0)
         return X;
      if(!CheckSizeExcluded(X))
         return result;
      //--- Copy everything except the last N elements
      const ulong size_to_scale = X.Size() - excluyed_cols;
      result.Resize(size_to_scale);
      for(ulong i = 0; i < size_to_scale; i++)
         result[i] = X[i];
     }
   return result;
  }
//+------------------------------------------------------------------+
// Writes the scaled elements back into a copy of the original vector.
// When both vectors have the same size, X_scaled is returned as is.
vector ScalerBase::ReconstructVector(const vector &X_original, const vector &X_scaled) const
  {
   if(X_original.Size() == X_scaled.Size())
      return X_scaled;
//--- Start from a full copy and overwrite only the scaled part
   vector rebuilt = X_original;
//--- Custom mode replaces the configured range; excluded mode replaces the leading elements
   const ulong offset = use_custom ? start_col : 0;
   const ulong length = use_custom ? count_cols : X_scaled.Size();
   for(ulong k = 0; k < length; k++)
      rebuilt[offset + k] = X_scaled[k];
   return rebuilt;
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Loads the scaler parameters through the derived-class Load() and marks the
// scaler as loaded only when that call succeeds. Previously the flag was set
// before Load() ran, so a failed load still enabled transform() and
// inverse_transform() on parameters that had never been read.
// Returns the result of Load().
inline bool ScalerBase::load(string prefix_name)
  {
   if(!this.Load(prefix_name))
      return false;
   loaded_scaler = true;
   return true;
  }
//+------------------------------------------------------------------+
// Builds the output file name (prefix_name + prefix_file) and delegates to the derived-class Save().
inline bool ScalerBase::save(string prefix_name)
  {
   string target = prefix_name;
   target += this.prefix_file;
   this.file_name_out = target;
   const bool saved = this.Save();
   return saved;
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
/*
Notes:
- file_flags: may carry additional flags such as FILE_ANSI, FILE_COMMON, etc. The FILE_READ flag is already added by default.
*/
//+------------------------------------------------------------------+
// Reads a delimited text file into a matrix and returns fit_transform() of it.
//  file_name      - file to read
//  sep            - field separator used to split each line
//  file_flags     - extra FileOpen flags; FILE_READ | FILE_TXT are always added
//  save_data      - forwarded to fit_transform()
//  file_code_page - code page passed to FileOpen
//  start_line     - zero-based index of the first line parsed (earlier lines are skipped)
//  end_line       - last line index read, or SCALER_BASE_FINAL_LINE to read to the end
// Returns an empty matrix when the file cannot be opened or a line yields no fields.
matrix ScalerBase::fit_transform_by_file(const string& file_name, ushort sep, int file_flags, bool save_data, uint file_code_page = CP_ACP,
int start_line = 1, int end_line = SCALER_BASE_FINAL_LINE)
  {
   matrix data = {};
//---
   ResetLastError();
   const int fh = FileOpen(file_name, (file_flags | FILE_READ | FILE_TXT), '\n', file_code_page);
   if(fh == INVALID_HANDLE)
     {
      LogCriticalError(StringFormat("Error al abrir el archivo = %s, ultimo erorr = %d", file_name, GetLastError()), FUNCION_ACTUAL);
      return data;
     }
//--- A single loop serves both modes; the upper bound applies only when an end line was given
   const bool read_to_end = (end_line == SCALER_BASE_FINAL_LINE);
   string fields[];
   string text = "";
   int line_idx = 0;
   int row_idx = 0;
   int n_fields = 0;
   while(!FileIsEnding(fh))
     {
      text = FileReadString(fh);
      if(line_idx >= start_line)
        {
         n_fields = StringSplit(text, sep, fields);
         if(n_fields < 1)
           {
            LogError(StringFormat("Linea mal formada = %d:\n%s", line_idx, text), FUNCION_ACTUAL);
            FileClose(fh);
            return data;
           }
         //--- Grow the matrix by one row and fill it with the parsed values
         data.Resize(row_idx + 1, n_fields);
         for(int k = 0; k < n_fields; k++)
            data[row_idx][k] = double(fields[k]);
         row_idx++;
        }
      line_idx++;
      if(!read_to_end && line_idx > end_line)
         break;
     }
//---
   FileClose(fh);
   return fit_transform(data, save_data);
  }
//+------------------------------------------------------------------+
// Reads a delimited text file, fits and transforms its numeric rows, and rewrites
// the file in place with the transformed values.
//  file_name        - file to read and then overwrite
//  sep              - field separator; used both to parse and to write the rows
//  extra_file_flags - extra FileOpen flags; FILE_TXT plus FILE_READ / FILE_WRITE are added
//  save_data        - forwarded to fit_transform()
//  file_code_page   - code page passed to FileOpen
//  start_line       - zero-based index of the first data line; earlier lines are kept
//                     verbatim and written back at the top of the file
// Returns false when the file cannot be opened (for reading or writing) or a data
// line yields no fields; true otherwise.
// Fix: rows used to be written back joined by a hard-coded ", " regardless of sep,
// so a file using any other separator could not be read again with the same settings.
bool ScalerBase::fit_transform_file(const string &file_name, ushort sep, int extra_file_flags, bool save_data, uint file_code_page = CP_ACP, int start_line = 1)
  {
//---
   ResetLastError();
//---
   matrix m = {};
   int file_handle = FileOpen(file_name, (extra_file_flags | FILE_TXT | FILE_READ), '\n', file_code_page);
   if(file_handle == INVALID_HANDLE)
     {
      LogCriticalError(StringFormat("Error al abrir el archivo = %s, ultimo erorr = %d", file_name, GetLastError()), FUNCION_ACTUAL);
      return false;
     }
//---
   int curr_line = 0;
   string arr[];
   int dt = 0;
   int curr_row = 0;
   string line = "";
   string extra[];   // lines before start_line, preserved as they are
   ArrayResize(extra, 0);
//--- Read until the end of the file
   while(!FileIsEnding(file_handle))
     {
      line = FileReadString(file_handle);
      if(curr_line >= start_line)
        {
         if((dt = StringSplit(line, sep, arr)) < 1)
           {
            LogError(StringFormat("Linea mal formada = %d:\n%s", curr_line, line), FUNCION_ACTUAL);
            FileClose(file_handle);
            return false;
           }
         //--- Grow the matrix by one row and fill it with the parsed values
         m.Resize(curr_row + 1, dt);
         for(int i = 0; i < dt; i++)
            m[curr_row][i] = double(arr[i]);
         curr_row++;
        }
      else
        {
         extra[ArrayResize(extra, ArraySize(extra) + 1) - 1] = line;
        }
      curr_line++;
     }
//--- Transform and close the read handle
   m = fit_transform(m, save_data);
   FileClose(file_handle);
//--- Rewrite: reopen the same file for writing
   file_handle = FileOpen(file_name, (extra_file_flags | FILE_TXT | FILE_WRITE), 0, file_code_page);
   if(file_handle == INVALID_HANDLE)
     {
      LogCriticalError(StringFormat("Error al abrir el archivo = %s, ultimo erorr = %d", file_name, GetLastError()), FUNCION_ACTUAL);
      return false;
     }
//--- Preserved leading lines first
   for(int i = 0; i < ArraySize(extra); i++)
      FileWrite(file_handle, extra[i]);
//--- Then the matrix, one row per line, joined with the caller's separator
   const string sep_str = ShortToString(sep);
   const ulong rows = m.Rows();
   const ulong cols = m.Cols();
   for(ulong row = 0; row < rows; row++)
     {
      line = "";
      for(ulong col = 0; col < cols; col++)
        {
         //--- Separator goes between fields only, so nothing has to be trimmed afterwards
         if(col > 0)
            line += sep_str;
         line += string(m[row][col]);
        }
      FileWrite(file_handle, line);
     }
//--- Close the file
   FileClose(file_handle);
   return true;
  }
//+------------------------------------------------------------------+
// Parses delimited text held in a string into a matrix and returns fit_transform() of it.
//  src            - text whose lines are separated by '\n'
//  sep            - field separator used to split each line
//  save_data      - forwarded to fit_transform()
//  str_start_line - zero-based index of the first line parsed (earlier lines are skipped)
//  str_end_line   - last line index read, or SCALER_BASE_FINAL_LINE to read every line
// Returns an empty matrix when src has no line at or after str_start_line, or a line yields no fields.
matrix ScalerBase::fit_transform_by_src(const string& src, ushort sep, bool save_data, int str_start_line = 1, int str_end_line = SCALER_BASE_FINAL_LINE)
  {
   matrix data = {};
//--- Split the source into lines
   string lines[];
   const int total_row = StringSplit(src, '\n', lines);
   if(total_row <= str_start_line)
     {
      LogError(StringFormat("Numero de filas del string %d es invalida", total_row), FUNCION_ACTUAL);
      return data;
     }
//--- A single loop serves both modes; the upper bound applies only when an end line was given
   const bool read_to_end = (str_end_line == SCALER_BASE_FINAL_LINE);
   string fields[];
   string text = "";
   int row_idx = 0;
   int n_fields = 0;
   for(int r = 0; r < total_row; r++)
     {
      text = lines[r];
      if(r >= str_start_line)
        {
         n_fields = StringSplit(text, sep, fields);
         if(n_fields < 1)
           {
            LogError(StringFormat("Linea mal formada = %d:\n%s", r, text), FUNCION_ACTUAL);
            return data;
           }
         //--- Grow the matrix by one row and fill it with the parsed values
         data.Resize(row_idx + 1, n_fields);
         for(int k = 0; k < n_fields; k++)
            data[row_idx][k] = double(fields[k]);
         row_idx++;
        }
      //--- Stop once the line after the requested end line would be next
      if(!read_to_end && (r + 1) > str_end_line)
         break;
     }
//---
   return fit_transform(data, save_data);
  }
//+------------------------------------------------------------------+
//| Standardization Scaler |
//+------------------------------------------------------------------+
//--- Scaler that standardizes each selected column as (x - mean) / std and
//--- reverses it as x * std + mean.
class StandardizationScaler : public ScalerBase
{
protected:
vector mean, std; // Per-column mean and standard deviation; filled by fit_transform(matrix, save_data = true) or Load()
// Persistence of mean / std plus the column-selection settings
bool Save() override;
bool Load(string prefix_name) override;
public:
// The parameter file name ends with "_mean_std.csv"
StandardizationScaler() : ScalerBase() { this.prefix_file = "_mean_std.csv"; }
matrix fit_transform(const matrix &X, bool save_data) override;
vector fit_transform(const vector &X) override; // No parameters are stored for vectors
matrix transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
vector transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
matrix inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
vector inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
};
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Writes the fitted parameters to file_name_out in the common files folder:
// mean, std, count_cols, start_col, excluyed_cols and use_custom, one FileWrite call each.
// Returns false when the file cannot be opened.
// Fix: the previous file is now deleted from the common folder (FILE_COMMON), the
// same location the new file is written to. Without the flag FileDelete targeted
// the local folder and could remove an unrelated file with the same name.
bool StandardizationScaler::Save()
  {
   FileDelete(this.file_name_out, FILE_COMMON);
   ResetLastError();
   int handle = FileOpen(this.file_name_out, FILE_WRITE | FILE_CSV | FILE_COMMON);
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
//--- Parameters first, then the column-selection settings (Load() reads them in this order)
   FileWrite(handle, vector_to_string(this.mean));
   FileWrite(handle, vector_to_string(this.std));
   FileWrite(handle, count_cols);
   FileWrite(handle, start_col);
   FileWrite(handle, excluyed_cols);
   FileWrite(handle, (int)use_custom);
   FileClose(handle);
   return true;
  }
//+------------------------------------------------------------------+
// Reads mean, std and the column-selection settings from prefix_name + prefix_file
// in the common files folder, in the order Save() wrote them.
// Returns false when the file cannot be opened.
bool StandardizationScaler::Load(string prefix_name)
  {
   this.file_name_out = prefix_name + this.prefix_file;
   ResetLastError();
   const int fh = FileOpen(file_name_out, FILE_READ | FILE_CSV | FILE_COMMON, "\n");
   if(fh == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
//--- Pull the six entries in file order, then convert them
   const string mean_txt     = FileReadString(fh);
   const string std_txt      = FileReadString(fh);
   const string count_txt    = FileReadString(fh);
   const string start_txt    = FileReadString(fh);
   const string excluded_txt = FileReadString(fh);
   const string custom_txt   = FileReadString(fh);
   FileClose(fh);
//---
   this.mean          = string_to_vector(mean_txt);
   this.std           = string_to_vector(std_txt);
   this.count_cols    = (ulong)StringToInteger(count_txt);
   this.start_col     = (ulong)StringToInteger(start_txt);
   this.excluyed_cols = (ulong)StringToInteger(excluded_txt);
   this.use_custom    = (bool)StringToInteger(custom_txt);
   return true;
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Standardizes the selected elements of a single vector with that vector's own
// mean and standard deviation; nothing is stored in the scaler.
// On a loaded scaler it warns and returns transform(X) instead.
// Returns X unchanged when the size check fails.
vector StandardizationScaler::fit_transform(const vector &X)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      return transform(X);
     }
//--- Validate the configured selection against the input size
   const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!size_ok)
      return X;
//---
   vector part = ExtractVectorToScale(X);
   const double mu = part.Mean();
   double sigma = part.Std();
//--- A (near-)zero deviation would blow up the division; fall back to 1.0
   if(sigma < SCALER_BASE_MIN_VALUE)
      sigma = 1.0;
//---
   const ulong n = part.Size();
   for(ulong k = 0; k < n; k++)
      part[k] = (part[k] - mu) / sigma;
//---
   return ReconstructVector(X, part);
  }
//+------------------------------------------------------------------+
// Computes per-column mean / std over the selected columns, standardizes them and
// returns the full matrix with those columns replaced.
// When save_data is true the computed mean / std are stored in the scaler.
// On a loaded scaler it warns and returns transform(X) instead.
// Returns X unchanged when the size check fails.
// NOTE(review): this method does not set loaded_scaler, which transform() and
// inverse_transform() require; confirm whether a fitted scaler is meant to be
// usable without a save() / load() round trip.
matrix StandardizationScaler::fit_transform(const matrix &X, bool save_data)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      return transform(X);
     }
   LogInfo(StringFormat("Numero de columnas de entrada: %I64u", X.Cols()), FUNCION_ACTUAL);
//--- Validate the configured selection against the input size
   const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!size_ok)
      return X;
//---
   matrix work = ExtractMatrixToScale(X);
   const ulong n_cols = work.Cols();
   const ulong n_rows = work.Rows();
   LogInfo(StringFormat("Columnas a escalar: %I64u", n_cols), FUNCION_ACTUAL);
//--- Column statistics
   vector mean_cts(n_cols);
   vector std_cts(n_cols);
   for(ulong c = 0; c < n_cols; c++)
     {
      const vector column = work.Col(c);
      mean_cts[c] = column.Mean();
      std_cts[c] = column.Std();
      //--- Avoid division by zero
      if(std_cts[c] < SCALER_BASE_MIN_VALUE)
        {
         LogWarning(StringFormat("Columna %I64u tiene std muy pequeño (%.2e), usando 1.0", c, std_cts[c]), FUNCION_ACTUAL);
         std_cts[c] = 1.0;
        }
     }
//--- Standardize column by column
   for(ulong c = 0; c < n_cols; c++)
     {
      const double mu = mean_cts[c];
      const double sigma = std_cts[c];
      for(ulong r = 0; r < n_rows; r++)
         work[r][c] = (work[r][c] - mu) / sigma;
     }
//---
   if(save_data)
     {
      this.mean = mean_cts;
      this.std = std_cts;
     }
//--- The result is always reconstructed here
   return ReconstructMatrix(X, work);
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Standardizes a matrix with the stored mean / std.
//  solo_escalar_lo_previsto - true: scale only the configured columns of X; false: X is scaled as given
//  reconstruir              - true: return X with the scaled columns written back; false: return only the scaled block
// Returns X unchanged when the scaler is not loaded, the size check fails, or the
// column count differs from the stored parameters.
// NOTE(review): the size check runs even when solo_escalar_lo_previsto is false; confirm that is intended.
matrix StandardizationScaler::transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler)
     {
      LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
      return X;
     }
//--- Validate the configured selection against the input size
   const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!size_ok)
      return X;
//---
   matrix work;
   if(solo_escalar_lo_previsto)
      work = ExtractMatrixToScale(X);
   else
      work = X;
   const ulong n_cols = work.Cols();
   const ulong n_rows = work.Rows();
//---
   if(n_cols != this.mean.Size())
     {
      LogError(StringFormat("Columnas a escalar %I64u != columnas entrenadas %u", n_cols, this.mean.Size()), FUNCION_ACTUAL);
      return X;
     }
//--- Standardize column by column
   for(ulong c = 0; c < n_cols; c++)
     {
      const double mu = this.mean[c];
      const double sigma = this.std[c];
      for(ulong r = 0; r < n_rows; r++)
         work[r][c] = (work[r][c] - mu) / sigma;
     }
//---
   if(reconstruir)
      return ReconstructMatrix(X, work);
   return work;
  }
//+------------------------------------------------------------------+
// Standardizes a vector with the stored mean / std.
//  solo_escalar_lo_previsto - true: scale only the configured elements of X; false: X is scaled as given
//  reconstruir              - true: return X with the scaled elements written back; false: return only the scaled part
// Returns X unchanged when the scaler is not loaded or the element count differs
// from the stored parameters.
// Fix: the size-mismatch message used the incomplete specifier "%I64" (missing "u")
// and "%u" for a ulong; both arguments are now formatted with "%I64u".
vector StandardizationScaler::transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler)
     {
      LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
      return X;
     }
//---
   vector X_to_scale = solo_escalar_lo_previsto ? ExtractVectorToScale(X) : X;
   const ulong X_to_scale_size = X_to_scale.Size();
   if(X_to_scale_size != this.mean.Size())
     {
      LogError(StringFormat("Elementos a escalar %I64u != elementos entrenados %I64u", X_to_scale_size, this.mean.Size()), FUNCION_ACTUAL);
      return X;
     }
//--- Element-wise standardization
   for(ulong i = 0; i < X_to_scale_size; i++)
      X_to_scale[i] = (X_to_scale[i] - this.mean[i]) / this.std[i];
//---
   return (reconstruir) ? ReconstructVector(X, X_to_scale) : X_to_scale;
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Reverses the standardization of a matrix: x * std + mean per column.
//  solo_escalar_lo_previsto - true: unscale only the configured columns; false: X_scaled is unscaled as given
//  reconstruir              - true: return X_scaled with the unscaled columns written back; false: return only that block
// Returns X_scaled unchanged when the scaler is not loaded or the column count
// differs from the stored parameters.
matrix StandardizationScaler::inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler)
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }
//---
   matrix work;
   if(solo_escalar_lo_previsto)
      work = ExtractMatrixToScale(X_scaled);
   else
      work = X_scaled;
   const ulong n_cols = work.Cols();
   const ulong n_rows = work.Rows();
//---
   if(n_cols != this.mean.Size())
     {
      LogError(StringFormat("Columnas escaladas %I64u != columnas entrenadas %u", n_cols, this.mean.Size()), FUNCION_ACTUAL);
      return X_scaled;
     }
//--- Undo the scaling column by column
   for(ulong c = 0; c < n_cols; c++)
     {
      const double mu = this.mean[c];
      const double sigma = this.std[c];
      for(ulong r = 0; r < n_rows; r++)
         work[r][c] = work[r][c] * sigma + mu;
     }
//---
   if(reconstruir)
      return ReconstructMatrix(X_scaled, work);
   return work;
  }
//+------------------------------------------------------------------+
// Reverses the standardization of a vector: x * std + mean per element.
//  solo_escalar_lo_previsto - true: unscale only the configured elements; false: X_scaled is unscaled as given
//  reconstruir              - true: return X_scaled with the unscaled elements written back; false: return only that part
// Returns X_scaled unchanged when the scaler is not loaded or the element count
// differs from the stored parameters.
vector StandardizationScaler::inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler)
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }
//---
   vector work;
   if(solo_escalar_lo_previsto)
      work = ExtractVectorToScale(X_scaled);
   else
      work = X_scaled;
   const ulong n = work.Size();
//---
   if(n != this.mean.Size())
     {
      LogError(StringFormat("Elementos escalados %I64u != elementos entrenados %u", n, this.mean.Size()), FUNCION_ACTUAL);
      return X_scaled;
     }
//--- Undo the scaling element by element
   for(ulong k = 0; k < n; k++)
      work[k] = work[k] * this.std[k] + this.mean[k];
//---
   if(reconstruir)
      return ReconstructVector(X_scaled, work);
   return work;
  }
//+------------------------------------------------------------------+
//| MaxMin Scaler |
//+------------------------------------------------------------------+
//--- Scaler that maps each selected column as (x - min) / (max - min) and
//--- reverses it as x * (max - min) + min.
class MaxMinScaler : public ScalerBase
{
protected:
vector min_vals, max_vals; // Per-column minimum and maximum; filled by fit_transform(matrix, save_data = true) or Load()
// Persistence of min_vals / max_vals plus the column-selection settings
bool Save() override;
bool Load(string prefix_name) override;
public:
// The parameter file name ends with "_min_max.csv"
MaxMinScaler() : ScalerBase() { this.prefix_file = "_min_max.csv"; }
vector fit_transform(const vector &X) override;
matrix fit_transform(const matrix &X, bool save_data) override;
matrix transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
vector transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
matrix inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
vector inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
};
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Writes the fitted parameters to file_name_out in the common files folder:
// min_vals, max_vals, count_cols, start_col, excluyed_cols and use_custom, one FileWrite call each.
// Returns false when the file cannot be opened.
// Fix: the previous file is now deleted from the common folder (FILE_COMMON), the
// same location the new file is written to. Without the flag FileDelete targeted
// the local folder and could remove an unrelated file with the same name.
bool MaxMinScaler::Save()
  {
   FileDelete(this.file_name_out, FILE_COMMON);
   ResetLastError();
//---
   int handle = FileOpen(this.file_name_out, FILE_WRITE | FILE_CSV | FILE_COMMON);
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
//--- Parameters first, then the column-selection settings (Load() reads them in this order)
   FileWrite(handle, vector_to_string(this.min_vals));
   FileWrite(handle, vector_to_string(this.max_vals));
   FileWrite(handle, count_cols);
   FileWrite(handle, start_col);
   FileWrite(handle, excluyed_cols);
   FileWrite(handle, (int)use_custom);
//---
   FileClose(handle);
   return true;
  }
//+------------------------------------------------------------------+
// Reads min_vals, max_vals and the column-selection settings from
// prefix_name + prefix_file in the common files folder, in the order Save() wrote them.
// Returns false when the file cannot be opened.
bool MaxMinScaler::Load(string prefix_name)
  {
   this.file_name_out = prefix_name + this.prefix_file;
   ResetLastError();
   const int fh = FileOpen(file_name_out, FILE_READ | FILE_CSV | FILE_COMMON, "\n");
   if(fh == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
//--- Pull the six entries in file order, then convert them
   const string min_txt      = FileReadString(fh);
   const string max_txt      = FileReadString(fh);
   const string count_txt    = FileReadString(fh);
   const string start_txt    = FileReadString(fh);
   const string excluded_txt = FileReadString(fh);
   const string custom_txt   = FileReadString(fh);
   FileClose(fh);
//---
   this.min_vals      = string_to_vector(min_txt);
   this.max_vals      = string_to_vector(max_txt);
   this.count_cols    = (ulong)StringToInteger(count_txt);
   this.start_col     = (ulong)StringToInteger(start_txt);
   this.excluyed_cols = (ulong)StringToInteger(excluded_txt);
   this.use_custom    = (bool)StringToInteger(custom_txt);
   return true;
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Min-max scales the selected elements of a single vector with that vector's own
// minimum and maximum; nothing is stored in the scaler.
// On a loaded scaler it warns and returns transform(X) instead.
// Returns X unchanged when the size check fails.
vector MaxMinScaler::fit_transform(const vector &X)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      return transform(X);
     }
//--- Validate the configured selection against the input size
   const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!size_ok)
      return X;
//---
   vector part = ExtractVectorToScale(X);
   double hi = part.Max();
   const double lo = part.Min();
//--- A (near-)zero range would blow up the division; use a range of 1.0
   if(hi - lo < SCALER_BASE_MIN_VALUE)
      hi = lo + 1.0;
//---
   const ulong n = part.Size();
   for(ulong k = 0; k < n; k++)
      part[k] = (part[k] - lo) / (hi - lo);
//---
   return ReconstructVector(X, part);
  }
//+------------------------------------------------------------------+
// Computes per-column min / max over the selected columns, min-max scales them and
// returns the full matrix with those columns replaced.
// When save_data is true the computed min / max are stored in the scaler.
// On a loaded scaler it warns and returns transform(X) instead.
// Returns X unchanged when the size check fails.
// NOTE(review): this method does not set loaded_scaler, which transform() and
// inverse_transform() require; confirm whether a fitted scaler is meant to be
// usable without a save() / load() round trip.
matrix MaxMinScaler::fit_transform(const matrix &X, bool save_data)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      return transform(X);
     }
   LogInfo(StringFormat("Numero de columnas de entrada: %I64u", X.Cols()), FUNCION_ACTUAL);
//--- Validate the configured selection against the input size
   const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!size_ok)
      return X;
//---
   matrix work = ExtractMatrixToScale(X);
   const ulong n_cols = work.Cols();
   const ulong n_rows = work.Rows();
   LogInfo(StringFormat("Columnas a escalar: %I64u", n_cols), FUNCION_ACTUAL);
//--- Column extremes
   vector min_vals_cts(n_cols);
   vector max_vals_cts(n_cols);
   for(ulong c = 0; c < n_cols; c++)
     {
      const vector column = work.Col(c);
      min_vals_cts[c] = column.Min();
      max_vals_cts[c] = column.Max();
      //--- A (near-)zero range would blow up the division; use a range of 1.0
      if(fabs(max_vals_cts[c] - min_vals_cts[c]) < SCALER_BASE_MIN_VALUE)
        {
         LogWarning(StringFormat("Columna %I64u tiene rango muy pequeño (%.2e), usando rango 1.0", c, max_vals_cts[c] - min_vals_cts[c]), FUNCION_ACTUAL);
         max_vals_cts[c] = min_vals_cts[c] + 1.0;
        }
     }
//--- Scale column by column
   for(ulong c = 0; c < n_cols; c++)
     {
      const double lo = min_vals_cts[c];
      const double hi = max_vals_cts[c];
      for(ulong r = 0; r < n_rows; r++)
         work[r][c] = (work[r][c] - lo) / (hi - lo);
     }
//---
   if(save_data)
     {
      this.min_vals = min_vals_cts;
      this.max_vals = max_vals_cts;
     }
//--- The result is always reconstructed here
   return ReconstructMatrix(X, work);
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Min-max scales a matrix with the stored min / max.
//  solo_escalar_lo_previsto - true: scale only the configured columns of X; false: X is scaled as given
//  reconstruir              - true: return X with the scaled columns written back; false: return only the scaled block
// Returns X unchanged when the scaler is not loaded, the size check fails, or the
// column count differs from the stored parameters.
// NOTE(review): the size check runs even when solo_escalar_lo_previsto is false; confirm that is intended.
matrix MaxMinScaler::transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler)
     {
      LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
      return X;
     }
//--- Validate the configured selection against the input size
   const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!size_ok)
      return X;
//---
   matrix work;
   if(solo_escalar_lo_previsto)
      work = ExtractMatrixToScale(X);
   else
      work = X;
   const ulong n_cols = work.Cols();
   const ulong n_rows = work.Rows();
//---
   if(n_cols != this.min_vals.Size())
     {
      LogError(StringFormat("Columnas a escalar %I64u != columnas entrenadas %u", n_cols, this.min_vals.Size()), FUNCION_ACTUAL);
      return X;
     }
//--- Scale column by column
   for(ulong c = 0; c < n_cols; c++)
     {
      const double lo = this.min_vals[c];
      const double hi = this.max_vals[c];
      for(ulong r = 0; r < n_rows; r++)
         work[r][c] = (work[r][c] - lo) / (hi - lo);
     }
//---
   if(reconstruir)
      return ReconstructMatrix(X, work);
   return work;
  }
//+------------------------------------------------------------------+
// Min-max scales a vector with the stored min / max.
//  solo_escalar_lo_previsto - true: scale only the configured elements of X; false: X is scaled as given
//  reconstruir              - true: return X with the scaled elements written back; false: return only the scaled part
// Returns X unchanged when the scaler is not loaded or the element count differs
// from the stored parameters.
vector MaxMinScaler::transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler)
     {
      LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
      return X;
     }
//---
   vector work;
   if(solo_escalar_lo_previsto)
      work = ExtractVectorToScale(X);
   else
      work = X;
   const ulong n = work.Size();
   if(n != this.min_vals.Size())
     {
      LogError(StringFormat("Elementos a escalar %I64u != elementos entrenados %u", n, this.min_vals.Size()), FUNCION_ACTUAL);
      return X;
     }
//--- Element-wise scaling
   for(ulong k = 0; k < n; k++)
     {
      const double lo = this.min_vals[k];
      const double hi = this.max_vals[k];
      work[k] = (work[k] - lo) / (hi - lo);
     }
//---
   if(reconstruir)
      return ReconstructVector(X, work);
   return work;
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Reverses the min-max scaling of a matrix: x * (max - min) + min per column.
//  solo_escalar_lo_previsto - true: unscale only the configured columns; false: X_scaled is unscaled as given
//  reconstruir              - true: return X_scaled with the unscaled columns written back; false: return only that block
// Returns X_scaled unchanged when the scaler is not loaded or the column count
// differs from the stored parameters.
matrix MaxMinScaler::inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler)
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }
//---
   matrix work;
   if(solo_escalar_lo_previsto)
      work = ExtractMatrixToScale(X_scaled);
   else
      work = X_scaled;
   const ulong n_cols = work.Cols();
   const ulong n_rows = work.Rows();
//---
   if(n_cols != this.min_vals.Size())
     {
      LogError(StringFormat("Columnas escaladas %I64u != columnas entrenadas %u", n_cols, this.min_vals.Size()), FUNCION_ACTUAL);
      return X_scaled;
     }
//--- Undo the scaling column by column
   for(ulong c = 0; c < n_cols; c++)
     {
      const double lo = this.min_vals[c];
      const double hi = this.max_vals[c];
      for(ulong r = 0; r < n_rows; r++)
         work[r][c] = work[r][c] * (hi - lo) + lo;
     }
//---
   if(reconstruir)
      return ReconstructMatrix(X_scaled, work);
   return work;
  }
//+------------------------------------------------------------------+
// Reverts the min/max scaling on a single vector:
//    x[i] = scaled[i] * (max_vals[i] - min_vals[i]) + min_vals[i]
// Parameters:
//    X_scaled                 - vector holding scaled values.
//    solo_escalar_lo_previsto - true: the elements to process are first taken from
//                               X_scaled through ExtractVectorToScale(); false: X_scaled
//                               is processed as-is.
//    reconstruir              - true: the result is passed through
//                               ReconstructVector(X_scaled, unscaled); false: only the
//                               unscaled elements are returned.
// Returns X_scaled unchanged (after logging an error) when the scaler is not loaded or
// the element count differs from the number of stored minimums.
vector MaxMinScaler::inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler)
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }
//--- Select the elements to process
   vector X_to_unscale = (solo_escalar_lo_previsto) ? ExtractVectorToScale(X_scaled) : X_scaled;
   const ulong X_to_unscale_size = X_to_unscale.Size();
   // Held in a ulong so that it matches the %I64u specifier used in the message
   const ulong trained_size = this.min_vals.Size();
//---
   if(X_to_unscale_size != trained_size)
     {
      LogError(StringFormat("Elementos escalados %I64u != elementos entrenados %I64u", X_to_unscale_size, trained_size), FUNCION_ACTUAL);
      return X_scaled;
     }
//--- Element-wise inverse of the min/max scaling
   for(ulong i = 0; i < X_to_unscale_size; i++)
      X_to_unscale[i] = X_to_unscale[i] * (this.max_vals[i] - this.min_vals[i]) + this.min_vals[i];
//---
   return (reconstruir) ? ReconstructVector(X_scaled, X_to_unscale) : X_to_unscale;
  }
//+------------------------------------------------------------------+
//| Robust Scaler |
//+------------------------------------------------------------------+
// Scaler that subtracts a per-column median and divides by a per-column
// interquartile range (Q75 - Q25). The statistics are written to / read from a
// file whose name ends in "_median_iqr.csv".
class RobustScaler : public ScalerBase
  {
protected:
   //--- Trained statistics, one entry per scaled column
   vector            medians;
   vector            iqrs;
   //--- Persistence hooks required by ScalerBase
   bool              Save() override;
   bool              Load(string prefix_name) override;

public:
   //--- The constructor only fixes the file-name suffix used by Save()/Load()
                     RobustScaler(void) : ScalerBase() { prefix_file = "_median_iqr.csv"; }
   //--- Fitting
   vector            fit_transform(const vector &X) override;
   matrix            fit_transform(const matrix &X, bool save_data) override;
   //--- Forward scaling with the stored statistics
   matrix            transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   //--- Reverse scaling with the stored statistics
   matrix            inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
  };
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Writes the scaler state to this.file_name_out in the common terminal folder.
// One value per FileWrite call, in this order: medians, iqrs, count_cols,
// start_col, excluyed_cols, use_custom (as int). Load() reads them back in the
// same order.
// Returns false (after logging a fatal error) when the file cannot be opened.
bool RobustScaler::Save()
  {
   // Remove any previous file from the SAME location FileOpen writes to.
   // Without FILE_COMMON the delete would target the terminal-local folder and
   // could remove an unrelated file that happens to share the name.
   FileDelete(this.file_name_out, FILE_COMMON);
   ResetLastError();
//---
   const int handle = FileOpen(this.file_name_out, FILE_WRITE | FILE_CSV | FILE_COMMON);
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
//--- Trained statistics
   FileWrite(handle, vector_to_string(this.medians));
   FileWrite(handle, vector_to_string(this.iqrs));
//--- Column-selection configuration
   FileWrite(handle, count_cols);
   FileWrite(handle, start_col);
   FileWrite(handle, excluyed_cols);
   FileWrite(handle, (int)use_custom);
//---
   FileClose(handle);
   return true;
  }
//+------------------------------------------------------------------+
// Reads the scaler state from "<prefix_name><prefix_file>" in the common terminal
// folder, in the order written by Save(): medians, iqrs, count_cols, start_col,
// excluyed_cols, use_custom.
// Parameters:
//    prefix_name - leading part of the file name; this.prefix_file is appended to it.
// Returns false (after logging a fatal error) when the file cannot be opened or when
// the stored medians and IQRs do not have the same length. In the latter case none of
// the statistics or column-selection members are modified.
bool RobustScaler::Load(string prefix_name)
  {
   this.file_name_out = prefix_name + this.prefix_file;
//---
   ResetLastError();
   const int handle = FileOpen(this.file_name_out, FILE_READ | FILE_CSV | FILE_COMMON, "\n");
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
//--- Read everything into locals first so a bad file cannot leave a half-loaded scaler
   vector loaded_medians = string_to_vector(FileReadString(handle));
   vector loaded_iqrs = string_to_vector(FileReadString(handle));
   const ulong loaded_count_cols = (ulong)StringToInteger(FileReadString(handle));
   const ulong loaded_start_col = (ulong)StringToInteger(FileReadString(handle));
   const ulong loaded_excluyed_cols = (ulong)StringToInteger(FileReadString(handle));
   const bool loaded_use_custom = (bool)StringToInteger(FileReadString(handle));
//---
   FileClose(handle);
//--- transform()/inverse_transform() index iqrs by the size of medians, so both must agree
   const ulong medians_size = loaded_medians.Size();
   const ulong iqrs_size = loaded_iqrs.Size();
   if(medians_size != iqrs_size)
     {
      LogFatalError(StringFormat("Corrupt scaler file: medians= %I64u iqrs= %I64u >> Filename= %s", medians_size, iqrs_size, this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
//--- Commit
   this.medians = loaded_medians;
   this.iqrs = loaded_iqrs;
   this.count_cols = loaded_count_cols;
   this.start_col = loaded_start_col;
   this.excluyed_cols = loaded_excluyed_cols;
   this.use_custom = loaded_use_custom;
   return true;
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Robust-scales a single vector using statistics computed from that same vector:
// the median is subtracted and the result divided by (Q75 - Q25). When the IQR
// magnitude is below SCALER_BASE_MIN_VALUE, 1.0 is used as divisor instead.
// The computed statistics are local to this call; the medians/iqrs members are not
// written here.
// If the scaler is already loaded, a warning is logged and transform(X) is returned.
// If the size check for the active column-selection mode fails, X is returned as-is.
vector RobustScaler::fit_transform(const vector &X)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      return transform(X);
     }
//--- Size check for whichever selection mode is active (custom range or excluded columns)
   const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!size_ok)
      return X;
//---
   vector selected = ExtractVectorToScale(X);
//--- Centre and spread of the selected elements
   const double center = selected.Median();
   const double upper_quartile = selected.Percentile(75);
   const double lower_quartile = selected.Percentile(25);
   double spread = upper_quartile - lower_quartile;
   if(fabs(spread) < SCALER_BASE_MIN_VALUE)
      spread = 1.00; // avoid dividing by a (near) zero IQR
//---
   for(ulong idx = 0; idx < selected.Size(); idx++)
     {
      selected[idx] = (selected[idx] - center) / spread;
     }
//---
   return ReconstructVector(X, selected);
  }
//+------------------------------------------------------------------+
// Robust-scales the selected columns of X using per-column statistics computed from X:
// each column has its median subtracted and is divided by its IQR (Q75 - Q25). A column
// whose IQR magnitude is below SCALER_BASE_MIN_VALUE is divided by 1.0 instead, with a
// warning.
// Parameters:
//    X         - input matrix.
//    save_data - when true the computed medians and IQRs are stored in the
//                medians/iqrs members.
// If the scaler is already loaded, a warning is logged and transform(X) is returned.
// If the size check for the active column-selection mode fails, X is returned as-is.
// Otherwise the result is always rebuilt with ReconstructMatrix(X, scaled).
matrix RobustScaler::fit_transform(const matrix &X, bool save_data)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      return transform(X);
     }
   LogInfo(StringFormat("Numero de columnas de entrada: %I64u", X.Cols()), FUNCION_ACTUAL);
//--- Size check for whichever selection mode is active (custom range or excluded columns)
   const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!size_ok)
      return X;
//---
   matrix selected = ExtractMatrixToScale(X);
   const ulong n_cols = selected.Cols();
   const ulong n_rows = selected.Rows();
   LogInfo(StringFormat("Columnas a escalar: %I64u", n_cols), FUNCION_ACTUAL);
//--- Per-column statistics
   vector col_medians(n_cols);
   vector col_iqrs(n_cols);
   for(ulong c = 0; c < n_cols; c++)
     {
      vector column = selected.Col(c);
      col_medians[c] = column.Median();
      const double upper_quartile = column.Percentile(75);
      const double lower_quartile = column.Percentile(25);
      col_iqrs[c] = upper_quartile - lower_quartile;
      if(fabs(col_iqrs[c]) >= SCALER_BASE_MIN_VALUE)
         continue;
      //--- Degenerate spread: report the measured value, then fall back to a unit divisor
      LogWarning(StringFormat("Columna %I64u tiene IQR muy pequeño (%.2e), usando 1.0", c, col_iqrs[c]), FUNCION_ACTUAL);
      col_iqrs[c] = 1.0;
     }
//--- Scale every cell with its column's statistics
   for(ulong r = 0; r < n_rows; r++)
     {
      for(ulong c = 0; c < n_cols; c++)
         selected[r][c] = (selected[r][c] - col_medians[c]) / col_iqrs[c];
     }
//--- Keep the statistics only on request
   if(save_data)
     {
      this.medians = col_medians;
      this.iqrs = col_iqrs;
     }
//--- The result is always reconstructed here
   return ReconstructMatrix(X, selected);
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Applies the stored robust scaling to a matrix:
//    scaled[row][col] = (x[row][col] - medians[col]) / iqrs[col]
// Parameters:
//    X                        - input matrix.
//    solo_escalar_lo_previsto - true: the columns to scale are first taken from X
//                               through ExtractMatrixToScale(); false: X is scaled as-is.
//    reconstruir              - true: the result is passed through
//                               ReconstructMatrix(X, scaled); false: only the scaled
//                               columns are returned.
// Returns X unchanged when the scaler is not loaded, when the size check fails
// (extraction mode only), or when the number of columns to scale differs from the
// number of stored medians.
matrix RobustScaler::transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler)
     {
      LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
      return X;
     }
//--- The size check guards the extraction step, so it is only run when columns are
//--- actually extracted. When X is used as-is, the column-count comparison below is the
//--- relevant guard (this matches the vector overload and both inverse_transform overloads).
   if(solo_escalar_lo_previsto)
     {
      const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
      if(!size_ok)
         return X;
     }
//---
   matrix X_to_scale = solo_escalar_lo_previsto ? ExtractMatrixToScale(X) : X;
   const ulong X_to_scale_cols = X_to_scale.Cols();
   const ulong X_to_scale_rows = X_to_scale.Rows();
   // Held in a ulong so that it matches the %I64u specifier used in the message
   const ulong trained_cols = this.medians.Size();
//---
   if(X_to_scale_cols != trained_cols)
     {
      LogError(StringFormat("Columnas a escalar %I64u != columnas entrenadas %I64u", X_to_scale_cols, trained_cols), FUNCION_ACTUAL);
      return X;
     }
//--- Cell-wise robust scaling
   for(ulong row = 0; row < X_to_scale_rows; row++)
      for(ulong col = 0; col < X_to_scale_cols; col++)
         X_to_scale[row][col] = (X_to_scale[row][col] - this.medians[col]) / this.iqrs[col];
//---
   return (reconstruir) ? ReconstructMatrix(X, X_to_scale) : X_to_scale;
  }
//+------------------------------------------------------------------+
// Applies the stored robust scaling to a single vector:
//    scaled[i] = (x[i] - medians[i]) / iqrs[i]
// Parameters:
//    X                        - input vector.
//    solo_escalar_lo_previsto - true: the elements to scale are first taken from X
//                               through ExtractVectorToScale(); false: X is scaled as-is.
//    reconstruir              - true: the result is passed through
//                               ReconstructVector(X, scaled); false: only the scaled
//                               elements are returned.
// Returns X unchanged (after logging an error) when the scaler is not loaded or the
// number of elements to scale differs from the number of stored medians.
vector RobustScaler::transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler)
     {
      LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
      return X;
     }
//--- Select the elements to scale and compare against the trained size
   vector X_to_scale = solo_escalar_lo_previsto ? ExtractVectorToScale(X) : X;
   const ulong X_to_scale_size = X_to_scale.Size();
   // vector::Size() is 64-bit, so both values are printed with %I64u
   const ulong trained_size = this.medians.Size();
   if(X_to_scale_size != trained_size)
     {
      LogError(StringFormat("Elementos a escalar %I64u != elementos entrenados %I64u", X_to_scale_size, trained_size), FUNCION_ACTUAL);
      return X;
     }
//--- Element-wise robust scaling
   for(ulong i = 0; i < X_to_scale_size; i++)
      X_to_scale[i] = (X_to_scale[i] - this.medians[i]) / this.iqrs[i];
//---
   return (reconstruir) ? ReconstructVector(X, X_to_scale) : X_to_scale;
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Reverts the robust scaling on a matrix:
//    x[row][col] = scaled[row][col] * iqrs[col] + medians[col]
// Parameters:
//    X_scaled                 - matrix holding scaled values.
//    solo_escalar_lo_previsto - true: the columns to process are first taken from
//                               X_scaled through ExtractMatrixToScale(); false: X_scaled
//                               is processed as-is.
//    reconstruir              - true: the result is passed through
//                               ReconstructMatrix(X_scaled, unscaled); false: only the
//                               unscaled columns are returned.
// Returns X_scaled unchanged (after logging an error) when the scaler is not loaded or
// the column count differs from the number of stored medians.
matrix RobustScaler::inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler)
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }
//--- Select the columns to process
   matrix X_to_unscale = (solo_escalar_lo_previsto) ? ExtractMatrixToScale(X_scaled) : X_scaled;
   const ulong X_to_unscale_cols = X_to_unscale.Cols();
   const ulong X_to_unscale_rows = X_to_unscale.Rows();
   // vector::Size() is 64-bit, so both values are printed with %I64u
   const ulong trained_cols = this.medians.Size();
//---
   if(X_to_unscale_cols != trained_cols)
     {
      LogError(StringFormat("Columnas escaladas %I64u != columnas entrenadas %I64u", X_to_unscale_cols, trained_cols), FUNCION_ACTUAL);
      return X_scaled;
     }
//--- Cell-wise inverse of the robust scaling
   for(ulong row = 0; row < X_to_unscale_rows; row++)
      for(ulong col = 0; col < X_to_unscale_cols; col++)
         X_to_unscale[row][col] = X_to_unscale[row][col] * this.iqrs[col] + this.medians[col];
//---
   return (reconstruir) ? ReconstructMatrix(X_scaled, X_to_unscale) : X_to_unscale;
  }
//+------------------------------------------------------------------+
// Reverts the robust scaling on a single vector:
//    x[i] = scaled[i] * iqrs[i] + medians[i]
// Parameters:
//    X_scaled                 - vector holding scaled values.
//    solo_escalar_lo_previsto - true: the elements to process are first taken from
//                               X_scaled through ExtractVectorToScale(); false: X_scaled
//                               is processed as-is.
//    reconstruir              - true: the result is passed through
//                               ReconstructVector(X_scaled, unscaled); false: only the
//                               unscaled elements are returned.
// Returns X_scaled unchanged (after logging an error) when the scaler is not loaded or
// the element count differs from the number of stored medians.
vector RobustScaler::inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler)
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }
//--- Select the elements to process
   vector X_to_unscale = (solo_escalar_lo_previsto) ? ExtractVectorToScale(X_scaled) : X_scaled;
   const ulong X_to_unscale_size = X_to_unscale.Size();
   // vector::Size() is 64-bit, so both values are printed with %I64u
   const ulong trained_size = this.medians.Size();
//---
   if(X_to_unscale_size != trained_size)
     {
      LogError(StringFormat("Elementos escalados %I64u != elementos entrenados %I64u", X_to_unscale_size, trained_size), FUNCION_ACTUAL);
      return X_scaled;
     }
//--- Element-wise inverse of the robust scaling
   for(ulong i = 0; i < X_to_unscale_size; i++)
      X_to_unscale[i] = X_to_unscale[i] * this.iqrs[i] + this.medians[i];
//---
   return (reconstruir) ? ReconstructVector(X_scaled, X_to_unscale) : X_to_unscale;
  }
//+------------------------------------------------------------------+
#endif // SCALERBYLEO_SCALER_BASE_MQH
//+------------------------------------------------------------------+