ScalerByLeo/ScalerBase.mqh
Nique_372 2420303dbd
2025-09-22 09:44:56 -05:00

1246 lines
No EOL
77 KiB
MQL5

//+------------------------------------------------------------------+
//| ScalerBase.mqh |
//| Copyright 2025, Leo. |
//| https://www.mql5.com |
//+------------------------------------------------------------------+
#property copyright "Copyright 2025, Leo."
#property link "https://www.mql5.com/en/users/nique_372"
#property strict
#ifndef SCALER_BY_LEO_GEN_MQH
#define SCALER_BY_LEO_GEN_MQH
#include "..\\MQLArticles\\Utils\\Funciones Array.mqh"
#include "..\\MQLArticles\\Utils\\File.mqh"
//+------------------------------------------------------------------+
//| Scaler Base |
//+------------------------------------------------------------------+
// Abstract base for the feature scalers defined in this file.
// It stores which columns are scaled (either a custom [start_col, start_col + count_cols)
// range or "everything except the last excluyed_cols columns"), the output file name,
// and helpers that cut the selected columns out of a matrix/vector and write scaled
// values back into a copy of the original. Statistics, transforms and file persistence
// are left to the derived classes through the pure virtual methods below.
class ScalerBase : public CLoggerBase
{
protected:
// Full output file name; save() and the derived Load() build it as prefix_name + prefix_file
string file_name_out;
// File-name suffix; each derived constructor in this file assigns its own value
string prefix_file;
// Set by load(); the derived transforms consult it before using stored parameters
bool loaded_scaler;
bool use_custom; // Flag: true = custom column range, false = exclude trailing columns
// First column of the custom range (used when use_custom is true)
ulong start_col;
// Number of columns in the custom range (used when use_custom is true)
ulong count_cols;
// Number of trailing columns left unscaled (used when use_custom is false)
ulong excluyed_cols;
// Persistence hooks implemented by each concrete scaler
virtual bool Save() = 0;
virtual bool Load(string prefix_name) = 0;
//--- Helper methods
// Validate the configured column selection against the size of the input; log and return false on failure
bool CheckSizeCustom(const matrix &mtx) const;
bool CheckSizeExcluded(const matrix &mtx) const;
bool CheckSizeCustom(const vector &v) const;
bool CheckSizeExcluded(const vector &v) const;
//---
// Extract* returns only the columns/elements selected for scaling;
// Reconstruct* copies the original and overwrites the selected positions with the scaled values
matrix ExtractMatrixToScale(const matrix &X) const;
matrix ReconstructMatrix(const matrix &X_original, const matrix &X_scaled) const;
vector ExtractVectorToScale(const vector &X) const;
vector ReconstructVector(const vector &X_original, const vector &X_scaled) const;
public:
ScalerBase(void);
~ScalerBase(void) {}
//---
inline void SetRangeEscaler(ulong start_col_, ulong count_col_); // Custom range: scale count_col_ columns starting at start_col_
inline void SetRangeEscaler(ulong excluyed_cols_ = 1); // Simpler mode: the caller chooses how many trailing columns are excluded
//---
// Public entry points for persistence; they forward to the virtual Save()/Load()
inline bool save(string prefix_name);
inline bool load(string prefix_name);
//---
// Scaling interface implemented by every concrete scaler.
// solo_escalar_lo_previsto: extract the configured columns from the input before scaling.
// reconstruir: return the input layout with the processed columns written back.
virtual matrix fit_transform(const matrix &X, bool save_data) = 0;
virtual vector fit_transform(const vector &X) = 0; // For vectors no fitted data is stored
virtual matrix transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;
virtual vector transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;
virtual matrix inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;
virtual vector inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;
// --- Common methods ---
// Returns the file name last built by save() or Load()
virtual inline string GetOutputFile() const final { return this.file_name_out; }
};
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Default configuration: "excluded" mode with the last column left unscaled,
// no output file yet and the scaler marked as not loaded.
// Initializers are listed in member declaration order.
ScalerBase::ScalerBase(void)
   : file_name_out(NULL),
     loaded_scaler(false),
     use_custom(false),
     start_col(0),
     count_cols(0),
     excluyed_cols(1)
{
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Selects CUSTOM mode: only count_col_ columns starting at start_col_ are scaled.
// The chosen range is written to the log.
inline void ScalerBase::SetRangeEscaler(ulong start_col_, ulong count_col_)
{
   this.start_col  = start_col_;
   this.count_cols = count_col_;
   this.use_custom = true;
   //--- index of the last column in the range, for the log line only
   const ulong last_col = start_col_ + count_col_ - 1;
   const string msg = StringFormat("Configurado escalado CUSTOM: columnas %I64u a %I64u (%I64u columnas)", start_col_, last_col, count_col_);
   LogInfo(msg, FUNCION_ACTUAL);
}
//+------------------------------------------------------------------+
// Selects EXCLUDED mode: every column is scaled except the last excluyed_cols_ ones.
// The choice is written to the log.
inline void ScalerBase::SetRangeEscaler(ulong excluyed_cols_ = 1)
{
   this.use_custom    = false;
   this.excluyed_cols = excluyed_cols_;
   const string msg = StringFormat("Configurado escalado EXCLUDED: excluir últimas %I64u columnas", excluyed_cols_);
   LogInfo(msg, FUNCION_ACTUAL);
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Validates the custom column range against a matrix.
// Returns true when [start_col, start_col + count_cols) lies inside the matrix columns;
// otherwise logs the reason and returns false.
bool ScalerBase::CheckSizeCustom(const matrix &mtx) const
{
   const ulong total_cols = mtx.Cols();
   if(start_col >= total_cols)
   {
      LogError(StringFormat("Columna de inicio %I64u >= total columnas %I64u", start_col, total_cols), FUNCION_ACTUAL);
      return false;
   }
   //--- start_col < total_cols here, so the subtraction cannot wrap; comparing this way
   //--- also rejects huge count_cols values that would make start_col + count_cols overflow
   if(count_cols > total_cols - start_col)
   {
      LogError(StringFormat("Rango [%I64u:%I64u] excede columnas disponibles %I64u", start_col, start_col + count_cols - 1, total_cols), FUNCION_ACTUAL);
      return false;
   }
   return true;
}
//+------------------------------------------------------------------+
// Validates the "exclude trailing columns" setting against a matrix.
// Fails (logging the reason) only when more columns are excluded than the matrix has;
// excluding exactly all columns is accepted by this check.
bool ScalerBase::CheckSizeExcluded(const matrix &mtx) const
{
   const ulong total_cols = mtx.Cols();
   if(excluyed_cols > total_cols)
   {
      //--- the message states the condition that is actually tested (strictly greater)
      LogError(StringFormat("Columnas a excluir %I64u > total columnas %I64u", excluyed_cols, total_cols), FUNCION_ACTUAL);
      return false;
   }
   return true;
}
//+------------------------------------------------------------------+
// Validates the custom element range against a vector.
// Returns true when [start_col, start_col + count_cols) lies inside the vector;
// otherwise logs the reason and returns false.
bool ScalerBase::CheckSizeCustom(const vector &v) const
{
   const ulong total = v.Size();
   if(start_col >= total)
   {
      LogError(StringFormat("Columna de inicio %I64u >= tamaño total del vector: %I64u", start_col, total), FUNCION_ACTUAL);
      return false;
   }
   //--- start_col < total here, so the subtraction cannot wrap; comparing this way
   //--- also rejects huge count_cols values that would make start_col + count_cols overflow
   if(count_cols > total - start_col)
   {
      LogError(StringFormat("Rango [%I64u:%I64u] excede el tamaño del vector %I64u", start_col, start_col + count_cols - 1, total), FUNCION_ACTUAL);
      return false;
   }
   return true;
}
//+------------------------------------------------------------------+
// Validates the "exclude trailing elements" setting against a vector.
// Fails (logging the reason) only when more elements are excluded than the vector has;
// excluding exactly all elements is accepted by this check.
bool ScalerBase::CheckSizeExcluded(const vector &v) const
{
   const ulong total = v.Size();
   if(excluyed_cols > total)
   {
      //--- the message states the condition that is actually tested (strictly greater)
      LogError(StringFormat("Columnas a excluir %I64u > tamaño del vector: %I64u", excluyed_cols, total), FUNCION_ACTUAL);
      return false;
   }
   return true;
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Returns the sub-matrix made of the columns selected for scaling.
// - Custom mode: columns [start_col, start_col + count_cols). A matrix that already has
//   exactly count_cols columns is treated as pre-extracted and returned as is.
// - Excluded mode: every column except the last excluyed_cols ones.
// When the configured selection does not fit the input, the problem is logged by the
// size check and X is returned unchanged instead of indexing out of range or wrapping
// the unsigned column count.
matrix ScalerBase::ExtractMatrixToScale(const matrix &X) const
{
   ulong first_col = 0;
   ulong width = 0;
   if(use_custom)
   {
      if(X.Cols() == count_cols)
         return X;
      if(!CheckSizeCustom(X))
         return X;
      first_col = start_col;
      width = count_cols;
   }
   else
   {
      if(excluyed_cols == 0)
         return X;
      if(!CheckSizeExcluded(X))
         return X;
      //--- safe: the check above guarantees excluyed_cols <= X.Cols()
      width = X.Cols() - excluyed_cols;
   }
   //--- copy the selected block
   const ulong total_rows = X.Rows();
   matrix result;
   result.Init(total_rows, width);
   for(ulong row = 0; row < total_rows; row++)
      for(ulong col = 0; col < width; col++)
         result[row][col] = X[row][first_col + col];
   return result;
}
//+------------------------------------------------------------------+
// Writes the processed columns back into a copy of the original matrix.
// If both matrices have the same shape nothing was extracted, so X_scaled is returned directly.
// Otherwise the processed values overwrite either the custom range (starting at start_col,
// count_cols wide) or the leading X_scaled.Cols() columns.
matrix ScalerBase::ReconstructMatrix(const matrix &X_original, const matrix &X_scaled) const
{
   const bool same_shape = (X_original.Rows() == X_scaled.Rows()) && (X_original.Cols() == X_scaled.Cols());
   if(same_shape)
      return X_scaled;
   //--- destination offset and number of columns to overwrite
   ulong dst_offset = 0;
   ulong width = X_scaled.Cols();
   if(use_custom)
   {
      dst_offset = start_col;
      width = count_cols;
   }
   //--- start from a full copy so untouched columns keep their original values
   matrix merged = X_original;
   const ulong total_rows = X_original.Rows();
   for(ulong r = 0; r < total_rows; r++)
      for(ulong c = 0; c < width; c++)
         merged[r][dst_offset + c] = X_scaled[r][c];
   return merged;
}
//+------------------------------------------------------------------+
// Returns the elements of X selected for scaling.
// - Custom mode: elements [start_col, start_col + count_cols). A vector that already has
//   exactly count_cols elements is treated as pre-extracted and returned as is.
// - Excluded mode: every element except the last excluyed_cols ones.
// When the configured selection does not fit the input, the problem is logged by the
// size check and X is returned unchanged instead of indexing out of range or wrapping
// the unsigned size.
vector ScalerBase::ExtractVectorToScale(const vector &X) const
{
   ulong first = 0;
   ulong length = 0;
   if(use_custom)
   {
      if(X.Size() == count_cols)
         return X;
      if(!CheckSizeCustom(X))
         return X;
      first = start_col;
      length = count_cols;
   }
   else
   {
      if(excluyed_cols == 0)
         return X;
      if(!CheckSizeExcluded(X))
         return X;
      //--- safe: the check above guarantees excluyed_cols <= X.Size()
      length = X.Size() - excluyed_cols;
   }
   //--- copy the selected slice
   vector result;
   result.Resize(length);
   for(ulong i = 0; i < length; i++)
      result[i] = X[first + i];
   return result;
}
//+------------------------------------------------------------------+
// Writes the processed elements back into a copy of the original vector.
// If both vectors have the same size nothing was extracted, so X_scaled is returned directly.
// Otherwise the processed values overwrite either the custom range (starting at start_col,
// count_cols long) or the leading X_scaled.Size() elements.
vector ScalerBase::ReconstructVector(const vector &X_original, const vector &X_scaled) const
{
   if(X_original.Size() == X_scaled.Size())
      return X_scaled;
   //--- destination offset and number of elements to overwrite
   ulong dst_offset = 0;
   ulong length = X_scaled.Size();
   if(use_custom)
   {
      dst_offset = start_col;
      length = count_cols;
   }
   //--- start from a full copy so untouched elements keep their original values
   vector merged = X_original;
   for(ulong k = 0; k < length; k++)
      merged[dst_offset + k] = X_scaled[k];
   return merged;
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Loads the scaler parameters through the derived Load() implementation.
// The scaler is marked as loaded only when Load() succeeds, so a missing or unreadable
// file no longer leaves the object flagged as ready to transform.
// Returns the result of Load().
inline bool ScalerBase::load(string prefix_name)
{
   if(!this.Load(prefix_name))
      return false;
   loaded_scaler = true;
   return true;
}
//+------------------------------------------------------------------+
// Builds the output file name as prefix_name + prefix_file and delegates the actual
// writing to the derived Save() implementation. Returns the result of Save().
inline bool ScalerBase::save(string prefix_name)
{
   file_name_out = prefix_name + prefix_file;
   const bool saved = Save();
   return saved;
}
//+------------------------------------------------------------------+
//| Standardization Scaler |
//+------------------------------------------------------------------+
// Scaler that standardizes each selected column as (x - mean) / std.
// The per-column means and standard deviations are kept in `mean` and `std`
// and persisted to a file whose name ends in "_mean_std.csv".
class StandardizationScaler : public ScalerBase
{
protected:
// Per-column mean and standard deviation computed by fit_transform(matrix, true) or read by Load()
vector mean, std;
bool Save() override;
bool Load(string prefix_name) override;
public:
// Sets the file-name suffix used by save()/Load()
StandardizationScaler() : ScalerBase() { this.prefix_file = "_mean_std.csv"; }
matrix fit_transform(const matrix &X, bool save_data) override;
vector fit_transform(const vector &X) override; // For vectors no fitted data is stored
matrix transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
vector transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
matrix inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
vector inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
};
//+------------------------------------------------------------------+
// Writes the fitted parameters and the column-selection settings to file_name_out
// in the common files folder. Line order: mean, std, count_cols, start_col,
// excluyed_cols, use_custom (as int). Returns false when the file cannot be opened.
bool StandardizationScaler::Save()
{
   //--- delete from the same (common) location the file is written to; without the flag
   //--- FileDelete would target the terminal-local folder instead
   FileDelete(this.file_name_out, FILE_COMMON);
   ResetLastError();
   const int handle = FileOpen(this.file_name_out, FILE_WRITE | FILE_CSV | FILE_COMMON);
   if(handle == INVALID_HANDLE)
   {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
   }
   //--- fitted parameters
   FileWrite(handle, vector_to_string(this.mean));
   FileWrite(handle, vector_to_string(this.std));
   //--- column-selection settings
   FileWrite(handle, count_cols);
   FileWrite(handle, start_col);
   FileWrite(handle, excluyed_cols);
   FileWrite(handle, (int)use_custom);
   FileClose(handle);
   return true;
}
//+------------------------------------------------------------------+
// Reads the parameters written by Save() from prefix_name + prefix_file in the common
// files folder. Values are read into temporaries and committed only after validation,
// so a truncated or empty file does not overwrite the current state.
// Returns false when the file cannot be opened or when the mean/std vectors are empty
// or differ in size.
bool StandardizationScaler::Load(string prefix_name)
{
   this.file_name_out = prefix_name + this.prefix_file;
   ResetLastError();
   const int handle = FileOpen(this.file_name_out, FILE_READ | FILE_CSV | FILE_COMMON, "\n");
   if(handle == INVALID_HANDLE)
   {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
   }
   //--- same line order as Save()
   vector mean_read;
   vector std_read;
   mean_read = string_to_vector(FileReadString(handle));
   std_read = string_to_vector(FileReadString(handle));
   const ulong count_read = (ulong)StringToInteger(FileReadString(handle));
   const ulong start_read = (ulong)StringToInteger(FileReadString(handle));
   const ulong excluded_read = (ulong)StringToInteger(FileReadString(handle));
   const bool custom_read = (bool)StringToInteger(FileReadString(handle));
   FileClose(handle);
   //--- transform() indexes mean and std in lockstep, so both must be present and equal in size
   if(mean_read.Size() == 0 || mean_read.Size() != std_read.Size())
   {
      LogError(StringFormat("Invalid scaler file (mean size= %I64u, std size= %I64u) >> Filename= %s", mean_read.Size(), std_read.Size(), this.file_name_out), FUNCION_ACTUAL);
      return false;
   }
   //--- commit
   this.mean = mean_read;
   this.std = std_read;
   this.count_cols = count_read;
   this.start_col = start_read;
   this.excluyed_cols = excluded_read;
   this.use_custom = custom_read;
   return true;
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Standardizes the selected elements of a single vector with the vector's own mean and
// standard deviation; nothing is stored. A standard deviation below 1e-9 is replaced by 1.0.
// The result has the layout of X with the selected elements replaced.
// On a loaded scaler no fitting is done: the stored parameters are applied instead. An input
// whose size already equals the number of stored parameters is transformed directly; any
// other input is treated as the full layout (extract, scale, write back) so it no longer
// fails on the size comparison.
// Returns X unchanged when the configured selection does not fit the input.
vector StandardizationScaler::fit_transform(const vector &X)
{
   if(loaded_scaler)
   {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      if(X.Size() == this.mean.Size())
         return transform(X);
      //--- validate before extraction, transform(vector) does not do it itself
      const bool loaded_layout_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
      if(!loaded_layout_ok)
         return X;
      return transform(X, true, true);
   }
   //---
   const bool layout_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!layout_ok)
      return X;
   //---
   vector selected = ExtractVectorToScale(X);
   //---
   const double mean_val = selected.Mean();
   double std_val = selected.Std();
   if(std_val < 1e-9)
      std_val = 1.0; // avoid division by (near) zero
   //---
   const ulong total = selected.Size();
   for(ulong i = 0; i < total; i++)
      selected[i] = (selected[i] - mean_val) / std_val;
   return ReconstructVector(X, selected);
}
//+------------------------------------------------------------------+
// Computes per-column mean and standard deviation of the selected columns, standardizes
// them and returns X with those columns replaced. When save_data is true the statistics
// are stored in the scaler. A column standard deviation below 1e-9 is replaced by 1.0.
// On a loaded scaler no fitting is done: the stored parameters are applied instead. An input
// whose column count already equals the number of stored parameters is transformed directly;
// any other input is treated as the full layout (extract, scale, write back) so it no longer
// fails on the column comparison.
// Returns X unchanged when the configured selection does not fit the input.
matrix StandardizationScaler::fit_transform(const matrix &X, bool save_data)
{
   if(loaded_scaler)
   {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      if(X.Cols() == this.mean.Size())
         return transform(X);
      return transform(X, true, true);
   }
   LogInfo(StringFormat("Numero de columnas de entrada: %I64u", X.Cols()), FUNCION_ACTUAL);
   //---
   const bool layout_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!layout_ok)
      return X;
   //---
   matrix X_to_scale = ExtractMatrixToScale(X);
   const ulong n_rows = X_to_scale.Rows();
   const ulong n_cols = X_to_scale.Cols();
   LogInfo(StringFormat("Columnas a escalar: %I64u", n_cols), FUNCION_ACTUAL);
   //--- per-column statistics
   vector mean_cts(n_cols);
   vector std_cts(n_cols);
   for(ulong i = 0; i < n_cols; i++)
   {
      vector column = X_to_scale.Col(i);
      mean_cts[i] = column.Mean();
      std_cts[i] = column.Std();
      // avoid division by zero
      if(std_cts[i] < 1e-9)
      {
         LogWarning(StringFormat("Columna %I64u tiene std muy pequeño (%.2e), usando 1.0", i, std_cts[i]), FUNCION_ACTUAL);
         std_cts[i] = 1.0;
      }
   }
   //--- apply
   matrix X_scaled(n_rows, n_cols);
   for(ulong row = 0; row < n_rows; row++)
      for(ulong col = 0; col < n_cols; col++)
         X_scaled[row][col] = (X_to_scale[row][col] - mean_cts[col]) / std_cts[col];
   if(save_data)
   {
      this.mean = mean_cts;
      this.std = std_cts;
   }
   //--- the input layout is always rebuilt here
   return ReconstructMatrix(X, X_scaled);
}
//+------------------------------------------------------------------+
// Applies the stored mean/std to a matrix.
// solo_escalar_lo_previsto: X has the full layout; extract the configured columns first.
//   When false, X must already contain exactly the fitted columns.
// reconstruir: return X with the scaled columns written back instead of only the scaled block.
// The scaler counts as ready when it was loaded or when fit_transform stored parameters,
// matching what the error message tells the caller to do.
// The layout check runs only when columns are extracted; an already-extracted input is
// validated by the column-count comparison alone.
// Returns X unchanged on any validation failure.
matrix StandardizationScaler::transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
   const bool ready = loaded_scaler || this.mean.Size() > 0;
   if(!ready)
   {
      LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
      return X;
   }
   //---
   if(solo_escalar_lo_previsto)
   {
      const bool layout_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
      if(!layout_ok)
         return X;
   }
   //---
   matrix X_to_scale = solo_escalar_lo_previsto ? ExtractMatrixToScale(X) : X;
   const ulong n_rows = X_to_scale.Rows();
   const ulong n_cols = X_to_scale.Cols();
   if(n_cols != this.mean.Size())
   {
      LogError(StringFormat("Columnas a escalar %I64u != columnas entrenadas %I64u", n_cols, this.mean.Size()), FUNCION_ACTUAL);
      return X;
   }
   //---
   matrix X_scaled(n_rows, n_cols);
   for(ulong row = 0; row < n_rows; row++)
      for(ulong col = 0; col < n_cols; col++)
         X_scaled[row][col] = (X_to_scale[row][col] - this.mean[col]) / this.std[col];
   //---
   if(reconstruir)
      return ReconstructMatrix(X, X_scaled);
   return X_scaled;
}
//+------------------------------------------------------------------+
// Applies the stored mean/std element by element to a vector (one value per fitted column).
// solo_escalar_lo_previsto: X has the full layout; extract the configured elements first.
// reconstruir: return X with the scaled elements written back instead of only the scaled slice.
// The scaler counts as ready when it was loaded or when fit_transform stored parameters.
// Returns X unchanged on any validation failure.
vector StandardizationScaler::transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
   const bool ready = loaded_scaler || this.mean.Size() > 0;
   if(!ready)
   {
      LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
      return X;
   }
   //---
   vector X_to_scale = solo_escalar_lo_previsto ? ExtractVectorToScale(X) : X;
   const ulong total = X_to_scale.Size();
   if(total != this.mean.Size())
   {
      //--- both sizes are ulong: use the 64-bit unsigned specifier for each
      LogError(StringFormat("Elementos a escalar %I64u != elementos entrenados %I64u", total, this.mean.Size()), FUNCION_ACTUAL);
      return X;
   }
   //---
   vector X_scaled(total);
   for(ulong i = 0; i < total; i++)
      X_scaled[i] = (X_to_scale[i] - this.mean[i]) / this.std[i];
   //---
   if(reconstruir)
      return ReconstructVector(X, X_scaled);
   return X_scaled;
}
//+------------------------------------------------------------------+
// Reverts standardization on a matrix: x = x_scaled * std + mean, column by column.
// solo_escalar_lo_previsto: X_scaled has the full layout; extract the configured columns first.
// reconstruir: return X_scaled with the restored columns written back.
// The scaler counts as ready when it was loaded or when fit_transform stored parameters,
// which is the workflow the error message describes.
// Returns X_scaled unchanged on any validation failure.
matrix StandardizationScaler::inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
   const bool ready = loaded_scaler || this.mean.Size() > 0;
   if(!ready)
   {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
   }
   //---
   matrix X_to_unscale = solo_escalar_lo_previsto ? ExtractMatrixToScale(X_scaled) : X_scaled;
   const ulong n_rows = X_to_unscale.Rows();
   const ulong n_cols = X_to_unscale.Cols();
   //---
   if(n_cols != this.mean.Size())
   {
      LogError(StringFormat("Columnas escaladas %I64u != columnas entrenadas %I64u", n_cols, this.mean.Size()), FUNCION_ACTUAL);
      return X_scaled;
   }
   //---
   matrix X_unscaled(n_rows, n_cols);
   for(ulong row = 0; row < n_rows; row++)
      for(ulong col = 0; col < n_cols; col++)
         X_unscaled[row][col] = X_to_unscale[row][col] * this.std[col] + this.mean[col];
   //---
   if(reconstruir)
      return ReconstructMatrix(X_scaled, X_unscaled);
   return X_unscaled;
}
//+------------------------------------------------------------------+
// Reverts standardization on a vector: x = x_scaled * std + mean, element by element.
// solo_escalar_lo_previsto: X_scaled has the full layout; extract the configured elements first.
// reconstruir: return X_scaled with the restored elements written back.
// The scaler counts as ready when it was loaded or when fit_transform stored parameters.
// Returns X_scaled unchanged on any validation failure.
vector StandardizationScaler::inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
   const bool ready = loaded_scaler || this.mean.Size() > 0;
   if(!ready)
   {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
   }
   //---
   vector X_to_unscale = solo_escalar_lo_previsto ? ExtractVectorToScale(X_scaled) : X_scaled;
   const ulong total = X_to_unscale.Size();
   //---
   if(total != this.mean.Size())
   {
      LogError(StringFormat("Elementos escalados %I64u != elementos entrenados %I64u", total, this.mean.Size()), FUNCION_ACTUAL);
      return X_scaled;
   }
   //---
   vector X_unscaled(total);
   for(ulong i = 0; i < total; i++)
      X_unscaled[i] = X_to_unscale[i] * this.std[i] + this.mean[i];
   //---
   if(reconstruir)
      return ReconstructVector(X_scaled, X_unscaled);
   return X_unscaled;
}
//+------------------------------------------------------------------+
//| MaxMin Scaler |
//+------------------------------------------------------------------+
// Scaler that maps each selected column as (x - min) / (max - min).
// The per-column minimum and maximum are kept in `min_vals` and `max_vals`
// and persisted to a file whose name ends in "_min_max.csv".
class MaxMinScaler : public ScalerBase
{
protected:
// Per-column minimum and maximum computed by fit_transform(matrix, true) or read by Load()
vector min_vals, max_vals;
bool Save() override;
bool Load(string prefix_name) override;
public:
// Sets the file-name suffix used by save()/Load()
MaxMinScaler() : ScalerBase() { this.prefix_file = "_min_max.csv"; }
vector fit_transform(const vector &X) override; // For vectors no fitted data is stored
matrix fit_transform(const matrix &X, bool save_data) override;
matrix transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
vector transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
matrix inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
vector inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
};
//+------------------------------------------------------------------+
// Writes the fitted parameters and the column-selection settings to file_name_out
// in the common files folder. Line order: min_vals, max_vals, count_cols, start_col,
// excluyed_cols, use_custom (as int). Returns false when the file cannot be opened.
bool MaxMinScaler::Save()
{
   //--- delete from the same (common) location the file is written to; without the flag
   //--- FileDelete would target the terminal-local folder instead
   FileDelete(this.file_name_out, FILE_COMMON);
   ResetLastError();
   const int handle = FileOpen(this.file_name_out, FILE_WRITE | FILE_CSV | FILE_COMMON);
   if(handle == INVALID_HANDLE)
   {
      LogFatalError(StringFormat("Invalid handle Err= %d - Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
   }
   //--- fitted parameters
   FileWrite(handle, vector_to_string(this.min_vals));
   FileWrite(handle, vector_to_string(this.max_vals));
   //--- column-selection settings
   FileWrite(handle, count_cols);
   FileWrite(handle, start_col);
   FileWrite(handle, excluyed_cols);
   FileWrite(handle, (int)use_custom);
   FileClose(handle);
   return true;
}
//+------------------------------------------------------------------+
// Reads the parameters written by Save() from prefix_name + prefix_file in the common
// files folder. Values are read into temporaries and committed only after validation,
// so a truncated or empty file does not overwrite the current state.
// Returns false when the file cannot be opened or when the min/max vectors are empty
// or differ in size.
bool MaxMinScaler::Load(string prefix_name)
{
   this.file_name_out = prefix_name + this.prefix_file;
   ResetLastError();
   const int handle = FileOpen(this.file_name_out, FILE_READ | FILE_CSV | FILE_COMMON, "\n");
   if(handle == INVALID_HANDLE)
   {
      LogFatalError(StringFormat("Invalid handle Err= %d - Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
   }
   //--- same line order as Save()
   vector min_read;
   vector max_read;
   min_read = string_to_vector(FileReadString(handle));
   max_read = string_to_vector(FileReadString(handle));
   const ulong count_read = (ulong)StringToInteger(FileReadString(handle));
   const ulong start_read = (ulong)StringToInteger(FileReadString(handle));
   const ulong excluded_read = (ulong)StringToInteger(FileReadString(handle));
   const bool custom_read = (bool)StringToInteger(FileReadString(handle));
   FileClose(handle);
   //--- transform() indexes min_vals and max_vals in lockstep, so both must be present and equal in size
   if(min_read.Size() == 0 || min_read.Size() != max_read.Size())
   {
      LogError(StringFormat("Invalid scaler file (min size= %I64u, max size= %I64u) - Filename= %s", min_read.Size(), max_read.Size(), this.file_name_out), FUNCION_ACTUAL);
      return false;
   }
   //--- commit
   this.min_vals = min_read;
   this.max_vals = max_read;
   this.count_cols = count_read;
   this.start_col = start_read;
   this.excluyed_cols = excluded_read;
   this.use_custom = custom_read;
   return true;
}
//+------------------------------------------------------------------+
// Min-max scales the selected elements of a single vector with the vector's own minimum
// and maximum; nothing is stored. A range below 1e-10 is replaced by 1.0.
// The result has the layout of X with the selected elements replaced.
// On a loaded scaler no fitting is done: the stored parameters are applied instead. An input
// whose size already equals the number of stored parameters is transformed directly; any
// other input is treated as the full layout (extract, scale, write back) so it no longer
// fails on the size comparison.
// Returns X unchanged when the configured selection does not fit the input.
vector MaxMinScaler::fit_transform(const vector &X)
{
   if(loaded_scaler)
   {
      LogWarning("Este es un escalador cargado | no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      if(X.Size() == this.min_vals.Size())
         return transform(X);
      //--- validate before extraction, transform(vector) does not do it itself
      const bool loaded_layout_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
      if(!loaded_layout_ok)
         return X;
      return transform(X, true, true);
   }
   //---
   const bool layout_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!layout_ok)
      return X;
   //---
   vector selected = ExtractVectorToScale(X);
   //---
   double upper = selected.Max();
   const double lower = selected.Min();
   if(upper - lower < 1e-10)
      upper = lower + 1.0; // avoid division by (near) zero
   //---
   const ulong total = selected.Size();
   for(ulong i = 0; i < total; i++)
      selected[i] = (selected[i] - lower) / (upper - lower);
   //---
   return ReconstructVector(X, selected);
}
//+------------------------------------------------------------------+
// Computes per-column minimum and maximum of the selected columns, min-max scales them
// and returns X with those columns replaced. When save_data is true the statistics are
// stored in the scaler. A column range below 1e-10 is replaced by a range of 1.0.
// On a loaded scaler no fitting is done: the stored parameters are applied instead. An input
// whose column count already equals the number of stored parameters is transformed directly;
// any other input is treated as the full layout (extract, scale, write back) so it no longer
// fails on the column comparison.
// Returns X unchanged when the configured selection does not fit the input.
matrix MaxMinScaler::fit_transform(const matrix &X, bool save_data)
{
   if(loaded_scaler)
   {
      LogWarning("Este es un escalador cargado | no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      if(X.Cols() == this.min_vals.Size())
         return transform(X);
      return transform(X, true, true);
   }
   LogInfo(StringFormat("Numero de columnas de entrada: %I64u", X.Cols()), FUNCION_ACTUAL);
   //---
   const bool layout_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!layout_ok)
      return X;
   //---
   matrix X_to_scale = ExtractMatrixToScale(X);
   const ulong n_rows = X_to_scale.Rows();
   const ulong n_cols = X_to_scale.Cols();
   LogInfo(StringFormat("Columnas a escalar: %I64u", n_cols), FUNCION_ACTUAL);
   //--- per-column statistics
   vector min_vals_cts(n_cols);
   vector max_vals_cts(n_cols);
   for(ulong i = 0; i < n_cols; i++)
   {
      vector column = X_to_scale.Col(i);
      min_vals_cts[i] = column.Min();
      max_vals_cts[i] = column.Max();
      // avoid division by zero on (near) constant columns
      if(fabs(max_vals_cts[i] - min_vals_cts[i]) < 1e-10)
      {
         LogWarning(StringFormat("Columna %I64u tiene rango muy pequeño (%.2e), usando rango 1.0", i, max_vals_cts[i] - min_vals_cts[i]), FUNCION_ACTUAL);
         max_vals_cts[i] = min_vals_cts[i] + 1.0;
      }
   }
   //--- apply
   matrix X_scaled(n_rows, n_cols);
   for(ulong row = 0; row < n_rows; row++)
      for(ulong col = 0; col < n_cols; col++)
         X_scaled[row][col] = (X_to_scale[row][col] - min_vals_cts[col]) / (max_vals_cts[col] - min_vals_cts[col]);
   if(save_data)
   {
      this.min_vals = min_vals_cts;
      this.max_vals = max_vals_cts;
   }
   //---
   return ReconstructMatrix(X, X_scaled);
}
//+------------------------------------------------------------------+
// Applies the stored min/max to a matrix: (x - min) / (max - min), column by column.
// solo_escalar_lo_previsto: X has the full layout; extract the configured columns first.
//   When false, X must already contain exactly the fitted columns.
// reconstruir: return X with the scaled columns written back instead of only the scaled block.
// The scaler counts as ready when it was loaded or when fit_transform stored parameters,
// matching what the error message tells the caller to do.
// The layout check runs only when columns are extracted; an already-extracted input is
// validated by the column-count comparison alone.
// Returns X unchanged on any validation failure.
matrix MaxMinScaler::transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
   const bool ready = loaded_scaler || this.min_vals.Size() > 0;
   if(!ready)
   {
      LogError("Escalador no entrenado. Llame primero a fit_transform o load()", FUNCION_ACTUAL);
      return X;
   }
   //---
   if(solo_escalar_lo_previsto)
   {
      const bool layout_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
      if(!layout_ok)
         return X;
   }
   //---
   matrix X_to_scale = solo_escalar_lo_previsto ? ExtractMatrixToScale(X) : X;
   const ulong n_rows = X_to_scale.Rows();
   const ulong n_cols = X_to_scale.Cols();
   if(n_cols != this.min_vals.Size())
   {
      LogError(StringFormat("Columnas a escalar %I64u != columnas entrenadas %I64u", n_cols, this.min_vals.Size()), FUNCION_ACTUAL);
      return X;
   }
   //---
   matrix X_scaled(n_rows, n_cols);
   for(ulong row = 0; row < n_rows; row++)
      for(ulong col = 0; col < n_cols; col++)
         X_scaled[row][col] = (X_to_scale[row][col] - this.min_vals[col]) / (this.max_vals[col] - this.min_vals[col]);
   //---
   if(reconstruir)
      return ReconstructMatrix(X, X_scaled);
   return X_scaled;
}
//+------------------------------------------------------------------+
// Applies the stored min/max element by element to a vector (one value per fitted column).
// solo_escalar_lo_previsto: X has the full layout; extract the configured elements first.
// reconstruir: return X with the scaled elements written back instead of only the scaled slice.
// The scaler counts as ready when it was loaded or when fit_transform stored parameters.
// Returns X unchanged on any validation failure.
vector MaxMinScaler::transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
   const bool ready = loaded_scaler || this.min_vals.Size() > 0;
   if(!ready)
   {
      LogError("Escalador no entrenado. Llame primero a fit_transform o load()", FUNCION_ACTUAL);
      return X;
   }
   //---
   vector X_to_scale = solo_escalar_lo_previsto ? ExtractVectorToScale(X) : X;
   const ulong total = X_to_scale.Size();
   if(total != this.min_vals.Size())
   {
      LogError(StringFormat("Elementos a escalar %I64u != elementos entrenados %I64u", total, this.min_vals.Size()), FUNCION_ACTUAL);
      return X;
   }
   //---
   vector X_scaled(total);
   for(ulong i = 0; i < total; i++)
      X_scaled[i] = (X_to_scale[i] - this.min_vals[i]) / (this.max_vals[i] - this.min_vals[i]);
   //---
   if(reconstruir)
      return ReconstructVector(X, X_scaled);
   return X_scaled;
}
//+------------------------------------------------------------------+
// Reverts min-max scaling on a matrix: x = x_scaled * (max - min) + min, column by column.
// solo_escalar_lo_previsto: X_scaled has the full layout; extract the configured columns first.
// reconstruir: return X_scaled with the restored columns written back.
// The scaler counts as ready when it was loaded or when fit_transform stored parameters,
// which is the workflow the error message describes.
// Returns X_scaled unchanged on any validation failure.
matrix MaxMinScaler::inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
   const bool ready = loaded_scaler || this.min_vals.Size() > 0;
   if(!ready)
   {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
   }
   //---
   matrix X_to_unscale = solo_escalar_lo_previsto ? ExtractMatrixToScale(X_scaled) : X_scaled;
   const ulong n_rows = X_to_unscale.Rows();
   const ulong n_cols = X_to_unscale.Cols();
   //---
   if(n_cols != this.min_vals.Size())
   {
      LogError(StringFormat("Columnas escaladas %I64u != columnas entrenadas %I64u", n_cols, this.min_vals.Size()), FUNCION_ACTUAL);
      return X_scaled;
   }
   //---
   matrix X_unscaled(n_rows, n_cols);
   for(ulong row = 0; row < n_rows; row++)
      for(ulong col = 0; col < n_cols; col++)
         X_unscaled[row][col] = X_to_unscale[row][col] * (this.max_vals[col] - this.min_vals[col]) + this.min_vals[col];
   //---
   if(reconstruir)
      return ReconstructMatrix(X_scaled, X_unscaled);
   return X_unscaled;
}
//+------------------------------------------------------------------+
// Reverts min-max scaling on a vector: x = x_scaled * (max - min) + min, element by element.
// solo_escalar_lo_previsto: X_scaled has the full layout; extract the configured elements first.
// reconstruir: return X_scaled with the restored elements written back.
// The scaler counts as ready when it was loaded or when fit_transform stored parameters.
// Returns X_scaled unchanged on any validation failure.
vector MaxMinScaler::inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
   const bool ready = loaded_scaler || this.min_vals.Size() > 0;
   if(!ready)
   {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
   }
   //---
   vector X_to_unscale = solo_escalar_lo_previsto ? ExtractVectorToScale(X_scaled) : X_scaled;
   const ulong total = X_to_unscale.Size();
   if(total != this.min_vals.Size())
   {
      LogError(StringFormat("Elementos escalados %I64u != elementos entrenados %I64u", total, this.min_vals.Size()), FUNCION_ACTUAL);
      return X_scaled;
   }
   //---
   vector X_unscaled(total);
   for(ulong i = 0; i < total; i++)
      X_unscaled[i] = X_to_unscale[i] * (this.max_vals[i] - this.min_vals[i]) + this.min_vals[i];
   //---
   if(reconstruir)
      return ReconstructVector(X_scaled, X_unscaled);
   return X_unscaled;
}
//+------------------------------------------------------------------+
//| Robust Scaler |
//+------------------------------------------------------------------+
// Scaler that maps each selected column as (x - median) / IQR, where IQR is the
// difference between the 75th and 25th percentiles.
// The per-column medians and IQRs are kept in `medians` and `iqrs`
// and persisted to a file whose name ends in "_median_iqr.csv".
class RobustScaler : public ScalerBase
{
protected:
// Per-column median and interquartile range computed by fit_transform(matrix, true) or read by Load()
vector medians, iqrs;
bool Save() override;
bool Load(string prefix_name) override;
public:
// Sets the file-name suffix used by save()/Load()
RobustScaler() : ScalerBase() { this.prefix_file = "_median_iqr.csv"; }
vector fit_transform(const vector &X) override; // For vectors no fitted data is stored
matrix fit_transform(const matrix &X, bool save_data) override;
matrix transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
vector transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
matrix inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
vector inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
};
//+------------------------------------------------------------------+
// Writes the fitted parameters and the column-selection settings to file_name_out
// in the common files folder. Line order: medians, iqrs, count_cols, start_col,
// excluyed_cols, use_custom (as int). Returns false when the file cannot be opened.
bool RobustScaler::Save()
{
   //--- delete from the same (common) location the file is written to; without the flag
   //--- FileDelete would target the terminal-local folder instead
   FileDelete(this.file_name_out, FILE_COMMON);
   ResetLastError();
   const int handle = FileOpen(this.file_name_out, FILE_WRITE | FILE_CSV | FILE_COMMON);
   if(handle == INVALID_HANDLE)
   {
      LogFatalError(StringFormat("Invalid handle Err= %d - Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
   }
   //--- fitted parameters
   FileWrite(handle, vector_to_string(this.medians));
   FileWrite(handle, vector_to_string(this.iqrs));
   //--- column-selection settings
   FileWrite(handle, count_cols);
   FileWrite(handle, start_col);
   FileWrite(handle, excluyed_cols);
   FileWrite(handle, (int)use_custom);
   FileClose(handle);
   return true;
}
//+------------------------------------------------------------------+
// Reads the parameters written by Save() from prefix_name + prefix_file in the common
// files folder. Values are read into temporaries and committed only after validation,
// so a truncated or empty file does not overwrite the current state.
// Returns false when the file cannot be opened or when the median/IQR vectors are empty
// or differ in size.
bool RobustScaler::Load(string prefix_name)
{
   this.file_name_out = prefix_name + this.prefix_file;
   ResetLastError();
   const int handle = FileOpen(this.file_name_out, FILE_READ | FILE_CSV | FILE_COMMON, "\n");
   if(handle == INVALID_HANDLE)
   {
      LogFatalError(StringFormat("Invalid handle Err= %d - Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
   }
   //--- same line order as Save()
   vector medians_read;
   vector iqrs_read;
   medians_read = string_to_vector(FileReadString(handle));
   iqrs_read = string_to_vector(FileReadString(handle));
   const ulong count_read = (ulong)StringToInteger(FileReadString(handle));
   const ulong start_read = (ulong)StringToInteger(FileReadString(handle));
   const ulong excluded_read = (ulong)StringToInteger(FileReadString(handle));
   const bool custom_read = (bool)StringToInteger(FileReadString(handle));
   FileClose(handle);
   //--- transform() indexes medians and iqrs in lockstep, so both must be present and equal in size
   if(medians_read.Size() == 0 || medians_read.Size() != iqrs_read.Size())
   {
      LogError(StringFormat("Invalid scaler file (median size= %I64u, iqr size= %I64u) - Filename= %s", medians_read.Size(), iqrs_read.Size(), this.file_name_out), FUNCION_ACTUAL);
      return false;
   }
   //--- commit
   this.medians = medians_read;
   this.iqrs = iqrs_read;
   this.count_cols = count_read;
   this.start_col = start_read;
   this.excluyed_cols = excluded_read;
   this.use_custom = custom_read;
   return true;
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Robust-scales the selected elements of a single vector with the vector's own median and
// interquartile range (75th minus 25th percentile); nothing is stored. An IQR whose absolute
// value is below 1e-10 is replaced by 1.0.
// The result has the layout of X with the selected elements replaced.
// On a loaded scaler no fitting is done: the stored parameters are applied instead. An input
// whose size already equals the number of stored parameters is transformed directly; any
// other input is treated as the full layout (extract, scale, write back) so it no longer
// fails on the size comparison.
// Returns X unchanged when the configured selection does not fit the input.
vector RobustScaler::fit_transform(const vector &X)
{
   if(loaded_scaler)
   {
      LogWarning("Este es un escalador cargado | no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      if(X.Size() == this.medians.Size())
         return transform(X);
      //--- validate before extraction, transform(vector) does not do it itself
      const bool loaded_layout_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
      if(!loaded_layout_ok)
         return X;
      return transform(X, true, true);
   }
   //---
   const bool layout_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!layout_ok)
      return X;
   //---
   vector selected = ExtractVectorToScale(X);
   //---
   const double center = selected.Median();
   const double q75 = selected.Percentile(75);
   const double q25 = selected.Percentile(25);
   double spread = q75 - q25;
   if(fabs(spread) < 1e-10)
      spread = 1.00; // avoid division by (near) zero
   //---
   const ulong total = selected.Size();
   for(ulong i = 0; i < total; i++)
      selected[i] = (selected[i] - center) / spread;
   //---
   return ReconstructVector(X, selected);
}
//+------------------------------------------------------------------+
// Computes per-column median and interquartile range (75th minus 25th percentile) of the
// selected columns, robust-scales them and returns X with those columns replaced. When
// save_data is true the statistics are stored in the scaler. A column IQR whose absolute
// value is below 1e-10 is replaced by 1.0.
// On a loaded scaler no fitting is done: the stored parameters are applied instead. An input
// whose column count already equals the number of stored parameters is transformed directly;
// any other input is treated as the full layout (extract, scale, write back) so it no longer
// fails on the column comparison.
// Returns X unchanged when the configured selection does not fit the input.
matrix RobustScaler::fit_transform(const matrix &X, bool save_data)
{
   if(loaded_scaler)
   {
      LogWarning("Este es un escalador cargado | no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      if(X.Cols() == this.medians.Size())
         return transform(X);
      return transform(X, true, true);
   }
   LogInfo(StringFormat("Numero de columnas de entrada: %I64u", X.Cols()), FUNCION_ACTUAL);
   //---
   const bool layout_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!layout_ok)
      return X;
   //---
   matrix X_to_scale = ExtractMatrixToScale(X);
   const ulong n_rows = X_to_scale.Rows();
   const ulong n_cols = X_to_scale.Cols();
   LogInfo(StringFormat("Columnas a escalar: %I64u", n_cols), FUNCION_ACTUAL);
   //--- per-column statistics
   vector medians_cts(n_cols);
   vector iqrs_cts(n_cols);
   for(ulong i = 0; i < n_cols; i++)
   {
      vector column = X_to_scale.Col(i);
      medians_cts[i] = column.Median();
      const double q75 = column.Percentile(75);
      const double q25 = column.Percentile(25);
      iqrs_cts[i] = q75 - q25;
      // avoid division by zero on (near) constant columns
      if(fabs(iqrs_cts[i]) < 1e-10)
      {
         LogWarning(StringFormat("Columna %I64u tiene IQR muy pequeño (%.2e), usando 1.0", i, iqrs_cts[i]), FUNCION_ACTUAL);
         iqrs_cts[i] = 1.0;
      }
   }
   //--- apply
   matrix X_scaled(n_rows, n_cols);
   for(ulong row = 0; row < n_rows; row++)
      for(ulong col = 0; col < n_cols; col++)
         X_scaled[row][col] = (X_to_scale[row][col] - medians_cts[col]) / iqrs_cts[col];
   if(save_data)
   {
      this.medians = medians_cts;
      this.iqrs = iqrs_cts;
   }
   //---
   return ReconstructMatrix(X, X_scaled);
}
//+------------------------------------------------------------------+
// Applies the stored median/IQR to a matrix: (x - median) / IQR, column by column.
// solo_escalar_lo_previsto: X has the full layout; extract the configured columns first.
//   When false, X must already contain exactly the fitted columns.
// reconstruir: return X with the scaled columns written back instead of only the scaled block.
// The scaler counts as ready when it was loaded or when fit_transform stored parameters,
// matching what the error message tells the caller to do.
// The layout check runs only when columns are extracted; an already-extracted input is
// validated by the column-count comparison alone.
// Returns X unchanged on any validation failure.
matrix RobustScaler::transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
   const bool ready = loaded_scaler || this.medians.Size() > 0;
   if(!ready)
   {
      LogError("Escalador no entrenado. Llame primero a fit_transform o load()", FUNCION_ACTUAL);
      return X;
   }
   //---
   if(solo_escalar_lo_previsto)
   {
      const bool layout_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
      if(!layout_ok)
         return X;
   }
   //---
   matrix X_to_scale = solo_escalar_lo_previsto ? ExtractMatrixToScale(X) : X;
   const ulong n_rows = X_to_scale.Rows();
   const ulong n_cols = X_to_scale.Cols();
   if(n_cols != this.medians.Size())
   {
      LogError(StringFormat("Columnas a escalar %I64u != columnas entrenadas %I64u", n_cols, this.medians.Size()), FUNCION_ACTUAL);
      return X;
   }
   //---
   matrix X_scaled(n_rows, n_cols);
   for(ulong row = 0; row < n_rows; row++)
      for(ulong col = 0; col < n_cols; col++)
         X_scaled[row][col] = (X_to_scale[row][col] - this.medians[col]) / this.iqrs[col];
   //---
   if(reconstruir)
      return ReconstructMatrix(X, X_scaled);
   return X_scaled;
}
//+------------------------------------------------------------------+
// Applies the stored median/IQR element by element to a vector (one value per fitted column).
// solo_escalar_lo_previsto: X has the full layout; extract the configured elements first.
// reconstruir: return X with the scaled elements written back instead of only the scaled slice.
// The scaler counts as ready when it was loaded or when fit_transform stored parameters.
// Returns X unchanged on any validation failure.
vector RobustScaler::transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
   const bool ready = loaded_scaler || this.medians.Size() > 0;
   if(!ready)
   {
      LogError("Escalador no entrenado. Llame primero a fit_transform o load()", FUNCION_ACTUAL);
      return X;
   }
   //---
   vector X_to_scale = solo_escalar_lo_previsto ? ExtractVectorToScale(X) : X;
   const ulong total = X_to_scale.Size();
   if(total != this.medians.Size())
   {
      LogError(StringFormat("Elementos a escalar %I64u != elementos entrenados %I64u", total, this.medians.Size()), FUNCION_ACTUAL);
      return X;
   }
   //---
   vector X_scaled(total);
   for(ulong i = 0; i < total; i++)
      X_scaled[i] = (X_to_scale[i] - this.medians[i]) / this.iqrs[i];
   //---
   if(reconstruir)
      return ReconstructVector(X, X_scaled);
   return X_scaled;
}
//+------------------------------------------------------------------+
// Reverts robust scaling on a matrix: x = x_scaled * IQR + median, column by column.
// solo_escalar_lo_previsto: X_scaled has the full layout; extract the configured columns first.
// reconstruir: return X_scaled with the restored columns written back.
// The scaler counts as ready when it was loaded or when fit_transform stored parameters,
// which is the workflow the error message describes.
// Returns X_scaled unchanged on any validation failure.
matrix RobustScaler::inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
   const bool ready = loaded_scaler || this.medians.Size() > 0;
   if(!ready)
   {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
   }
   //---
   matrix X_to_unscale = solo_escalar_lo_previsto ? ExtractMatrixToScale(X_scaled) : X_scaled;
   const ulong n_rows = X_to_unscale.Rows();
   const ulong n_cols = X_to_unscale.Cols();
   if(n_cols != this.medians.Size())
   {
      LogError(StringFormat("Columnas escaladas %I64u != columnas entrenadas %I64u", n_cols, this.medians.Size()), FUNCION_ACTUAL);
      return X_scaled;
   }
   //---
   matrix X_unscaled(n_rows, n_cols);
   for(ulong row = 0; row < n_rows; row++)
      for(ulong col = 0; col < n_cols; col++)
         X_unscaled[row][col] = X_to_unscale[row][col] * this.iqrs[col] + this.medians[col];
   //---
   if(reconstruir)
      return ReconstructMatrix(X_scaled, X_unscaled);
   return X_unscaled;
}
//+------------------------------------------------------------------+
// Reverts robust scaling on a vector: x = x_scaled * IQR + median, element by element.
// solo_escalar_lo_previsto: X_scaled has the full layout; extract the configured elements first.
// reconstruir: return X_scaled with the restored elements written back.
// The scaler counts as ready when it was loaded or when fit_transform stored parameters.
// Returns X_scaled unchanged on any validation failure.
vector RobustScaler::inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
   const bool ready = loaded_scaler || this.medians.Size() > 0;
   if(!ready)
   {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
   }
   //---
   vector X_to_unscale = solo_escalar_lo_previsto ? ExtractVectorToScale(X_scaled) : X_scaled;
   const ulong total = X_to_unscale.Size();
   if(total != this.medians.Size())
   {
      LogError(StringFormat("Elementos escalados %I64u != elementos entrenados %I64u", total, this.medians.Size()), FUNCION_ACTUAL);
      return X_scaled;
   }
   //---
   vector X_unscaled(total);
   for(ulong i = 0; i < total; i++)
      X_unscaled[i] = X_to_unscale[i] * this.iqrs[i] + this.medians[i];
   //---
   if(reconstruir)
      return ReconstructVector(X_scaled, X_unscaled);
   return X_unscaled;
}
//+------------------------------------------------------------------+
#endif