1570 lines
97 KiB
MQL5
1570 lines
97 KiB
MQL5
//+------------------------------------------------------------------+
|
|
//| ScalerBase.mqh |
|
|
//| Copyright 2025, Leo. |
|
|
//| https://www.mql5.com |
|
|
//+------------------------------------------------------------------+
|
|
#property copyright "Copyright 2025, Leo."
|
|
#property link "https://www.mql5.com/en/users/nique_372"
|
|
#property strict
|
|
|
|
#ifndef SCALERBYLEO_SCALER_BASE_MQH
|
|
#define SCALERBYLEO_SCALER_BASE_MQH
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
#include "..\\..\\MQLArticles\\Utils\\Basic.mqh"
|
|
#include "..\\..\\MQLArticles\\Utils\\File.mqh"
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
#define SCALER_BASE_FINAL_LINE -1
|
|
#define SCALER_BASE_MIN_VALUE 1e-10
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| Scaler Base |
|
|
//+------------------------------------------------------------------+
|
|
// Abstract base shared by the scalers declared in this file.
// It keeps the column-selection settings (an explicit range, or "everything
// except the last excluyed_cols columns"), provides helpers that pull the
// selected columns out of a matrix/vector and write them back, and declares
// the fit/transform interface that concrete scalers implement.
class ScalerBase : public CLoggerBase
  {
protected:
   //--- Persistence state
   string            file_name_out;   // name of the parameter file (user prefix + prefix_file)
   string            prefix_file;     // suffix appended to the user prefix
   bool              loaded_scaler;   // set by load()

   //--- Column selection
   bool              use_custom;      // true: custom range is used, false: trailing columns are excluded
   ulong             start_col;       // first column of the custom range
   ulong             count_cols;      // number of columns in the custom range
   ulong             excluyed_cols;   // number of trailing columns left untouched

   //--- Persistence hooks implemented by each concrete scaler
   virtual bool      Save() = 0;
   virtual bool      Load(string prefix_name) = 0;

   //--- Size validation helpers
   bool              CheckSizeCustom(const matrix &mtx) const;
   bool              CheckSizeExcluded(const matrix &mtx) const;
   bool              CheckSizeCustom(const vector &v) const;
   bool              CheckSizeExcluded(const vector &v) const;

   //--- Extraction of the selected columns and reinsertion of the scaled values
   matrix            ExtractMatrixToScale(const matrix &X) const;
   matrix            ReconstructMatrix(const matrix &X_original, const matrix &X_scaled) const;
   vector            ExtractVectorToScale(const vector &X) const;
   vector            ReconstructVector(const vector &X_original, const vector &X_scaled) const;

public:
                     ScalerBase(void);
                    ~ScalerBase(void) {}

   //--- Column selection setters
   // Custom mode: scale count_col_ columns starting at start_col_.
   inline void       SetRangeEscaler(ulong start_col_, ulong count_col_);
   // Simple mode: the caller chooses how many columns, counted from the end, are excluded.
   inline void       SetRangeEscaler(ulong excluyed_cols_ = 1);

   //--- Persistence entry points
   inline bool       save(string prefix_name);
   inline bool       load(string prefix_name);

   //--- Fitting
   // Matrix input
   virtual matrix    fit_transform(const matrix &X, bool save_data) = 0;

   // Reads a file, transforms its data and rewrites the file
   bool              fit_transform_file(const string &file_name, ushort sep, int extra_file_flags, bool save_data, uint file_code_page = CP_ACP, int start_line = 1);

   // Reads a file and returns the transformed matrix
   matrix            fit_transform_by_file(const string& file_name, ushort sep, int file_flags, bool save_data, uint file_code_page = CP_ACP,
                                           int start_line = 1, int end_line = SCALER_BASE_FINAL_LINE);

   // Parses a string and returns the transformed matrix
   matrix            fit_transform_by_src(const string& src, ushort sep, bool save_data, int str_start_line = 1, int str_end_line = SCALER_BASE_FINAL_LINE);

   // Vector input (no parameters are stored for vectors)
   virtual vector    fit_transform(const vector &X) = 0;

   //--- Transform / inverse transform with already available parameters
   virtual matrix    transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;
   virtual vector    transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;
   virtual matrix    inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;
   virtual vector    inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;

   //--- Common accessors
   virtual inline string GetOutputFile() const final { return this.file_name_out; }
  };
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
// Default configuration: "excluded" mode leaving out the last column,
// no output file name and the scaler flagged as not loaded.
ScalerBase::ScalerBase(void)
   : file_name_out(NULL),
     loaded_scaler(false),
     use_custom(false),
     start_col(0),
     count_cols(0),
     excluyed_cols(1)
  {
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
// Switches to custom mode: count_col_ columns starting at start_col_ are scaled.
// The chosen range is reported through LogInfo.
inline void ScalerBase::SetRangeEscaler(ulong start_col_, ulong count_col_)
  {
   this.start_col  = start_col_;
   this.count_cols = count_col_;
   this.use_custom = true;

   // Index of the last column of the range, used only in the log line
   const ulong last_col = start_col_ + count_col_ - 1;
   LogInfo(StringFormat("Configurado escalado CUSTOM: columnas %I64u a %I64u (%I64u columnas)", start_col_, last_col, count_col_), FUNCION_ACTUAL);
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Switches to "excluded" mode: the last excluyed_cols_ columns are left unscaled.
// The setting is reported through LogInfo.
inline void ScalerBase::SetRangeEscaler(ulong excluyed_cols_ = 1)
  {
   this.use_custom    = false;
   this.excluyed_cols = excluyed_cols_;

   LogInfo(StringFormat("Configurado escalado EXCLUDED: excluir últimas %I64u columnas", excluyed_cols_), FUNCION_ACTUAL);
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
// Validates the custom range [start_col, start_col + count_cols) against the
// number of columns of mtx. Logs an error and returns false when the range
// does not fit; returns true otherwise.
bool ScalerBase::CheckSizeCustom(const matrix &mtx) const
  {
   const ulong total_cols = mtx.Cols();

   if(start_col >= total_cols)
     {
      LogError(StringFormat("Columna de inicio %I64u >= total columnas %I64u", start_col, total_cols), FUNCION_ACTUAL);
      return false;
     }

   // start_col < total_cols at this point, so the subtraction cannot wrap.
   // Comparing this way avoids the unsigned overflow that
   // "start_col + count_cols" could produce for a very large count_cols.
   if(count_cols > total_cols - start_col)
     {
      LogError(StringFormat("Rango [%I64u:%I64u] excede columnas disponibles %I64u", start_col, start_col + count_cols - 1, total_cols), FUNCION_ACTUAL);
      return false;
     }

   return true;
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Validates that mtx has at least excluyed_cols columns. Logs an error and
// returns false when more columns would be excluded than exist.
// A matrix with exactly excluyed_cols columns is accepted.
bool ScalerBase::CheckSizeExcluded(const matrix &mtx) const
  {
   const ulong total_cols = mtx.Cols();

   if(total_cols < excluyed_cols)
     {
      // The message now states the condition actually tested (strictly greater)
      LogError(StringFormat("Columnas a excluir %I64u > total columnas %I64u", excluyed_cols, total_cols), FUNCION_ACTUAL);
      return false;
     }

   return true;
  }
|
|
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Validates the custom range [start_col, start_col + count_cols) against the
// size of v. Logs an error and returns false when the range does not fit;
// returns true otherwise.
bool ScalerBase::CheckSizeCustom(const vector &v) const
  {
   const ulong total_size = v.Size();

   if(start_col >= total_size)
     {
      LogError(StringFormat("Columna de inicio %I64u >= tamaño total del vector %I64u", start_col, total_size), FUNCION_ACTUAL);
      return false;
     }

   // start_col < total_size at this point, so the subtraction cannot wrap.
   // Comparing this way avoids the unsigned overflow that
   // "start_col + count_cols" could produce for a very large count_cols.
   if(count_cols > total_size - start_col)
     {
      LogError(StringFormat("Rango [%I64u:%I64u] excede el tamaño del vector %I64u", start_col, start_col + count_cols - 1, total_size), FUNCION_ACTUAL);
      return false;
     }

   return true;
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Validates that v has at least excluyed_cols elements. Logs an error and
// returns false when more elements would be excluded than exist.
// A vector with exactly excluyed_cols elements is accepted.
bool ScalerBase::CheckSizeExcluded(const vector &v) const
  {
   const ulong total_size = v.Size();

   if(total_size < excluyed_cols)
     {
      // The message now states the condition actually tested (strictly greater)
      LogError(StringFormat("Columnas a excluir %I64u > tamaño del vector: %I64u", excluyed_cols, total_size), FUNCION_ACTUAL);
      return false;
     }

   return true;
  }
|
|
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
// Returns the sub-matrix of X holding only the columns selected for scaling.
// Custom mode: count_cols columns starting at start_col (X is returned as is
// when it already has exactly count_cols columns).
// Excluded mode: every column except the last excluyed_cols (X is returned as
// is when nothing is excluded).
// No bounds validation is done here.
matrix ScalerBase::ExtractMatrixToScale(const matrix &X) const
  {
   ulong first_col = 0;   // column of X copied into column 0 of the result
   ulong width     = 0;   // number of columns copied

   if(use_custom)
     {
      if(X.Cols() == count_cols)
         return X;
      first_col = start_col;
      width     = count_cols;
     }
   else
     {
      if(excluyed_cols == 0)
         return X;
      width = X.Cols() - excluyed_cols;
     }

   //--- Copy the selected block
   const ulong total_rows = X.Rows();
   matrix selected;
   selected.Init(total_rows, width);

   for(ulong r = 0; r < total_rows; r++)
     {
      for(ulong c = 0; c < width; c++)
         selected[r][c] = X[r][first_col + c];
     }

   return selected;
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Writes the scaled columns back into a copy of X_original and returns it.
// When both matrices have the same shape, X_scaled is returned directly.
// Custom mode overwrites count_cols columns starting at start_col; excluded
// mode overwrites the first X_scaled.Cols() columns.
matrix ScalerBase::ReconstructMatrix(const matrix &X_original, const matrix &X_scaled) const
  {
   const bool same_shape = (X_original.Rows() == X_scaled.Rows() && X_original.Cols() == X_scaled.Cols());
   if(same_shape)
      return X_scaled;

   //--- Destination offset and number of columns to overwrite
   ulong first_col = 0;
   ulong width     = X_scaled.Cols();
   if(use_custom)
     {
      first_col = start_col;
      width     = count_cols;
     }

   matrix rebuilt = X_original; // full copy; untouched columns keep their original values
   const ulong total_rows = X_original.Rows();

   for(ulong r = 0; r < total_rows; r++)
     {
      for(ulong c = 0; c < width; c++)
         rebuilt[r][first_col + c] = X_scaled[r][c];
     }

   return rebuilt;
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Returns the part of X selected for scaling.
// Custom mode: count_cols elements starting at start_col (X is returned as is
// when it already has exactly count_cols elements).
// Excluded mode: every element except the last excluyed_cols (X is returned
// as is when nothing is excluded).
// No bounds validation is done here.
vector ScalerBase::ExtractVectorToScale(const vector &X) const
  {
   ulong first = 0;    // index of X copied into element 0 of the result
   ulong length = 0;   // number of elements copied

   if(use_custom)
     {
      if(X.Size() == count_cols)
         return X;
      // Specific range
      first  = start_col;
      length = count_cols;
     }
   else
     {
      if(excluyed_cols == 0)
         return X;
      // Everything except the last N elements
      length = X.Size() - excluyed_cols;
     }

   vector selected;
   selected.Resize(length);
   for(ulong k = 0; k < length; k++)
      selected[k] = X[first + k];

   return selected;
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Writes the scaled elements back into a copy of X_original and returns it.
// When both vectors have the same size, X_scaled is returned directly.
// Custom mode overwrites count_cols elements starting at start_col; excluded
// mode overwrites the first X_scaled.Size() elements.
vector ScalerBase::ReconstructVector(const vector &X_original, const vector &X_scaled) const
  {
   if(X_original.Size() == X_scaled.Size())
      return X_scaled;

   //--- Destination offset and number of elements to overwrite
   ulong first  = 0;
   ulong length = X_scaled.Size();
   if(use_custom)
     {
      first  = start_col;
      length = count_cols;
     }

   vector rebuilt = X_original; // untouched elements keep their original values
   for(ulong k = 0; k < length; k++)
      rebuilt[first + k] = X_scaled[k];

   return rebuilt;
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
// Loads the scaler parameters through the derived Load() implementation.
// prefix_name is forwarded unchanged to Load().
// Returns the result of Load(). The scaler is flagged as loaded only when
// Load() succeeds, so a failed load no longer leaves loaded_scaler set; on
// failure the flag keeps whatever value it had before the call.
inline bool ScalerBase::load(string prefix_name)
  {
   if(!this.Load(prefix_name))
      return false;

   loaded_scaler = true;
   return true;
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Builds the output file name (prefix_name followed by prefix_file) and
// delegates the actual writing to the derived Save() implementation.
// Returns the result of Save().
inline bool ScalerBase::save(string prefix_name)
  {
   file_name_out = prefix_name + prefix_file;

   const bool saved = Save();
   return saved;
  }
|
|
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
/*
 Notes:
 - file_flags: additional FileOpen flags such as FILE_ANSI, FILE_COMMON, etc.
   The FILE_READ and FILE_TXT flags are always added by the function itself.
*/
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Reads a text file line by line, parses every line from start_line onwards
// into one matrix row (fields separated by sep, converted with double()) and
// returns fit_transform() of the resulting matrix.
//   file_name      - file to read
//   sep            - field separator
//   file_flags     - extra FileOpen flags; FILE_READ | FILE_TXT are always added
//   save_data      - forwarded to fit_transform()
//   file_code_page - code page passed to FileOpen
//   start_line     - zero-based index of the first line that is parsed
//   end_line       - reading stops once the line counter exceeds this value;
//                    SCALER_BASE_FINAL_LINE means "read until the end of file"
// On open failure or on a line that yields no fields, the error is logged and
// the matrix built so far is returned without calling fit_transform().
matrix ScalerBase::fit_transform_by_file(const string& file_name, ushort sep, int file_flags, bool save_data, uint file_code_page = CP_ACP,
      int start_line = 1, int end_line = SCALER_BASE_FINAL_LINE)
  {
   ResetLastError();

   //--- Open the file
   matrix data = {};
   const int fh = FileOpen(file_name, (file_flags | FILE_READ | FILE_TXT), '\n', file_code_page);
   if(fh == INVALID_HANDLE)
     {
      LogCriticalError(StringFormat("Error al abrir el archivo = %s, ultimo erorr = %d", file_name, GetLastError()), FUNCION_ACTUAL);
      return data;
     }

   //--- Reading state
   const bool read_to_end = (end_line == SCALER_BASE_FINAL_LINE);
   string fields[];
   string text = "";
   int line_no = 0;   // index of the line currently being processed
   int row_idx = 0;   // next matrix row to fill

   while(!FileIsEnding(fh))
     {
      text = FileReadString(fh);

      if(line_no >= start_line)
        {
         const int n_fields = StringSplit(text, sep, fields);
         if(n_fields < 1)
           {
            LogError(StringFormat("Linea mal formada = %d:\n%s", line_no, text), FUNCION_ACTUAL);
            FileClose(fh);
            return data;
           }

         //--- Append one row with the parsed values
         data.Resize(row_idx + 1, n_fields);
         for(int f = 0; f < n_fields; f++)
            data[row_idx][f] = double(fields[f]);

         row_idx++;
        }

      line_no++;

      //--- Bounded read: stop after the requested last line
      if(!read_to_end && line_no > end_line)
         break;
     }

   FileClose(fh);

   return fit_transform(data, save_data);
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Scales a text file in place: reads it, runs fit_transform() on the parsed
// data and rewrites the same file with the result.
//   file_name        - file to read and then overwrite
//   sep              - field separator, used both for parsing and for writing
//   extra_file_flags - extra FileOpen flags; FILE_TXT plus FILE_READ/FILE_WRITE are added here
//   save_data        - forwarded to fit_transform()
//   file_code_page   - code page passed to FileOpen
//   start_line       - zero-based index of the first data line; earlier lines
//                      are kept and written back unchanged
// Returns false when the file cannot be opened (for reading or writing) or a
// data line yields no fields; true otherwise.
// Fix: data rows are written back with the caller's separator. The previous
// hard-coded ", " produced a file whose data lines no longer matched the
// separator of the preserved leading lines whenever sep was not a comma.
bool ScalerBase::fit_transform_file(const string &file_name, ushort sep, int extra_file_flags, bool save_data, uint file_code_page = CP_ACP, int start_line = 1)
  {
   //---
   ResetLastError();

   //--- Open for reading
   matrix m = {};
   int file_handle = FileOpen(file_name, (extra_file_flags | FILE_TXT | FILE_READ), '\n', file_code_page);
   if(file_handle == INVALID_HANDLE)
     {
      LogCriticalError(StringFormat("Error al abrir el archivo = %s, ultimo erorr = %d", file_name, GetLastError()), FUNCION_ACTUAL);
      return false;
     }

   //--- Reading state
   int curr_line = 0;
   string arr[];
   int dt = 0;
   int curr_row = 0;
   string line = "";
   string extra[];          // lines before start_line, preserved verbatim
   ArrayResize(extra, 0);

   //--- Read until the end of the file
   while(!FileIsEnding(file_handle))
     {
      line = FileReadString(file_handle);

      if(curr_line >= start_line)
        {
         if((dt = StringSplit(line, sep, arr)) < 1)
           {
            LogError(StringFormat("Linea mal formada = %d:\n%s", curr_line, line), FUNCION_ACTUAL);
            FileClose(file_handle);
            return false;
           }

         //--- Append one row with the parsed values
         m.Resize(curr_row + 1, dt);
         for(int i = 0; i < dt; i++)
            m[curr_row][i] = double(arr[i]);

         curr_row++;
        }
      else
        {
         extra[ArrayResize(extra, ArraySize(extra) + 1) - 1] = line;
        }

      curr_line++;
     }

   //--- Transform and close
   m = fit_transform(m, save_data);
   FileClose(file_handle);

   //--- Reopen for writing; clear the error code so a failure reports its own cause
   ResetLastError();
   file_handle = FileOpen(file_name, (extra_file_flags | FILE_TXT | FILE_WRITE), 0, file_code_page);
   if(file_handle == INVALID_HANDLE)
     {
      LogCriticalError(StringFormat("Error al abrir el archivo = %s, ultimo erorr = %d", file_name, GetLastError()), FUNCION_ACTUAL);
      return false;
     }

   //--- Leading lines are written back unchanged
   for(int i = 0; i < ArraySize(extra); i++)
      FileWrite(file_handle, extra[i]);

   //--- Matrix rows, joined with the same separator that was used for parsing
   const string delimiter = ShortToString(sep);
   const ulong rows = m.Rows();
   const ulong cols = m.Cols();
   for(ulong row = 0; row < rows; row++)
     {
      line = "";
      for(ulong col = 0; col < cols; col++)
        {
         if(col > 0)
            line += delimiter;
         line += string(m[row][col]);
        }

      FileWrite(file_handle, line);
     }

   //--- Close the file
   FileClose(file_handle);

   return true;
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Parses a multi-line string into a matrix (one row per line, fields
// separated by sep and converted with double()) and returns fit_transform()
// of that matrix.
//   src            - text to parse; lines are split on '\n'
//   sep            - field separator
//   save_data      - forwarded to fit_transform()
//   str_start_line - zero-based index of the first line that is parsed
//   str_end_line   - parsing stops once the line counter exceeds this value;
//                    SCALER_BASE_FINAL_LINE means "parse until the last line"
// When the number of lines is not greater than str_start_line, or a line
// yields no fields, the error is logged and the matrix built so far is
// returned without calling fit_transform().
matrix ScalerBase::fit_transform_by_src(const string& src, ushort sep, bool save_data, int str_start_line = 1, int str_end_line = SCALER_BASE_FINAL_LINE)
  {
   matrix data = {};

   //--- Split the source into lines
   string lines[];
   const int total_row = StringSplit(src, '\n', lines);
   if(total_row <= str_start_line)
     {
      LogError(StringFormat("Numero de filas del string %d es invalida", total_row), FUNCION_ACTUAL);
      return data;
     }

   //--- Parsing state
   const bool read_to_end = (str_end_line == SCALER_BASE_FINAL_LINE);
   string fields[];
   string text = "";
   int row_idx = 0;   // next matrix row to fill

   for(int line_no = 0; line_no < total_row; line_no++)
     {
      text = lines[line_no];

      if(line_no >= str_start_line)
        {
         const int n_fields = StringSplit(text, sep, fields);
         if(n_fields < 1)
           {
            LogError(StringFormat("Linea mal formada = %d:\n%s", line_no, text), FUNCION_ACTUAL);
            return data;
           }

         //--- Append one row with the parsed values
         data.Resize(row_idx + 1, n_fields);
         for(int f = 0; f < n_fields; f++)
            data[row_idx][f] = double(fields[f]);

         row_idx++;
        }

      //--- Bounded parse: stop after the requested last line
      if(!read_to_end && (line_no + 1) > str_end_line)
         break;
     }

   return fit_transform(data, save_data);
  }
|
|
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| Standardization Scaler |
|
|
//+------------------------------------------------------------------+
|
|
// Scaler that applies (x - mean) / std per column and keeps the per-column
// mean and standard deviation so the transform can be repeated or inverted.
// Its parameter files use the "_mean_std.csv" suffix.
class StandardizationScaler : public ScalerBase
  {
protected:
   vector            mean, std;   // per-column parameters

   //--- Persistence hooks
   bool              Save() override;
   bool              Load(string prefix_name) override;

public:
                     StandardizationScaler() : ScalerBase() { this.prefix_file = "_mean_std.csv"; }

   //--- Fitting
   matrix            fit_transform(const matrix &X, bool save_data) override;
   vector            fit_transform(const vector &X) override;   // no parameters are stored for vectors

   //--- Transform / inverse transform with stored parameters
   matrix            transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   matrix            inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
  };
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
// Writes the scaler parameters to file_name_out in the common files folder,
// one value per line: mean, std, count_cols, start_col, excluyed_cols and
// use_custom. Returns false when the file cannot be opened.
bool StandardizationScaler::Save()
  {
   // The file lives in the common folder (it is opened with FILE_COMMON), so
   // the stale copy must be deleted there too. Without the flag, FileDelete
   // removed a same-named file from the local sandbox instead.
   FileDelete(this.file_name_out, FILE_COMMON);
   ResetLastError();

   int handle = FileOpen(this.file_name_out, FILE_WRITE | FILE_CSV | FILE_COMMON);
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }

   //--- Keep this order in sync with Load()
   FileWrite(handle, vector_to_string(this.mean));
   FileWrite(handle, vector_to_string(this.std));
   FileWrite(handle, count_cols);
   FileWrite(handle, start_col);
   FileWrite(handle, excluyed_cols);
   FileWrite(handle, (int)use_custom);

   FileClose(handle);
   return true;
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Reads the scaler parameters from the file prefix_name + prefix_file in the
// common files folder, in the order Save() writes them: mean, std,
// count_cols, start_col, excluyed_cols, use_custom.
// Returns false when the file cannot be opened or when mean and std have
// different lengths. In the second case the members are left untouched, so a
// truncated or corrupt file cannot leave the scaler with parameters that
// would later be indexed out of range.
bool StandardizationScaler::Load(string prefix_name)
  {
   this.file_name_out = prefix_name + this.prefix_file;

   ResetLastError();
   int handle = FileOpen(file_name_out, FILE_READ | FILE_CSV | FILE_COMMON, "\n");
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }

   //--- Read everything into temporaries first
   vector file_mean = string_to_vector(FileReadString(handle));
   vector file_std  = string_to_vector(FileReadString(handle));
   const ulong file_count_cols    = (ulong)StringToInteger(FileReadString(handle));
   const ulong file_start_col     = (ulong)StringToInteger(FileReadString(handle));
   const ulong file_excluyed_cols = (ulong)StringToInteger(FileReadString(handle));
   const bool  file_use_custom    = (bool)StringToInteger(FileReadString(handle));

   FileClose(handle);

   //--- Both parameter vectors must describe the same columns
   if(file_mean.Size() != file_std.Size())
     {
      LogError(StringFormat("Invalid scaler file %s: mean size %I64u != std size %I64u", this.file_name_out, file_mean.Size(), file_std.Size()), FUNCION_ACTUAL);
      return false;
     }

   //--- Commit
   this.mean          = file_mean;
   this.std           = file_std;
   this.count_cols    = file_count_cols;
   this.start_col     = file_start_col;
   this.excluyed_cols = file_excluyed_cols;
   this.use_custom    = file_use_custom;

   return true;
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
// Standardizes the selected part of a single vector using the mean and
// standard deviation of that part, then returns X with the scaled values
// written back. Nothing is stored in the scaler.
// When loaded_scaler is set, a warning is logged and transform(X) is returned.
// When the size check fails, X is returned unchanged.
vector StandardizationScaler::fit_transform(const vector &X)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      return transform(X);
     }

   //--- Validate the configured selection against the input size
   const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!size_ok)
      return X;

   //--- Selected elements and their statistics
   vector selected = ExtractVectorToScale(X);
   const double center = selected.Mean();
   double spread = selected.Std();

   // A (near) zero deviation would divide by zero; fall back to 1.0
   if(spread < SCALER_BASE_MIN_VALUE)
      spread = 1.0;

   //--- Standardize in place
   const ulong total = selected.Size();
   for(ulong k = 0; k < total; k++)
      selected[k] = (selected[k] - center) / spread;

   return ReconstructVector(X, selected);
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Computes the per-column mean and standard deviation of the selected columns
// of X, standardizes them and returns X with the scaled columns written back.
//   save_data - when true, the computed mean/std are stored in the scaler
// When loaded_scaler is set, a warning is logged and transform(X) is returned.
// When the size check fails, X is returned unchanged.
// This function does not modify loaded_scaler.
matrix StandardizationScaler::fit_transform(const matrix &X, bool save_data)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      return transform(X);
     }

   LogInfo(StringFormat("Numero de columnas de entrada: %I64u", X.Cols()), FUNCION_ACTUAL);

   //--- Validate the configured selection against the input size
   const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!size_ok)
      return X;

   //--- Columns to scale
   matrix selected = ExtractMatrixToScale(X);
   const ulong n_cols = selected.Cols();
   const ulong n_rows = selected.Rows();

   LogInfo(StringFormat("Columnas a escalar: %I64u", selected.Cols()), FUNCION_ACTUAL);

   //--- Per-column statistics
   vector col_mean(n_cols);
   vector col_std(n_cols);

   for(ulong c = 0; c < n_cols; c++)
     {
      const vector column = selected.Col(c);
      col_mean[c] = column.Mean();
      col_std[c]  = column.Std();

      // Avoid division by zero
      if(col_std[c] < SCALER_BASE_MIN_VALUE)
        {
         LogWarning(StringFormat("Columna %I64u tiene std muy pequeño (%.2e), usando 1.0", c, col_std[c]), FUNCION_ACTUAL);
         col_std[c] = 1.0;
        }
     }

   //--- Standardize every cell with the statistics of its column
   for(ulong r = 0; r < n_rows; r++)
     {
      for(ulong c = 0; c < n_cols; c++)
         selected[r][c] = (selected[r][c] - col_mean[c]) / col_std[c];
     }

   //--- Optionally keep the parameters
   if(save_data)
     {
      this.mean = col_mean;
      this.std  = col_std;
     }

   //--- The result is always reconstructed here
   return ReconstructMatrix(X, selected);
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
// Standardizes X with the stored per-column mean/std.
//   solo_escalar_lo_previsto - true: only the configured columns are extracted
//                              and scaled; false: X is scaled as given
//   reconstruir              - true: the scaled columns are written back into
//                              a copy of X; false: only the scaled part is returned
// Returns X unchanged (after logging) when no parameters are available, when
// the size check fails, or when the number of columns to scale differs from
// the number of stored parameters.
// Fixes: a scaler fitted in this instance with fit_transform(X, true) is now
// accepted, as the error message already promised; the ulong size is printed
// with the matching %I64u specifier.
matrix StandardizationScaler::transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   //--- Parameters exist after load() or after fit_transform(..., save_data = true)
   if(!loaded_scaler && this.mean.Size() == 0)
     {
      LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
      return X;
     }

   //--- Validate the configured selection against the input size
   if(use_custom)
     {
      if(!CheckSizeCustom(X))
         return X;
     }
   else
     {
      if(!CheckSizeExcluded(X))
         return X;
     }

   //--- Data to scale
   matrix X_to_scale = solo_escalar_lo_previsto ? ExtractMatrixToScale(X) : X;
   const ulong X_to_scale_cols = X_to_scale.Cols();
   const ulong X_to_scale_rows = X_to_scale.Rows();

   //--- One stored parameter per column is required
   if(X_to_scale_cols != this.mean.Size())
     {
      LogError(StringFormat("Columnas a escalar %I64u != columnas entrenadas %I64u", X_to_scale_cols, this.mean.Size()), FUNCION_ACTUAL);
      return X;
     }

   //--- Standardize
   for(ulong row = 0; row < X_to_scale_rows; row++)
      for(ulong col = 0; col < X_to_scale_cols; col++)
         X_to_scale[row][col] = (X_to_scale[row][col] - this.mean[col]) / this.std[col];

   return (reconstruir) ? ReconstructMatrix(X, X_to_scale) : X_to_scale;
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Standardizes X with the stored per-column mean/std.
//   solo_escalar_lo_previsto - true: only the configured elements are extracted
//                              and scaled; false: X is scaled as given
//   reconstruir              - true: the scaled elements are written back into
//                              a copy of X; false: only the scaled part is returned
// Returns X unchanged (after logging) when no parameters are available, when
// the selection does not fit in X, or when the number of elements to scale
// differs from the number of stored parameters.
// Fixes: a scaler fitted in this instance with fit_transform(X, true) is now
// accepted; the selection is validated before extraction (the extraction
// helpers do no bounds checks, so a short vector caused an unsigned underflow
// or out-of-range reads); the broken "%I64" specifier is now "%I64u".
vector StandardizationScaler::transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   //--- Parameters exist after load() or after fit_transform(..., save_data = true)
   if(!loaded_scaler && this.mean.Size() == 0)
     {
      LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
      return X;
     }

   //--- The selection must fit in X before anything is extracted
   if(solo_escalar_lo_previsto)
     {
      const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
      if(!size_ok)
         return X;
     }

   //--- Data to scale
   vector X_to_scale = solo_escalar_lo_previsto ? ExtractVectorToScale(X) : X;
   const ulong X_to_scale_size = X_to_scale.Size();

   //--- One stored parameter per element is required
   if(X_to_scale_size != this.mean.Size())
     {
      LogError(StringFormat("Elementos a escalar %I64u != elementos entrenados %I64u", X_to_scale_size, this.mean.Size()), FUNCION_ACTUAL);
      return X;
     }

   //--- Standardize
   for(ulong i = 0; i < X_to_scale_size; i++)
      X_to_scale[i] = (X_to_scale[i] - this.mean[i]) / this.std[i];

   return (reconstruir) ? ReconstructVector(X, X_to_scale) : X_to_scale;
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
// Reverts the standardization: x * std + mean per column.
//   solo_escalar_lo_previsto - true: only the configured columns are extracted
//                              and reverted; false: X_scaled is reverted as given
//   reconstruir              - true: the reverted columns are written back into
//                              a copy of X_scaled; false: only the reverted part is returned
// Returns X_scaled unchanged (after logging) when no parameters are
// available, when the selection does not fit, or when the number of columns
// differs from the number of stored parameters.
// Fixes: a scaler fitted in this instance with fit_transform(X, true) is now
// accepted, as the error message already promised; the selection is validated
// before extraction (the extraction helper does no bounds checks); the ulong
// size is printed with the matching %I64u specifier.
matrix StandardizationScaler::inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   //--- Parameters exist after load() or after fit_transform(..., save_data = true)
   if(!loaded_scaler && this.mean.Size() == 0)
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }

   //--- The selection must fit in X_scaled before anything is extracted
   if(solo_escalar_lo_previsto)
     {
      const bool size_ok = use_custom ? CheckSizeCustom(X_scaled) : CheckSizeExcluded(X_scaled);
      if(!size_ok)
         return X_scaled;
     }

   //--- Data to revert
   matrix X_to_unscale = (solo_escalar_lo_previsto) ? ExtractMatrixToScale(X_scaled) : X_scaled;
   const ulong X_to_unscale_cols = X_to_unscale.Cols();
   const ulong X_to_unscale_rows = X_to_unscale.Rows();

   //--- One stored parameter per column is required
   if(X_to_unscale_cols != this.mean.Size())
     {
      LogError(StringFormat("Columnas escaladas %I64u != columnas entrenadas %I64u", X_to_unscale_cols, this.mean.Size()), FUNCION_ACTUAL);
      return X_scaled;
     }

   //--- Revert
   for(ulong row = 0; row < X_to_unscale_rows; row++)
      for(ulong col = 0; col < X_to_unscale_cols; col++)
         X_to_unscale[row][col] = X_to_unscale[row][col] * this.std[col] + this.mean[col];

   return (reconstruir) ? ReconstructMatrix(X_scaled, X_to_unscale) : X_to_unscale;
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Reverts the standardization: x * std + mean per element.
//   solo_escalar_lo_previsto - true: only the configured elements are extracted
//                              and reverted; false: X_scaled is reverted as given
//   reconstruir              - true: the reverted elements are written back into
//                              a copy of X_scaled; false: only the reverted part is returned
// Returns X_scaled unchanged (after logging) when no parameters are
// available, when the selection does not fit, or when the number of elements
// differs from the number of stored parameters.
// Fixes: a scaler fitted in this instance with fit_transform(X, true) is now
// accepted, as the error message already promised; the selection is validated
// before extraction (the extraction helper does no bounds checks); the ulong
// size is printed with the matching %I64u specifier.
vector StandardizationScaler::inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   //--- Parameters exist after load() or after fit_transform(..., save_data = true)
   if(!loaded_scaler && this.mean.Size() == 0)
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }

   //--- The selection must fit in X_scaled before anything is extracted
   if(solo_escalar_lo_previsto)
     {
      const bool size_ok = use_custom ? CheckSizeCustom(X_scaled) : CheckSizeExcluded(X_scaled);
      if(!size_ok)
         return X_scaled;
     }

   //--- Data to revert
   vector X_to_unscale = (solo_escalar_lo_previsto) ? ExtractVectorToScale(X_scaled) : X_scaled;
   const ulong X_to_unscale_size = X_to_unscale.Size();

   //--- One stored parameter per element is required
   if(X_to_unscale_size != this.mean.Size())
     {
      LogError(StringFormat("Elementos escalados %I64u != elementos entrenados %I64u", X_to_unscale_size, this.mean.Size()), FUNCION_ACTUAL);
      return X_scaled;
     }

   //--- Revert
   for(ulong i = 0; i < X_to_unscale_size; i++)
      X_to_unscale[i] = X_to_unscale[i] * this.std[i] + this.mean[i];

   return (reconstruir) ? ReconstructVector(X_scaled, X_to_unscale) : X_to_unscale;
  }
|
|
|
|
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| MaxMin Scaler |
|
|
//+------------------------------------------------------------------+
|
|
// Scaler that applies (x - min) / (max - min) per column and keeps the
// per-column minimum and maximum so the transform can be repeated or inverted.
// Its parameter files use the "_min_max.csv" suffix.
class MaxMinScaler : public ScalerBase
  {
protected:
   vector            min_vals, max_vals;   // per-column parameters

   //--- Persistence hooks
   bool              Save() override;
   bool              Load(string prefix_name) override;

public:
                     MaxMinScaler() : ScalerBase() { this.prefix_file = "_min_max.csv"; }

   //--- Fitting
   vector            fit_transform(const vector &X) override;
   matrix            fit_transform(const matrix &X, bool save_data) override;

   //--- Transform / inverse transform with stored parameters
   matrix            transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   matrix            inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
  };
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
// Writes the scaler parameters to file_name_out in the common files folder,
// one value per line: min_vals, max_vals, count_cols, start_col,
// excluyed_cols and use_custom. Returns false when the file cannot be opened.
bool MaxMinScaler::Save()
  {
   // The file lives in the common folder (it is opened with FILE_COMMON), so
   // the stale copy must be deleted there too. Without the flag, FileDelete
   // removed a same-named file from the local sandbox instead.
   FileDelete(this.file_name_out, FILE_COMMON);
   ResetLastError();

   //--- Open for writing
   int handle = FileOpen(this.file_name_out, FILE_WRITE | FILE_CSV | FILE_COMMON);
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }

   //--- Keep this order in sync with Load()
   FileWrite(handle, vector_to_string(this.min_vals));
   FileWrite(handle, vector_to_string(this.max_vals));
   FileWrite(handle, count_cols);
   FileWrite(handle, start_col);
   FileWrite(handle, excluyed_cols);
   FileWrite(handle, (int)use_custom);

   //---
   FileClose(handle);
   return true;
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Reads the scaler parameters from the file prefix_name + prefix_file in the
// common files folder, in the order Save() writes them: min_vals, max_vals,
// count_cols, start_col, excluyed_cols, use_custom.
// Returns false when the file cannot be opened or when min_vals and max_vals
// have different lengths. In the second case the members are left untouched,
// so a truncated or corrupt file cannot leave the scaler with parameters that
// would later be indexed out of range.
bool MaxMinScaler::Load(string prefix_name)
  {
   this.file_name_out = prefix_name + this.prefix_file;

   //--- Open for reading
   ResetLastError();
   int handle = FileOpen(file_name_out, FILE_READ | FILE_CSV | FILE_COMMON, "\n");
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }

   //--- Read everything into temporaries first
   vector file_min = string_to_vector(FileReadString(handle));
   vector file_max = string_to_vector(FileReadString(handle));
   const ulong file_count_cols    = (ulong)StringToInteger(FileReadString(handle));
   const ulong file_start_col     = (ulong)StringToInteger(FileReadString(handle));
   const ulong file_excluyed_cols = (ulong)StringToInteger(FileReadString(handle));
   const bool  file_use_custom    = (bool)StringToInteger(FileReadString(handle));

   FileClose(handle);

   //--- Both parameter vectors must describe the same columns
   if(file_min.Size() != file_max.Size())
     {
      LogError(StringFormat("Invalid scaler file %s: min size %I64u != max size %I64u", this.file_name_out, file_min.Size(), file_max.Size()), FUNCION_ACTUAL);
      return false;
     }

   //--- Commit
   this.min_vals      = file_min;
   this.max_vals      = file_max;
   this.count_cols    = file_count_cols;
   this.start_col     = file_start_col;
   this.excluyed_cols = file_excluyed_cols;
   this.use_custom    = file_use_custom;

   return true;
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
// Min-max scales the selected part of a single vector using the minimum and
// maximum of that part, then returns X with the scaled values written back.
// Nothing is stored in the scaler.
// When loaded_scaler is set, a warning is logged and transform(X) is returned.
// When the size check fails, X is returned unchanged.
vector MaxMinScaler::fit_transform(const vector &X)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      return transform(X);
     }

   //--- Validate the configured selection against the input size
   const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!size_ok)
      return X;

   //--- Selected elements and their bounds
   vector selected = ExtractVectorToScale(X);
   double upper = selected.Max();
   const double lower = selected.Min();

   // A (near) zero range would divide by zero; widen it to 1.0
   if(upper - lower < SCALER_BASE_MIN_VALUE)
      upper = lower + 1.0;

   //--- Scale in place
   const double range = upper - lower;
   const ulong total = selected.Size();
   for(ulong k = 0; k < total; k++)
      selected[k] = (selected[k] - lower) / range;

   return ReconstructVector(X, selected);
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Computes the per-column minimum and maximum of the selected columns of X,
// min-max scales them and returns X with the scaled columns written back.
//   save_data - when true, the computed min/max are stored in the scaler
// When loaded_scaler is set, a warning is logged and transform(X) is returned.
// When the size check fails, X is returned unchanged.
// This function does not modify loaded_scaler.
matrix MaxMinScaler::fit_transform(const matrix &X, bool save_data)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      return transform(X);
     }

   LogInfo(StringFormat("Numero de columnas de entrada: %I64u", X.Cols()), FUNCION_ACTUAL);

   //--- Validate the configured selection against the input size
   const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
   if(!size_ok)
      return X;

   //--- Columns to scale
   matrix selected = ExtractMatrixToScale(X);
   const ulong n_cols = selected.Cols();
   const ulong n_rows = selected.Rows();

   LogInfo(StringFormat("Columnas a escalar: %I64u", n_cols), FUNCION_ACTUAL);

   //--- Per-column bounds
   vector col_min(n_cols);
   vector col_max(n_cols);

   for(ulong c = 0; c < n_cols; c++)
     {
      const vector column = selected.Col(c);
      col_min[c] = column.Min();
      col_max[c] = column.Max();

      // A (near) zero range would divide by zero; widen it to 1.0
      if(fabs(col_max[c] - col_min[c]) < SCALER_BASE_MIN_VALUE)
        {
         LogWarning(StringFormat("Columna %I64u tiene rango muy pequeño (%.2e), usando rango 1.0", c, col_max[c] - col_min[c]), FUNCION_ACTUAL);
         col_max[c] = col_min[c] + 1.0;
        }
     }

   //--- Scale every cell with the bounds of its column
   for(ulong r = 0; r < n_rows; r++)
     {
      for(ulong c = 0; c < n_cols; c++)
         selected[r][c] = (selected[r][c] - col_min[c]) / (col_max[c] - col_min[c]);
     }

   //--- Optionally keep the parameters
   if(save_data)
     {
      this.min_vals = col_min;
      this.max_vals = col_max;
     }

   //--- The result is always reconstructed here
   return ReconstructMatrix(X, selected);
  }
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
// Applies the stored min-max parameters to a matrix.
//
// X                        : input matrix.
// solo_escalar_lo_previsto : true  -> scale only ExtractMatrixToScale(X);
//                            false -> scale every column of X.
// reconstruir              : true  -> return ReconstructMatrix(X, scaled);
//                            false -> return only the scaled matrix.
// Returns X unchanged when the scaler is not loaded, when the size check
// fails, or when the number of columns to scale differs from the number of
// stored minimums.
matrix MaxMinScaler::transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
    if(!loaded_scaler)
    {
        LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
        return X;
    }

    //--- Layout validation (applied to X regardless of the flags)
    const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
    if(!size_ok)
        return X;

    //--- Data to scale
    matrix X_to_scale = solo_escalar_lo_previsto ? ExtractMatrixToScale(X) : X;
    const ulong X_to_scale_cols = X_to_scale.Cols();
    const ulong X_to_scale_rows = X_to_scale.Rows();

    //--- The stored parameters must describe exactly these columns
    const ulong trained_cols = this.min_vals.Size();
    if(X_to_scale_cols != trained_cols)
    {
        // Both values are ulong, so both need the 64-bit specifier
        LogError(StringFormat("Columnas a escalar %I64u != columnas entrenadas %I64u", X_to_scale_cols, trained_cols), FUNCION_ACTUAL);
        return X;
    }

    //--- Scale in place
    for(ulong row = 0; row < X_to_scale_rows; row++)
        for(ulong col = 0; col < X_to_scale_cols; col++)
            X_to_scale[row][col] = (X_to_scale[row][col] - this.min_vals[col]) / (this.max_vals[col] - this.min_vals[col]);

    //--- Optionally restore the original layout
    return (reconstruir) ? ReconstructMatrix(X, X_to_scale) : X_to_scale;
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Applies the stored min-max parameters element by element to a vector
// (element i uses min_vals[i] / max_vals[i]).
//
// X                        : input vector.
// solo_escalar_lo_previsto : true  -> scale only ExtractVectorToScale(X);
//                            false -> scale every element of X.
// reconstruir              : true  -> return ReconstructVector(X, scaled);
//                            false -> return only the scaled vector.
// Returns X unchanged when the scaler is not loaded, when the size check
// fails before an extraction, or when the number of elements to scale differs
// from the number of stored minimums.
vector MaxMinScaler::transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
    if(!loaded_scaler)
    {
        LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
        return X;
    }

    //--- Extraction is guarded by the same size check the fitting path runs
    //--- before calling ExtractVectorToScale
    if(solo_escalar_lo_previsto)
    {
        const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
        if(!size_ok)
            return X;
    }

    //--- Data to scale
    vector X_to_scale = solo_escalar_lo_previsto ? ExtractVectorToScale(X) : X;
    const ulong X_to_scale_size = X_to_scale.Size();

    //--- The stored parameters must describe exactly these elements
    const ulong trained_size = this.min_vals.Size();
    if(X_to_scale_size != trained_size)
    {
        // Both values are ulong, so both need the 64-bit specifier
        LogError(StringFormat("Elementos a escalar %I64u != elementos entrenados %I64u", X_to_scale_size, trained_size), FUNCION_ACTUAL);
        return X;
    }

    //--- Scale in place
    for(ulong i = 0; i < X_to_scale_size; i++)
        X_to_scale[i] = (X_to_scale[i] - this.min_vals[i]) / (this.max_vals[i] - this.min_vals[i]);

    //--- Optionally restore the original layout
    return (reconstruir) ? ReconstructVector(X, X_to_scale) : X_to_scale;
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
// Reverts the min-max scaling of a matrix: x = x_scaled * (max - min) + min,
// column by column, using the stored parameters.
//
// X_scaled                 : scaled input matrix.
// solo_escalar_lo_previsto : true  -> revert only ExtractMatrixToScale(X_scaled);
//                            false -> revert every column of X_scaled.
// reconstruir              : true  -> return ReconstructMatrix(X_scaled, reverted);
//                            false -> return only the reverted matrix.
// Returns X_scaled unchanged when the scaler is not loaded, when the size
// check fails before an extraction, or when the column count differs from the
// number of stored minimums.
matrix MaxMinScaler::inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
    if(!loaded_scaler)
    {
        LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
        return X_scaled;
    }

    //--- Extraction is guarded by the same size check the forward path runs
    //--- before calling ExtractMatrixToScale
    if(solo_escalar_lo_previsto)
    {
        const bool size_ok = use_custom ? CheckSizeCustom(X_scaled) : CheckSizeExcluded(X_scaled);
        if(!size_ok)
            return X_scaled;
    }

    //--- Data to revert
    matrix X_to_unscale = (solo_escalar_lo_previsto) ? ExtractMatrixToScale(X_scaled) : X_scaled;
    const ulong X_to_unscale_cols = X_to_unscale.Cols();
    const ulong X_to_unscale_rows = X_to_unscale.Rows();

    //--- The stored parameters must describe exactly these columns
    const ulong trained_cols = this.min_vals.Size();
    if(X_to_unscale_cols != trained_cols)
    {
        // Both values are ulong, so both need the 64-bit specifier
        LogError(StringFormat("Columnas escaladas %I64u != columnas entrenadas %I64u", X_to_unscale_cols, trained_cols), FUNCION_ACTUAL);
        return X_scaled;
    }

    //--- Revert in place
    for(ulong row = 0; row < X_to_unscale_rows; row++)
        for(ulong col = 0; col < X_to_unscale_cols; col++)
            X_to_unscale[row][col] = X_to_unscale[row][col] * (this.max_vals[col] - this.min_vals[col]) + this.min_vals[col];

    //--- Optionally restore the original layout
    return (reconstruir) ? ReconstructMatrix(X_scaled, X_to_unscale) : X_to_unscale;
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Reverts the min-max scaling of a vector: x = x_scaled * (max - min) + min,
// element by element, using the stored parameters.
//
// X_scaled                 : scaled input vector.
// solo_escalar_lo_previsto : true  -> revert only ExtractVectorToScale(X_scaled);
//                            false -> revert every element of X_scaled.
// reconstruir              : true  -> return ReconstructVector(X_scaled, reverted);
//                            false -> return only the reverted vector.
// Returns X_scaled unchanged when the scaler is not loaded, when the size
// check fails before an extraction, or when the element count differs from
// the number of stored minimums.
vector MaxMinScaler::inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
    if(!loaded_scaler)
    {
        LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
        return X_scaled;
    }

    //--- Extraction is guarded by the same size check the fitting path runs
    //--- before calling ExtractVectorToScale
    if(solo_escalar_lo_previsto)
    {
        const bool size_ok = use_custom ? CheckSizeCustom(X_scaled) : CheckSizeExcluded(X_scaled);
        if(!size_ok)
            return X_scaled;
    }

    //--- Data to revert
    vector X_to_unscale = (solo_escalar_lo_previsto) ? ExtractVectorToScale(X_scaled) : X_scaled;
    const ulong X_to_unscale_size = X_to_unscale.Size();

    //--- The stored parameters must describe exactly these elements
    const ulong trained_size = this.min_vals.Size();
    if(X_to_unscale_size != trained_size)
    {
        // Both values are ulong, so both need the 64-bit specifier
        LogError(StringFormat("Elementos escalados %I64u != elementos entrenados %I64u", X_to_unscale_size, trained_size), FUNCION_ACTUAL);
        return X_scaled;
    }

    //--- Revert in place
    for(ulong i = 0; i < X_to_unscale_size; i++)
        X_to_unscale[i] = X_to_unscale[i] * (this.max_vals[i] - this.min_vals[i]) + this.min_vals[i];

    //--- Optionally restore the original layout
    return (reconstruir) ? ReconstructVector(X_scaled, X_to_unscale) : X_to_unscale;
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| Robust Scaler |
|
|
//+------------------------------------------------------------------+
|
|
// Scaler that centres each column on its median and divides by its
// interquartile range (75th percentile minus 25th percentile).
class RobustScaler : public ScalerBase
{
protected:
    // Fitted parameters, one entry per scaled column
    vector medians;
    vector iqrs;

    // Persistence hooks required by ScalerBase
    bool Save() override;
    bool Load(string prefix_name) override;

public:
    // Sets the file-name suffix used when building file_name_out
    RobustScaler() : ScalerBase()
    {
        this.prefix_file = "_median_iqr.csv";
    }

    //--- Fitting
    vector fit_transform(const vector &X) override;
    matrix fit_transform(const matrix &X, bool save_data) override;

    //--- Forward scaling with stored parameters
    matrix transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
    vector transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;

    //--- Reverse scaling with stored parameters
    matrix inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
    vector inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
};
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
// Writes the fitted parameters to file_name_out in the common files folder.
//
// One value per FileWrite call, in this order: medians, iqrs, count_cols,
// start_col, excluyed_cols, use_custom (as int). Load() reads them back in
// the same order.
// Returns false (after logging) when the file cannot be opened, true otherwise.
bool RobustScaler::Save()
{
    // Remove any previous file from the same location FileOpen writes to.
    // Without FILE_COMMON the delete would target the terminal-local folder
    // and could remove an unrelated file that happens to share the name.
    FileDelete(this.file_name_out, FILE_COMMON);
    // Discard whatever error the delete may have left (e.g. file not found)
    ResetLastError();

    //--- Open the destination
    const int handle = FileOpen(this.file_name_out, FILE_WRITE | FILE_CSV | FILE_COMMON);
    if(handle == INVALID_HANDLE)
    {
        LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
        return false;
    }

    //--- Fitted statistics
    FileWrite(handle, vector_to_string(this.medians));
    FileWrite(handle, vector_to_string(this.iqrs));

    //--- Column-selection configuration
    FileWrite(handle, count_cols);
    FileWrite(handle, start_col);
    FileWrite(handle, excluyed_cols);
    FileWrite(handle, (int)use_custom);

    //--- Done
    FileClose(handle);
    return true;
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Reads the fitted parameters from "<prefix_name><prefix_file>" in the common
// files folder.
//
// prefix_name : leading part of the file name; this.prefix_file is appended.
// Values are read in the order Save() writes them: medians, iqrs, count_cols,
// start_col, excluyed_cols, use_custom.
// Returns false (after logging) when the file cannot be opened or when the
// two parameter vectors have different lengths; true otherwise.
bool RobustScaler::Load(string prefix_name)
{
    this.file_name_out = prefix_name + this.prefix_file;

    //--- Open the source
    ResetLastError();
    const int handle = FileOpen(file_name_out, FILE_READ | FILE_CSV | FILE_COMMON, "\n");
    if(handle == INVALID_HANDLE)
    {
        LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
        return false;
    }

    //--- Fitted statistics
    this.medians = string_to_vector(FileReadString(handle));
    this.iqrs = string_to_vector(FileReadString(handle));

    //--- Column-selection configuration
    this.count_cols = (ulong)StringToInteger(FileReadString(handle));
    this.start_col = (ulong)StringToInteger(FileReadString(handle));
    this.excluyed_cols = (ulong)StringToInteger(FileReadString(handle));
    this.use_custom = (bool)StringToInteger(FileReadString(handle));

    //--- The handle is no longer needed, whatever the validation result
    FileClose(handle);

    //--- transform()/inverse_transform() only compare against medians.Size()
    //--- and then index iqrs with the same index, so both must match
    if(this.medians.Size() != this.iqrs.Size())
    {
        LogError(StringFormat("Scaler file is inconsistent: %I64u medians vs %I64u iqrs >> Filename= %s", this.medians.Size(), this.iqrs.Size(), this.file_name_out), FUNCION_ACTUAL);
        return false;
    }

    return true;
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
// Robust-scales a single vector using the median and interquartile range of
// its own selected elements. Nothing is stored in the scaler by this overload.
//
// X       : input vector in the full layout validated by CheckSizeCustom /
//           CheckSizeExcluded.
// Returns : ReconstructVector(X, scaled) on success, or X unchanged when the
//           size check fails.
//
// When the scaler is already loaded no fitting happens and the call is
// forwarded to transform().
vector RobustScaler::fit_transform(const vector &X)
{
    if(loaded_scaler)
    {
        LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
        // This function receives and returns the full layout, so transform()
        // must extract the scaled part and rebuild the vector. The default
        // flags would treat X as already extracted and hand back only the
        // scaled part (or fail the size comparison).
        return transform(X, true, true);
    }

    //--- Validate the layout before extracting anything
    const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
    if(!size_ok)
        return X;

    //--- Elements that take part in the scaling
    vector X_to_scale = ExtractVectorToScale(X);

    //--- Centre and spread
    const double median_value = X_to_scale.Median();
    const double q75 = X_to_scale.Percentile(75);
    const double q25 = X_to_scale.Percentile(25);
    double iqr_value = q75 - q25;

    // A (near) zero spread falls back to 1.0 so the division stays defined
    if(fabs(iqr_value) < SCALER_BASE_MIN_VALUE)
        iqr_value = 1.00;

    //--- Scale in place
    const ulong total = X_to_scale.Size();
    for(ulong i = 0; i < total; i++)
        X_to_scale[i] = (X_to_scale[i] - median_value) / iqr_value;

    //--- Put the scaled elements back into the original layout
    return ReconstructVector(X, X_to_scale);
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Fits a per-column robust scaling (median / interquartile range) on X and
// returns X scaled.
//
// X         : input matrix in the full layout validated by CheckSizeCustom /
//             CheckSizeExcluded.
// save_data : when true the computed medians and IQRs are stored in
//             this.medians / this.iqrs.
// Returns   : ReconstructMatrix(X, scaled) on success, or X unchanged when the
//             size check fails.
//
// Columns whose IQR is below SCALER_BASE_MIN_VALUE use 1.0 instead so the
// division is always well defined. When the scaler is already loaded no
// fitting happens and the call is forwarded to transform().
matrix RobustScaler::fit_transform(const matrix &X, bool save_data)
{
    if(loaded_scaler)
    {
        LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
        // This function always works on the full layout and reconstructs it,
        // so transform() must do the same. The default flags would compare
        // the full column count against the trained column count.
        return transform(X, true, true);
    }

    LogInfo(StringFormat("Numero de columnas de entrada: %I64u", X.Cols()), FUNCION_ACTUAL);

    //--- Validate the layout before extracting anything
    const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
    if(!size_ok)
        return X;

    //--- Columns that take part in the scaling
    matrix X_to_scale = ExtractMatrixToScale(X);
    const ulong X_to_scale_cols = X_to_scale.Cols();
    const ulong X_to_scale_rows = X_to_scale.Rows();

    LogInfo(StringFormat("Columnas a escalar: %I64u", X_to_scale_cols), FUNCION_ACTUAL);

    //--- Per-column statistics
    vector medians_cts(X_to_scale_cols);
    vector iqrs_cts(X_to_scale_cols);

    for(ulong i = 0; i < X_to_scale_cols; i++)
    {
        const vector column = X_to_scale.Col(i);
        medians_cts[i] = column.Median();

        const double q75 = column.Percentile(75);
        const double q25 = column.Percentile(25);
        iqrs_cts[i] = q75 - q25;

        if(fabs(iqrs_cts[i]) < SCALER_BASE_MIN_VALUE)
        {
            LogWarning(StringFormat("Columna %I64u tiene IQR muy pequeño (%.2e), usando 1.0", i, iqrs_cts[i]), FUNCION_ACTUAL);
            iqrs_cts[i] = 1.0;
        }
    }

    //--- Scale in place
    for(ulong row = 0; row < X_to_scale_rows; row++)
        for(ulong col = 0; col < X_to_scale_cols; col++)
            X_to_scale[row][col] = (X_to_scale[row][col] - medians_cts[col]) / iqrs_cts[col];

    //--- Keep the fitted parameters only when requested
    if(save_data)
    {
        this.medians = medians_cts;
        this.iqrs = iqrs_cts;
    }

    //--- The full matrix is always reconstructed here
    return ReconstructMatrix(X, X_to_scale);
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
// Applies the stored medians and IQRs to a matrix: (x - median) / iqr,
// column by column.
//
// X                        : input matrix.
// solo_escalar_lo_previsto : true  -> scale only ExtractMatrixToScale(X);
//                            false -> scale every column of X.
// reconstruir              : true  -> return ReconstructMatrix(X, scaled);
//                            false -> return only the scaled matrix.
// Returns X unchanged when the scaler is not loaded, when the size check
// fails, or when the number of columns to scale differs from the number of
// stored medians.
matrix RobustScaler::transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
    if(!loaded_scaler)
    {
        LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
        return X;
    }

    //--- Layout validation (applied to X regardless of the flags)
    const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
    if(!size_ok)
        return X;

    //--- Data to scale
    matrix X_to_scale = solo_escalar_lo_previsto ? ExtractMatrixToScale(X) : X;
    const ulong X_to_scale_cols = X_to_scale.Cols();
    const ulong X_to_scale_rows = X_to_scale.Rows();

    //--- The stored parameters must describe exactly these columns
    const ulong trained_cols = this.medians.Size();
    if(X_to_scale_cols != trained_cols)
    {
        // Both values are ulong, so both need the 64-bit specifier
        LogError(StringFormat("Columnas a escalar %I64u != columnas entrenadas %I64u", X_to_scale_cols, trained_cols), FUNCION_ACTUAL);
        return X;
    }

    //--- Scale in place
    for(ulong row = 0; row < X_to_scale_rows; row++)
        for(ulong col = 0; col < X_to_scale_cols; col++)
            X_to_scale[row][col] = (X_to_scale[row][col] - this.medians[col]) / this.iqrs[col];

    //--- Optionally restore the original layout
    return (reconstruir) ? ReconstructMatrix(X, X_to_scale) : X_to_scale;
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Applies the stored medians and IQRs element by element to a vector
// (element i uses medians[i] / iqrs[i]).
//
// X                        : input vector.
// solo_escalar_lo_previsto : true  -> scale only ExtractVectorToScale(X);
//                            false -> scale every element of X.
// reconstruir              : true  -> return ReconstructVector(X, scaled);
//                            false -> return only the scaled vector.
// Returns X unchanged when the scaler is not loaded, when the size check
// fails before an extraction, or when the number of elements to scale differs
// from the number of stored medians.
vector RobustScaler::transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
    if(!loaded_scaler)
    {
        LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
        return X;
    }

    //--- Extraction is guarded by the same size check the fitting path runs
    //--- before calling ExtractVectorToScale
    if(solo_escalar_lo_previsto)
    {
        const bool size_ok = use_custom ? CheckSizeCustom(X) : CheckSizeExcluded(X);
        if(!size_ok)
            return X;
    }

    //--- Data to scale
    vector X_to_scale = solo_escalar_lo_previsto ? ExtractVectorToScale(X) : X;
    const ulong X_to_scale_size = X_to_scale.Size();

    //--- The stored parameters must describe exactly these elements
    const ulong trained_size = this.medians.Size();
    if(X_to_scale_size != trained_size)
    {
        // Both values are ulong, so both need the 64-bit specifier
        LogError(StringFormat("Elementos a escalar %I64u != elementos entrenados %I64u", X_to_scale_size, trained_size), FUNCION_ACTUAL);
        return X;
    }

    //--- Scale in place
    for(ulong i = 0; i < X_to_scale_size; i++)
        X_to_scale[i] = (X_to_scale[i] - this.medians[i]) / this.iqrs[i];

    //--- Optionally restore the original layout
    return (reconstruir) ? ReconstructVector(X, X_to_scale) : X_to_scale;
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
//| |
|
|
//+------------------------------------------------------------------+
|
|
// Reverts the robust scaling of a matrix: x = x_scaled * iqr + median,
// column by column, using the stored parameters.
//
// X_scaled                 : scaled input matrix.
// solo_escalar_lo_previsto : true  -> revert only ExtractMatrixToScale(X_scaled);
//                            false -> revert every column of X_scaled.
// reconstruir              : true  -> return ReconstructMatrix(X_scaled, reverted);
//                            false -> return only the reverted matrix.
// Returns X_scaled unchanged when the scaler is not loaded, when the size
// check fails before an extraction, or when the column count differs from the
// number of stored medians.
matrix RobustScaler::inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
    if(!loaded_scaler)
    {
        LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
        return X_scaled;
    }

    //--- Extraction is guarded by the same size check the forward path runs
    //--- before calling ExtractMatrixToScale
    if(solo_escalar_lo_previsto)
    {
        const bool size_ok = use_custom ? CheckSizeCustom(X_scaled) : CheckSizeExcluded(X_scaled);
        if(!size_ok)
            return X_scaled;
    }

    //--- Data to revert
    matrix X_to_unscale = (solo_escalar_lo_previsto) ? ExtractMatrixToScale(X_scaled) : X_scaled;
    const ulong X_to_unscale_cols = X_to_unscale.Cols();
    const ulong X_to_unscale_rows = X_to_unscale.Rows();

    //--- The stored parameters must describe exactly these columns
    const ulong trained_cols = this.medians.Size();
    if(X_to_unscale_cols != trained_cols)
    {
        // Both values are ulong, so both need the 64-bit specifier
        LogError(StringFormat("Columnas escaladas %I64u != columnas entrenadas %I64u", X_to_unscale_cols, trained_cols), FUNCION_ACTUAL);
        return X_scaled;
    }

    //--- Revert in place
    for(ulong row = 0; row < X_to_unscale_rows; row++)
        for(ulong col = 0; col < X_to_unscale_cols; col++)
            X_to_unscale[row][col] = X_to_unscale[row][col] * this.iqrs[col] + this.medians[col];

    //--- Optionally restore the original layout
    return (reconstruir) ? ReconstructMatrix(X_scaled, X_to_unscale) : X_to_unscale;
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
// Reverts the robust scaling of a vector: x = x_scaled * iqr + median,
// element by element, using the stored parameters.
//
// X_scaled                 : scaled input vector.
// solo_escalar_lo_previsto : true  -> revert only ExtractVectorToScale(X_scaled);
//                            false -> revert every element of X_scaled.
// reconstruir              : true  -> return ReconstructVector(X_scaled, reverted);
//                            false -> return only the reverted vector.
// Returns X_scaled unchanged when the scaler is not loaded, when the size
// check fails before an extraction, or when the element count differs from
// the number of stored medians.
vector RobustScaler::inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
{
    if(!loaded_scaler)
    {
        LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
        return X_scaled;
    }

    //--- Extraction is guarded by the same size check the fitting path runs
    //--- before calling ExtractVectorToScale
    if(solo_escalar_lo_previsto)
    {
        const bool size_ok = use_custom ? CheckSizeCustom(X_scaled) : CheckSizeExcluded(X_scaled);
        if(!size_ok)
            return X_scaled;
    }

    //--- Data to revert
    vector X_to_unscale = (solo_escalar_lo_previsto) ? ExtractVectorToScale(X_scaled) : X_scaled;
    const ulong X_to_unscale_size = X_to_unscale.Size();

    //--- The stored parameters must describe exactly these elements
    const ulong trained_size = this.medians.Size();
    if(X_to_unscale_size != trained_size)
    {
        // Both values are ulong, so both need the 64-bit specifier
        LogError(StringFormat("Elementos escalados %I64u != elementos entrenados %I64u", X_to_unscale_size, trained_size), FUNCION_ACTUAL);
        return X_scaled;
    }

    //--- Revert in place
    for(ulong i = 0; i < X_to_unscale_size; i++)
        X_to_unscale[i] = X_to_unscale[i] * this.iqrs[i] + this.medians[i];

    //--- Optionally restore the original layout
    return (reconstruir) ? ReconstructVector(X_scaled, X_to_unscale) : X_to_unscale;
}
|
|
|
|
//+------------------------------------------------------------------+
|
|
#endif // SCALERBYLEO_SCALER_BASE_MQH
|
|
//+------------------------------------------------------------------+
|