//+------------------------------------------------------------------+
//|                                                   ScalerBase.mqh |
//|                                             Copyright 2025, Leo. |
//|                                             https://www.mql5.com |
//+------------------------------------------------------------------+
#property copyright "Copyright 2025, Leo."
#property link      "https://www.mql5.com/en/users/nique_372"
#property strict

#ifndef SCALER_BY_LEO_GEN_MQH
#define SCALER_BY_LEO_GEN_MQH

#include "..\\MQLArticles\\Utils\\Funciones Array.mqh"
#include "..\\MQLArticles\\Utils\\File.mqh"

//+------------------------------------------------------------------+
//| Scaler Base                                                      |
//|                                                                  |
//| Common state and helpers for column-wise feature scalers.        |
//| A scaler works on a subset of the columns, selected either by a  |
//| custom range [start_col, start_col + count_cols) or by excluding |
//| the last `excluyed_cols` columns.                                |
//+------------------------------------------------------------------+
class ScalerBase : public CLoggerBase
  {
protected:
   string            file_name_out;   // File name built by save()/Load(): prefix + prefix_file
   string            prefix_file;     // File-name suffix, assigned by each concrete scaler
   bool              loaded_scaler;   // true only after load() succeeded
   bool              fitted_scaler;   // true after fit_transform(matrix, true) stored its parameters
   bool              use_custom;      // true: custom range is used; false: trailing columns are excluded
   ulong             start_col;       // First column of the custom range
   ulong             count_cols;      // Number of columns in the custom range
   ulong             excluyed_cols;   // Number of trailing columns left untouched (excluded mode)

   virtual bool      Save() = 0;
   virtual bool      Load(string prefix_name) = 0;

   //--- State helper: parameters are available either from a fit or from a file
   bool              IsReady() const { return (loaded_scaler || fitted_scaler); }

   //--- Size validation helpers
   bool              CheckSizeCustom(const matrix &mtx) const;
   bool              CheckSizeExcluded(const matrix &mtx) const;
   bool              CheckSizeCustom(const vector &v) const;
   bool              CheckSizeExcluded(const vector &v) const;
   bool              CanExtract(const matrix &mtx) const;
   bool              CanExtract(const vector &v) const;

   //--- Extraction / reconstruction helpers
   matrix            ExtractMatrixToScale(const matrix &X) const;
   matrix            ReconstructMatrix(const matrix &X_original, const matrix &X_scaled) const;
   vector            ExtractVectorToScale(const vector &X) const;
   vector            ReconstructVector(const vector &X_original, const vector &X_scaled) const;

public:
                     ScalerBase(void);
                    ~ScalerBase(void) {}

   //--- Column selection
   inline void       SetRangeEscaler(ulong start_col_, ulong count_col_); // Custom range
   inline void       SetRangeEscaler(ulong excluyed_cols_ = 1);           // Simpler: exclude the last N columns

   //--- Persistence
   inline bool       save(string prefix_name);
   inline bool       load(string prefix_name);

   //--- Scaling interface
   virtual matrix    fit_transform(const matrix &X, bool save_data) = 0;
   virtual vector    fit_transform(const vector &X) = 0; // Vector fits never store parameters
   virtual matrix    transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;
   virtual vector    transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;
   virtual matrix    inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;
   virtual vector    inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;

   //--- Common methods
   virtual inline string GetOutputFile() const final { return this.file_name_out; }
  };
//+------------------------------------------------------------------+
//| Default state: excluded mode, last column left untouched         |
//+------------------------------------------------------------------+
ScalerBase::ScalerBase(void)
   : start_col(0), count_cols(0), use_custom(false), excluyed_cols(1),
     file_name_out(NULL), prefix_file(NULL), loaded_scaler(false), fitted_scaler(false)
  {
  }
//+------------------------------------------------------------------+
//| Select a custom range of columns to scale                        |
//+------------------------------------------------------------------+
inline void ScalerBase::SetRangeEscaler(ulong start_col_, ulong count_col_)
  {
   this.use_custom = true;
   this.count_cols = count_col_;
   this.start_col = start_col_;
   LogInfo(StringFormat("Configurado escalado CUSTOM: columnas %I64u a %I64u (%I64u columnas)",
                        start_col_, start_col_ + count_col_ - 1, count_col_), FUNCION_ACTUAL);
  }
//+------------------------------------------------------------------+
//| Scale every column except the last `excluyed_cols_`              |
//+------------------------------------------------------------------+
inline void ScalerBase::SetRangeEscaler(ulong excluyed_cols_ = 1)
  {
   this.excluyed_cols = excluyed_cols_;
   this.use_custom = false;
   LogInfo(StringFormat("Configurado escalado EXCLUDED: excluir últimas %I64u columnas", excluyed_cols_), FUNCION_ACTUAL);
  }
//+------------------------------------------------------------------+
//| Check that the custom range fits inside the matrix columns       |
//+------------------------------------------------------------------+
bool ScalerBase::CheckSizeCustom(const matrix &mtx) const
  {
   if(start_col >= mtx.Cols())
     {
      LogError(StringFormat("Columna de inicio %I64u >= total columnas %I64u", start_col, mtx.Cols()), FUNCION_ACTUAL);
      return false;
     }
   if(start_col + count_cols > mtx.Cols())
     {
      LogError(StringFormat("Rango [%I64u:%I64u] excede columnas disponibles %I64u",
                            start_col, start_col + count_cols - 1, mtx.Cols()), FUNCION_ACTUAL);
      return false;
     }
   return true;
  }
//+------------------------------------------------------------------+
//| Check that at least one column remains after the exclusion       |
//+------------------------------------------------------------------+
bool ScalerBase::CheckSizeExcluded(const matrix &mtx) const
  {
//--- "<=" matches the logged message: excluding every column leaves nothing to scale
   if(mtx.Cols() <= excluyed_cols)
     {
      LogError(StringFormat("Columnas a excluir %I64u >= total columnas %I64u", excluyed_cols, mtx.Cols()), FUNCION_ACTUAL);
      return false;
     }
   return true;
  }
//+------------------------------------------------------------------+
//| Check that the custom range fits inside the vector               |
//+------------------------------------------------------------------+
bool ScalerBase::CheckSizeCustom(const vector &v) const
  {
   if(start_col >= v.Size())
     {
      LogError(StringFormat("Columna de inicio %I64u >= tamaño total del vector%I64u", start_col, v.Size()), FUNCION_ACTUAL);
      return false;
     }
   if(start_col + count_cols > v.Size())
     {
      LogError(StringFormat("Rango [%I64u:%I64u] excede el tamaño del vector %I64u",
                            start_col, start_col + count_cols - 1, v.Size()), FUNCION_ACTUAL);
      return false;
     }
   return true;
  }
//+------------------------------------------------------------------+
//| Check that at least one element remains after the exclusion      |
//+------------------------------------------------------------------+
bool ScalerBase::CheckSizeExcluded(const vector &v) const
  {
//--- "<=" matches the logged message: excluding every element leaves nothing to scale
   if(v.Size() <= excluyed_cols)
     {
      LogError(StringFormat("Columnas a excluir %I64u >= tamaño del vector: %I64u", excluyed_cols, v.Size()), FUNCION_ACTUAL);
      return false;
     }
   return true;
  }
//+------------------------------------------------------------------+
//| True when ExtractMatrixToScale() can be called safely on `mtx`   |
//+------------------------------------------------------------------+
bool ScalerBase::CanExtract(const matrix &mtx) const
  {
   if(!use_custom)
      return CheckSizeExcluded(mtx);
//--- Input that already has count_cols columns is passed through by ExtractMatrixToScale()
   if(mtx.Cols() == count_cols)
      return true;
   return CheckSizeCustom(mtx);
  }
//+------------------------------------------------------------------+
//| True when ExtractVectorToScale() can be called safely on `v`     |
//+------------------------------------------------------------------+
bool ScalerBase::CanExtract(const vector &v) const
  {
   if(!use_custom)
      return CheckSizeExcluded(v);
//--- Input that already has count_cols elements is passed through by ExtractVectorToScale()
   if(v.Size() == count_cols)
      return true;
   return CheckSizeCustom(v);
  }
//+------------------------------------------------------------------+
//| Return the sub-matrix holding only the columns to scale.         |
//| Callers must validate sizes first; no bounds check is done here. |
//+------------------------------------------------------------------+
matrix ScalerBase::ExtractMatrixToScale(const matrix &X) const
  {
   matrix result;
   if(use_custom)
     {
      //--- Already the expected width: nothing to extract
      if(X.Cols() == count_cols)
         return X;
      result.Init(X.Rows(), count_cols);
      for(ulong row = 0; row < X.Rows(); row++)
         for(ulong col = 0; col < count_cols; col++)
            result[row][col] = X[row][start_col + col];
     }
   else
     {
      if(excluyed_cols == 0)
         return X;
      //--- Keep every column except the last excluyed_cols
      ulong cols_to_scale = X.Cols() - excluyed_cols;
      result.Init(X.Rows(), cols_to_scale);
      for(ulong row = 0; row < X.Rows(); row++)
         for(ulong col = 0; col < cols_to_scale; col++)
            result[row][col] = X[row][col];
     }
   return result;
  }
//+------------------------------------------------------------------+
//| Write the scaled columns back into a copy of the original matrix |
//+------------------------------------------------------------------+
matrix ScalerBase::ReconstructMatrix(const matrix &X_original, const matrix &X_scaled) const
  {
//--- Same shape: every column was scaled, nothing to merge
   if(X_original.Rows() == X_scaled.Rows() && X_original.Cols() == X_scaled.Cols())
      return X_scaled;

   matrix result = X_original; // Full copy, untouched columns keep their values
   if(use_custom)
     {
      for(ulong row = 0; row < X_original.Rows(); row++)
         for(ulong col = 0; col < count_cols; col++)
            result[row][start_col + col] = X_scaled[row][col];
     }
   else
     {
      for(ulong row = 0; row < X_original.Rows(); row++)
         for(ulong col = 0; col < X_scaled.Cols(); col++)
            result[row][col] = X_scaled[row][col];
     }
   return result;
  }
//+------------------------------------------------------------------+
//| Return the sub-vector holding only the elements to scale.        |
//| Callers must validate sizes first; no bounds check is done here. |
//+------------------------------------------------------------------+
vector ScalerBase::ExtractVectorToScale(const vector &X) const
  {
   vector result;
   if(use_custom)
     {
      //--- Already the expected size: nothing to extract
      if(X.Size() == count_cols)
         return X;
      //--- Extract the configured range
      result.Resize(count_cols);
      for(ulong i = 0; i < count_cols; i++)
         result[i] = X[start_col + i];
     }
   else
     {
      if(excluyed_cols == 0)
         return X;
      //--- Extract everything except the last N elements
      ulong size_to_scale = X.Size() - excluyed_cols;
      result.Resize(size_to_scale);
      for(ulong i = 0; i < size_to_scale; i++)
         result[i] = X[i];
     }
   return result;
  }
//+------------------------------------------------------------------+
//| Write the scaled elements back into a copy of the original vector|
//+------------------------------------------------------------------+
vector ScalerBase::ReconstructVector(const vector &X_original, const vector &X_scaled) const
  {
//--- Same size: every element was scaled, nothing to merge
   if(X_original.Size() == X_scaled.Size())
      return X_scaled;

   vector result = X_original;
   if(use_custom)
     {
      //--- Replace the configured range
      for(ulong i = 0; i < count_cols; i++)
         result[start_col + i] = X_scaled[i];
     }
   else
     {
      //--- Replace everything except the last N elements
      for(ulong i = 0; i < X_scaled.Size(); i++)
         result[i] = X_scaled[i];
     }
   return result;
  }
//+------------------------------------------------------------------+
//| Load parameters from file; the scaler is flagged as loaded only  |
//| when Load() reports success                                      |
//+------------------------------------------------------------------+
inline bool ScalerBase::load(string prefix_name)
  {
   loaded_scaler = this.Load(prefix_name);
   return loaded_scaler;
  }
//+------------------------------------------------------------------+
//| Build the output file name and delegate to Save()                |
//+------------------------------------------------------------------+
inline bool ScalerBase::save(string prefix_name)
  {
   this.file_name_out = prefix_name + this.prefix_file;
   return this.Save();
  }
//+------------------------------------------------------------------+
//| Standardization Scaler: (x - mean) / std per column              |
//+------------------------------------------------------------------+
class StandardizationScaler : public ScalerBase
  {
protected:
   vector            mean, std;

   bool              Save() override;
   bool              Load(string prefix_name) override;

public:
                     StandardizationScaler() : ScalerBase() { this.prefix_file = "_mean_std.csv"; }

   matrix            fit_transform(const matrix &X, bool save_data) override;
   vector            fit_transform(const vector &X) override; // Vector fits never store parameters
   matrix            transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   matrix            inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
  };
//+------------------------------------------------------------------+
//| Write mean/std and the column configuration to the common folder |
//+------------------------------------------------------------------+
bool StandardizationScaler::Save()
  {
//--- The file lives in the common folder, so the delete must target it too
   FileDelete(this.file_name_out, FILE_COMMON);
   ResetLastError();
   int handle = FileOpen(this.file_name_out, FILE_WRITE | FILE_CSV | FILE_COMMON);
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
   FileWrite(handle, vector_to_string(this.mean));
   FileWrite(handle, vector_to_string(this.std));
   FileWrite(handle, count_cols);
   FileWrite(handle, start_col);
   FileWrite(handle, excluyed_cols);
   FileWrite(handle, (int)use_custom);
   FileClose(handle);
   return true;
  }
//+------------------------------------------------------------------+
//| Read mean/std and the column configuration written by Save()     |
//+------------------------------------------------------------------+
bool StandardizationScaler::Load(string prefix_name)
  {
   this.file_name_out = prefix_name + this.prefix_file;
   ResetLastError();
   int handle = FileOpen(file_name_out, FILE_READ | FILE_CSV | FILE_COMMON, "\n");
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
//--- Fields are read in the same order Save() writes them
   this.mean = string_to_vector(FileReadString(handle));
   this.std = string_to_vector(FileReadString(handle));
   this.count_cols = (ulong)StringToInteger(FileReadString(handle));
   this.start_col = (ulong)StringToInteger(FileReadString(handle));
   this.excluyed_cols = (ulong)StringToInteger(FileReadString(handle));
   this.use_custom = (bool)StringToInteger(FileReadString(handle));
   FileClose(handle);
//--- Reject files that do not yield one usable (mean, std) pair per column
   if(this.mean.Size() == 0 || this.mean.Size() != this.std.Size())
     {
      LogError(StringFormat("Parametros invalidos >> Filename= %s (mean= %I64u, std= %I64u)",
                            this.file_name_out, this.mean.Size(), this.std.Size()), FUNCION_ACTUAL);
      return false;
     }
   return true;
  }
//+------------------------------------------------------------------+
//| Standardize the selected elements of a vector with its own       |
//| mean/std; nothing is stored                                      |
//+------------------------------------------------------------------+
vector StandardizationScaler::fit_transform(const vector &X)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      //--- Input already matching the stored parameters keeps the direct path;
      //--- otherwise extract + reconstruct, as a regular fit would do
      if(X.Size() == this.mean.Size())
         return transform(X);
      return transform(X, true, true);
     }
//---
   if(use_custom)
     {
      if(!CheckSizeCustom(X))
         return X;
     }
   else
     {
      if(!CheckSizeExcluded(X))
         return X;
     }
//---
   vector X_to_scaled = ExtractVectorToScale(X);
//---
   double mean_val = X_to_scaled.Mean();
   double std_val = X_to_scaled.Std();
   if(std_val < 1e-9)
      std_val = 1.0; // Avoid division by zero
//---
   for(ulong i = 0; i < X_to_scaled.Size(); i++)
      X_to_scaled[i] = (X_to_scaled[i] - mean_val) / std_val;

   return ReconstructVector(X, X_to_scaled);
  }
//+------------------------------------------------------------------+
//| Fit per-column mean/std on the selected columns and return the   |
//| full matrix with those columns standardized.                     |
//| save_data = true stores the parameters for transform()/save()    |
//+------------------------------------------------------------------+
matrix StandardizationScaler::fit_transform(const matrix &X, bool save_data)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      //--- Input already matching the stored parameters keeps the direct path;
      //--- otherwise extract + reconstruct, as a regular fit would do
      if(X.Cols() == this.mean.Size())
         return transform(X);
      return transform(X, true, true);
     }
   LogInfo(StringFormat("Numero de columnas de entrada: %I64u", X.Cols()), FUNCION_ACTUAL);
//---
   if(use_custom)
     {
      if(!CheckSizeCustom(X))
         return X;
     }
   else
     {
      if(!CheckSizeExcluded(X))
         return X;
     }
//---
   matrix X_to_scale = ExtractMatrixToScale(X);
   LogInfo(StringFormat("Columnas a escalar: %I64u", X_to_scale.Cols()), FUNCION_ACTUAL);

   vector mean_cts(X_to_scale.Cols());
   vector std_cts(X_to_scale.Cols());
   for(ulong i = 0; i < X_to_scale.Cols(); i++)
     {
      mean_cts[i] = X_to_scale.Col(i).Mean();
      std_cts[i] = X_to_scale.Col(i).Std();
      //--- Avoid division by zero
      if(std_cts[i] < 1e-9)
        {
         LogWarning(StringFormat("Columna %I64u tiene std muy pequeño (%.2e), usando 1.0", i, std_cts[i]), FUNCION_ACTUAL);
         std_cts[i] = 1.0;
        }
     }
//---
   matrix X_scaled(X_to_scale.Rows(), X_to_scale.Cols());
   for(ulong row = 0; row < X_to_scale.Rows(); row++)
      for(ulong col = 0; col < X_to_scale.Cols(); col++)
         X_scaled[row][col] = (X_to_scale[row][col] - mean_cts[col]) / std_cts[col];

   if(save_data)
     {
      this.mean = mean_cts;
      this.std = std_cts;
      this.fitted_scaler = true; // transform()/inverse_transform() may now be used
     }
//--- The result is always reconstructed here
   return ReconstructMatrix(X, X_scaled);
  }
//+------------------------------------------------------------------+
//| Standardize a matrix with the stored mean/std.                   |
//| solo_escalar_lo_previsto: extract the configured columns first   |
//| reconstruir: merge the scaled columns back into a copy of X      |
//| On any error the input is returned unchanged.                    |
//+------------------------------------------------------------------+
matrix StandardizationScaler::transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!IsReady())
     {
      LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
      return X;
     }
//--- Sizes only matter when columns are going to be extracted
   matrix X_to_scale;
   if(solo_escalar_lo_previsto)
     {
      if(!CanExtract(X))
         return X;
      X_to_scale = ExtractMatrixToScale(X);
     }
   else
      X_to_scale = X;

   if(X_to_scale.Cols() != this.mean.Size())
     {
      LogError(StringFormat("Columnas a escalar %I64u != columnas entrenadas %I64u", X_to_scale.Cols(), this.mean.Size()), FUNCION_ACTUAL);
      return X;
     }
//---
   matrix X_scaled(X_to_scale.Rows(), X_to_scale.Cols());
   for(ulong row = 0; row < X_to_scale.Rows(); row++)
      for(ulong col = 0; col < X_to_scale.Cols(); col++)
         X_scaled[row][col] = (X_to_scale[row][col] - this.mean[col]) / this.std[col];
//---
   if(reconstruir)
      return ReconstructMatrix(X, X_scaled);
   return X_scaled;
  }
//+------------------------------------------------------------------+
//| Standardize a vector element-wise with the stored mean/std       |
//+------------------------------------------------------------------+
vector StandardizationScaler::transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!IsReady())
     {
      LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
      return X;
     }
//--- Sizes only matter when elements are going to be extracted
   vector X_to_scale;
   if(solo_escalar_lo_previsto)
     {
      if(!CanExtract(X))
         return X;
      X_to_scale = ExtractVectorToScale(X);
     }
   else
      X_to_scale = X;

   if(X_to_scale.Size() != this.mean.Size())
     {
      LogError(StringFormat("Elementos a escalar %I64u != elementos entrenados %I64u", X_to_scale.Size(), this.mean.Size()), FUNCION_ACTUAL);
      return X;
     }
//---
   vector X_scaled(X_to_scale.Size());
   for(ulong i = 0; i < X_to_scale.Size(); i++)
      X_scaled[i] = (X_to_scale[i] - this.mean[i]) / this.std[i];
//---
   if(reconstruir)
      return ReconstructVector(X, X_scaled);
   return X_scaled;
  }
//+------------------------------------------------------------------+
//| Undo the standardization of a matrix: x * std + mean             |
//+------------------------------------------------------------------+
matrix StandardizationScaler::inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!IsReady())
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }
//--- Validate before extracting to avoid out-of-range column access
   matrix X_to_unscale;
   if(solo_escalar_lo_previsto)
     {
      if(!CanExtract(X_scaled))
         return X_scaled;
      X_to_unscale = ExtractMatrixToScale(X_scaled);
     }
   else
      X_to_unscale = X_scaled;
//---
   if(X_to_unscale.Cols() != this.mean.Size())
     {
      LogError(StringFormat("Columnas escaladas %I64u != columnas entrenadas %I64u", X_to_unscale.Cols(), this.mean.Size()), FUNCION_ACTUAL);
      return X_scaled;
     }
//---
   matrix X_unscaled(X_to_unscale.Rows(), X_to_unscale.Cols());
   for(ulong row = 0; row < X_to_unscale.Rows(); row++)
      for(ulong col = 0; col < X_to_unscale.Cols(); col++)
         X_unscaled[row][col] = X_to_unscale[row][col] * this.std[col] + this.mean[col];
//---
   if(reconstruir)
      return ReconstructMatrix(X_scaled, X_unscaled);
   return X_unscaled;
  }
//+------------------------------------------------------------------+
//| Undo the standardization of a vector: x * std + mean             |
//+------------------------------------------------------------------+
vector StandardizationScaler::inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!IsReady())
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }
//--- Validate before extracting to avoid out-of-range element access
   vector X_to_unscale;
   if(solo_escalar_lo_previsto)
     {
      if(!CanExtract(X_scaled))
         return X_scaled;
      X_to_unscale = ExtractVectorToScale(X_scaled);
     }
   else
      X_to_unscale = X_scaled;
//---
   if(X_to_unscale.Size() != this.mean.Size())
     {
      LogError(StringFormat("Elementos escalados %I64u != elementos entrenados %I64u", X_to_unscale.Size(), this.mean.Size()), FUNCION_ACTUAL);
      return X_scaled;
     }
//---
   vector X_unscaled(X_to_unscale.Size());
   for(ulong i = 0; i < X_to_unscale.Size(); i++)
      X_unscaled[i] = X_to_unscale[i] * this.std[i] + this.mean[i];
//---
   if(reconstruir)
      return ReconstructVector(X_scaled, X_unscaled);
   return X_unscaled;
  }
//+------------------------------------------------------------------+
//| MaxMin Scaler: (x - min) / (max - min) per column                |
//+------------------------------------------------------------------+
class MaxMinScaler : public ScalerBase
  {
protected:
   vector            min_vals, max_vals;

   bool              Save() override;
   bool              Load(string prefix_name) override;

public:
                     MaxMinScaler() : ScalerBase() { this.prefix_file = "_min_max.csv"; }

   vector            fit_transform(const vector &X) override; // Vector fits never store parameters
   matrix            fit_transform(const matrix &X, bool save_data) override;
   matrix            transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   matrix            inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
  };
//+------------------------------------------------------------------+
//| Write min/max and the column configuration to the common folder  |
//+------------------------------------------------------------------+
bool MaxMinScaler::Save()
  {
//--- The file lives in the common folder, so the delete must target it too
   FileDelete(this.file_name_out, FILE_COMMON);
   ResetLastError();
   int handle = FileOpen(this.file_name_out, FILE_WRITE | FILE_CSV | FILE_COMMON);
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d - Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
   FileWrite(handle, vector_to_string(this.min_vals));
   FileWrite(handle, vector_to_string(this.max_vals));
   FileWrite(handle, count_cols);
   FileWrite(handle, start_col);
   FileWrite(handle, excluyed_cols);
   FileWrite(handle, (int)use_custom);
   FileClose(handle);
   return true;
  }
//+------------------------------------------------------------------+
//| Read min/max and the column configuration written by Save()      |
//+------------------------------------------------------------------+
bool MaxMinScaler::Load(string prefix_name)
  {
   this.file_name_out = prefix_name + this.prefix_file;
   ResetLastError();
   int handle = FileOpen(file_name_out, FILE_READ | FILE_CSV | FILE_COMMON, "\n");
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d - Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
//--- Fields are read in the same order Save() writes them
   this.min_vals = string_to_vector(FileReadString(handle));
   this.max_vals = string_to_vector(FileReadString(handle));
   this.count_cols = (ulong)StringToInteger(FileReadString(handle));
   this.start_col = (ulong)StringToInteger(FileReadString(handle));
   this.excluyed_cols = (ulong)StringToInteger(FileReadString(handle));
   this.use_custom = (bool)StringToInteger(FileReadString(handle));
   FileClose(handle);
//--- Reject files that do not yield one usable (min, max) pair per column
   if(this.min_vals.Size() == 0 || this.min_vals.Size() != this.max_vals.Size())
     {
      LogError(StringFormat("Parametros invalidos - Filename= %s (min= %I64u, max= %I64u)",
                            this.file_name_out, this.min_vals.Size(), this.max_vals.Size()), FUNCION_ACTUAL);
      return false;
     }
   return true;
  }
//+------------------------------------------------------------------+
//| Min-max scale the selected elements of a vector with its own     |
//| min/max; nothing is stored                                       |
//+------------------------------------------------------------------+
vector MaxMinScaler::fit_transform(const vector &X)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado | no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      //--- Input already matching the stored parameters keeps the direct path;
      //--- otherwise extract + reconstruct, as a regular fit would do
      if(X.Size() == this.min_vals.Size())
         return transform(X);
      return transform(X, true, true);
     }
//---
   if(use_custom)
     {
      if(!CheckSizeCustom(X))
         return X;
     }
   else
     {
      if(!CheckSizeExcluded(X))
         return X;
     }
//---
   vector X_to_scale = ExtractVectorToScale(X);
//---
   double max_val = X_to_scale.Max();
   double min_val = X_to_scale.Min();
   if(max_val - min_val < 1e-10)
      max_val = min_val + 1.0; // Avoid division by zero on a constant vector
//---
   for(ulong i = 0; i < X_to_scale.Size(); i++)
      X_to_scale[i] = (X_to_scale[i] - min_val) / (max_val - min_val);
//---
   return ReconstructVector(X, X_to_scale);
  }
//+------------------------------------------------------------------+
//| Fit per-column min/max on the selected columns and return the    |
//| full matrix with those columns scaled.                           |
//| save_data = true stores the parameters for transform()/save()    |
//+------------------------------------------------------------------+
matrix MaxMinScaler::fit_transform(const matrix &X, bool save_data)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado | no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      //--- Input already matching the stored parameters keeps the direct path;
      //--- otherwise extract + reconstruct, as a regular fit would do
      if(X.Cols() == this.min_vals.Size())
         return transform(X);
      return transform(X, true, true);
     }
   LogInfo(StringFormat("Numero de columnas de entrada: %I64u", X.Cols()), FUNCION_ACTUAL);
//---
   if(use_custom)
     {
      if(!CheckSizeCustom(X))
         return X;
     }
   else
     {
      if(!CheckSizeExcluded(X))
         return X;
     }
//---
   matrix X_to_scale = ExtractMatrixToScale(X);
   LogInfo(StringFormat("Columnas a escalar: %I64u", X_to_scale.Cols()), FUNCION_ACTUAL);
//---
   vector min_vals_cts(X_to_scale.Cols());
   vector max_vals_cts(X_to_scale.Cols());
   for(ulong i = 0; i < X_to_scale.Cols(); i++)
     {
      min_vals_cts[i] = X_to_scale.Col(i).Min();
      max_vals_cts[i] = X_to_scale.Col(i).Max();
      //--- Avoid division by zero on a constant column
      if(fabs(max_vals_cts[i] - min_vals_cts[i]) < 1e-10)
        {
         LogWarning(StringFormat("Columna %I64u tiene rango muy pequeño (%.2e), usando rango 1.0", i, max_vals_cts[i] - min_vals_cts[i]), FUNCION_ACTUAL);
         max_vals_cts[i] = min_vals_cts[i] + 1.0;
        }
     }
//---
   matrix X_scaled(X_to_scale.Rows(), X_to_scale.Cols());
   for(ulong row = 0; row < X_to_scale.Rows(); row++)
      for(ulong col = 0; col < X_to_scale.Cols(); col++)
         X_scaled[row][col] = (X_to_scale[row][col] - min_vals_cts[col]) / (max_vals_cts[col] - min_vals_cts[col]);

   if(save_data)
     {
      this.min_vals = min_vals_cts;
      this.max_vals = max_vals_cts;
      this.fitted_scaler = true; // transform()/inverse_transform() may now be used
     }
//---
   return ReconstructMatrix(X, X_scaled);
  }
//+------------------------------------------------------------------+
//| Min-max scale a matrix with the stored min/max.                  |
//| On any error the input is returned unchanged.                    |
//+------------------------------------------------------------------+
matrix MaxMinScaler::transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!IsReady())
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform o load()", FUNCION_ACTUAL);
      return X;
     }
//--- Sizes only matter when columns are going to be extracted
   matrix X_to_scale;
   if(solo_escalar_lo_previsto)
     {
      if(!CanExtract(X))
         return X;
      X_to_scale = ExtractMatrixToScale(X);
     }
   else
      X_to_scale = X;

   if(X_to_scale.Cols() != this.min_vals.Size())
     {
      LogError(StringFormat("Columnas a escalar %I64u != columnas entrenadas %I64u", X_to_scale.Cols(), this.min_vals.Size()), FUNCION_ACTUAL);
      return X;
     }
//---
   matrix X_scaled(X_to_scale.Rows(), X_to_scale.Cols());
   for(ulong row = 0; row < X_to_scale.Rows(); row++)
      for(ulong col = 0; col < X_to_scale.Cols(); col++)
         X_scaled[row][col] = (X_to_scale[row][col] - this.min_vals[col]) / (this.max_vals[col] - this.min_vals[col]);
//---
   if(reconstruir)
      return ReconstructMatrix(X, X_scaled);
   return X_scaled;
  }
//+------------------------------------------------------------------+
//| Min-max scale a vector element-wise with the stored min/max      |
//+------------------------------------------------------------------+
vector MaxMinScaler::transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!IsReady())
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform o load()", FUNCION_ACTUAL);
      return X;
     }
//--- Sizes only matter when elements are going to be extracted
   vector X_to_scale;
   if(solo_escalar_lo_previsto)
     {
      if(!CanExtract(X))
         return X;
      X_to_scale = ExtractVectorToScale(X);
     }
   else
      X_to_scale = X;

   if(X_to_scale.Size() != this.min_vals.Size())
     {
      LogError(StringFormat("Elementos a escalar %I64u != elementos entrenados %I64u", X_to_scale.Size(), this.min_vals.Size()), FUNCION_ACTUAL);
      return X;
     }
//---
   vector X_scaled(X_to_scale.Size());
   for(ulong i = 0; i < X_to_scale.Size(); i++)
      X_scaled[i] = (X_to_scale[i] - this.min_vals[i]) / (this.max_vals[i] - this.min_vals[i]);
//---
   if(reconstruir)
      return ReconstructVector(X, X_scaled);
   return X_scaled;
  }
//+------------------------------------------------------------------+
//| Undo the min-max scaling of a matrix: x * (max - min) + min      |
//+------------------------------------------------------------------+
matrix MaxMinScaler::inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!IsReady())
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }
//--- Validate before extracting to avoid out-of-range column access
   matrix X_to_unscale;
   if(solo_escalar_lo_previsto)
     {
      if(!CanExtract(X_scaled))
         return X_scaled;
      X_to_unscale = ExtractMatrixToScale(X_scaled);
     }
   else
      X_to_unscale = X_scaled;
//---
   if(X_to_unscale.Cols() != this.min_vals.Size())
     {
      LogError(StringFormat("Columnas escaladas %I64u != columnas entrenadas %I64u", X_to_unscale.Cols(), this.min_vals.Size()), FUNCION_ACTUAL);
      return X_scaled;
     }
//---
   matrix X_unscaled(X_to_unscale.Rows(), X_to_unscale.Cols());
   for(ulong row = 0; row < X_to_unscale.Rows(); row++)
      for(ulong col = 0; col < X_to_unscale.Cols(); col++)
         X_unscaled[row][col] = X_to_unscale[row][col] * (this.max_vals[col] - this.min_vals[col]) + this.min_vals[col];
//---
   if(reconstruir)
      return ReconstructMatrix(X_scaled, X_unscaled);
   return X_unscaled;
  }
//+------------------------------------------------------------------+
//| Undo the min-max scaling of a vector: x * (max - min) + min      |
//+------------------------------------------------------------------+
vector MaxMinScaler::inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!IsReady())
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }
//--- Validate before extracting to avoid out-of-range element access
   vector X_to_unscale;
   if(solo_escalar_lo_previsto)
     {
      if(!CanExtract(X_scaled))
         return X_scaled;
      X_to_unscale = ExtractVectorToScale(X_scaled);
     }
   else
      X_to_unscale = X_scaled;

   if(X_to_unscale.Size() != this.min_vals.Size())
     {
      LogError(StringFormat("Elementos escalados %I64u != elementos entrenados %I64u", X_to_unscale.Size(), this.min_vals.Size()), FUNCION_ACTUAL);
      return X_scaled;
     }
//---
   vector X_unscaled(X_to_unscale.Size());
   for(ulong i = 0; i < X_to_unscale.Size(); i++)
      X_unscaled[i] = X_to_unscale[i] * (this.max_vals[i] - this.min_vals[i]) + this.min_vals[i];
//---
   if(reconstruir)
      return ReconstructVector(X_scaled, X_unscaled);
   return X_unscaled;
  }
//+------------------------------------------------------------------+
//| Robust Scaler: (x - median) / IQR per column                     |
//+------------------------------------------------------------------+
class RobustScaler : public ScalerBase
  {
protected:
   vector            medians, iqrs;

   bool              Save() override;
   bool              Load(string prefix_name) override;

public:
                     RobustScaler() : ScalerBase() { this.prefix_file = "_median_iqr.csv"; }

   vector            fit_transform(const vector &X) override; // Vector fits never store parameters
   matrix            fit_transform(const matrix &X, bool save_data) override;
   matrix            transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   matrix            inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
  };
//+------------------------------------------------------------------+
//| Write medians/IQRs and the column configuration to the common    |
//| folder                                                           |
//+------------------------------------------------------------------+
bool RobustScaler::Save()
  {
//--- The file lives in the common folder, so the delete must target it too
   FileDelete(this.file_name_out, FILE_COMMON);
   ResetLastError();
   int handle = FileOpen(this.file_name_out, FILE_WRITE | FILE_CSV | FILE_COMMON);
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d - Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
   FileWrite(handle, vector_to_string(this.medians));
   FileWrite(handle, vector_to_string(this.iqrs));
   FileWrite(handle, count_cols);
   FileWrite(handle, start_col);
   FileWrite(handle, excluyed_cols);
   FileWrite(handle, (int)use_custom);
   FileClose(handle);
   return true;
  }
//+------------------------------------------------------------------+
//| Read medians/IQRs and the column configuration written by Save() |
//+------------------------------------------------------------------+
bool RobustScaler::Load(string prefix_name)
  {
   this.file_name_out = prefix_name + this.prefix_file;
   ResetLastError();
   int handle = FileOpen(file_name_out, FILE_READ | FILE_CSV | FILE_COMMON, "\n");
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d - Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
//--- Fields are read in the same order Save() writes them
   this.medians = string_to_vector(FileReadString(handle));
   this.iqrs = string_to_vector(FileReadString(handle));
   this.count_cols = (ulong)StringToInteger(FileReadString(handle));
   this.start_col = (ulong)StringToInteger(FileReadString(handle));
   this.excluyed_cols = (ulong)StringToInteger(FileReadString(handle));
   this.use_custom = (bool)StringToInteger(FileReadString(handle));
   FileClose(handle);
//--- Reject files that do not yield one usable (median, IQR) pair per column
   if(this.medians.Size() == 0 || this.medians.Size() != this.iqrs.Size())
     {
      LogError(StringFormat("Parametros invalidos - Filename= %s (median= %I64u, iqr= %I64u)",
                            this.file_name_out, this.medians.Size(), this.iqrs.Size()), FUNCION_ACTUAL);
      return false;
     }
   return true;
  }
//+------------------------------------------------------------------+
//| Robust-scale the selected elements of a vector with its own      |
//| median/IQR; nothing is stored                                    |
//+------------------------------------------------------------------+
vector RobustScaler::fit_transform(const vector &X)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado | no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      //--- Input already matching the stored parameters keeps the direct path;
      //--- otherwise extract + reconstruct, as a regular fit would do
      if(X.Size() == this.medians.Size())
         return transform(X);
      return transform(X, true, true);
     }
//---
   if(use_custom)
     {
      if(!CheckSizeCustom(X))
         return X;
     }
   else
     {
      if(!CheckSizeExcluded(X))
         return X;
     }
//---
   vector X_to_scale = ExtractVectorToScale(X);
//---
   double median_val = X_to_scale.Median();
   double q75 = X_to_scale.Percentile(75);
   double q25 = X_to_scale.Percentile(25);
   double iqr_val = q75 - q25;
   if(fabs(iqr_val) < 1e-10)
      iqr_val = 1.00; // Avoid division by zero
//---
   for(ulong i = 0; i < X_to_scale.Size(); i++)
      X_to_scale[i] = (X_to_scale[i] - median_val) / iqr_val;
//---
   return ReconstructVector(X, X_to_scale);
  }
//+------------------------------------------------------------------+
//| Fit per-column median/IQR on the selected columns and return the |
//| full matrix with those columns scaled.                           |
//| save_data = true stores the parameters for transform()/save()    |
//+------------------------------------------------------------------+
matrix RobustScaler::fit_transform(const matrix &X, bool save_data)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado | no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      //--- Input already matching the stored parameters keeps the direct path;
      //--- otherwise extract + reconstruct, as a regular fit would do
      if(X.Cols() == this.medians.Size())
         return transform(X);
      return transform(X, true, true);
     }
   LogInfo(StringFormat("Numero de columnas de entrada: %I64u", X.Cols()), FUNCION_ACTUAL);
//---
   if(use_custom)
     {
      if(!CheckSizeCustom(X))
         return X;
     }
   else
     {
      if(!CheckSizeExcluded(X))
         return X;
     }
//---
   matrix X_to_scale = ExtractMatrixToScale(X);
   LogInfo(StringFormat("Columnas a escalar: %I64u", X_to_scale.Cols()), FUNCION_ACTUAL);
//---
   vector medians_cts(X_to_scale.Cols());
   vector iqrs_cts(X_to_scale.Cols());
//---
   for(ulong i = 0; i < X_to_scale.Cols(); i++)
     {
      vector column = X_to_scale.Col(i);
      medians_cts[i] = column.Median();
      double q75 = column.Percentile(75);
      double q25 = column.Percentile(25);
      iqrs_cts[i] = q75 - q25;
      //--- Avoid division by zero
      if(fabs(iqrs_cts[i]) < 1e-10)
        {
         LogWarning(StringFormat("Columna %I64u tiene IQR muy pequeño (%.2e), usando 1.0", i, iqrs_cts[i]), FUNCION_ACTUAL);
         iqrs_cts[i] = 1.0;
        }
     }
//---
   matrix X_scaled(X_to_scale.Rows(), X_to_scale.Cols());
   for(ulong row = 0; row < X_to_scale.Rows(); row++)
      for(ulong col = 0; col < X_to_scale.Cols(); col++)
         X_scaled[row][col] = (X_to_scale[row][col] - medians_cts[col]) / iqrs_cts[col];

   if(save_data)
     {
      this.medians = medians_cts;
      this.iqrs = iqrs_cts;
      this.fitted_scaler = true; // transform()/inverse_transform() may now be used
     }
//---
   return ReconstructMatrix(X, X_scaled);
  }
//+------------------------------------------------------------------+
//| Robust-scale a matrix with the stored medians/IQRs.              |
//| On any error the input is returned unchanged.                    |
//+------------------------------------------------------------------+
matrix RobustScaler::transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!IsReady())
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform o load()", FUNCION_ACTUAL);
      return X;
     }
//--- Sizes only matter when columns are going to be extracted
   matrix X_to_scale;
   if(solo_escalar_lo_previsto)
     {
      if(!CanExtract(X))
         return X;
      X_to_scale = ExtractMatrixToScale(X);
     }
   else
      X_to_scale = X;

   if(X_to_scale.Cols() != this.medians.Size())
     {
      LogError(StringFormat("Columnas a escalar %I64u != columnas entrenadas %I64u", X_to_scale.Cols(), this.medians.Size()), FUNCION_ACTUAL);
      return X;
     }
//---
   matrix X_scaled(X_to_scale.Rows(), X_to_scale.Cols());
   for(ulong row = 0; row < X_to_scale.Rows(); row++)
      for(ulong col = 0; col < X_to_scale.Cols(); col++)
         X_scaled[row][col] = (X_to_scale[row][col] - this.medians[col]) / this.iqrs[col];
//---
   if(reconstruir)
      return ReconstructMatrix(X, X_scaled);
   return X_scaled;
  }
//+------------------------------------------------------------------+
//| Robust-scale a vector element-wise with the stored medians/IQRs  |
//+------------------------------------------------------------------+
vector RobustScaler::transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!IsReady())
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform o load()", FUNCION_ACTUAL);
      return X;
     }
//--- Sizes only matter when elements are going to be extracted
   vector X_to_scale;
   if(solo_escalar_lo_previsto)
     {
      if(!CanExtract(X))
         return X;
      X_to_scale = ExtractVectorToScale(X);
     }
   else
      X_to_scale = X;

   if(X_to_scale.Size() != this.medians.Size())
     {
      LogError(StringFormat("Elementos a escalar %I64u != elementos entrenados %I64u", X_to_scale.Size(), this.medians.Size()), FUNCION_ACTUAL);
      return X;
     }
//---
   vector X_scaled(X_to_scale.Size());
   for(ulong i = 0; i < X_to_scale.Size(); i++)
      X_scaled[i] = (X_to_scale[i] - this.medians[i]) / this.iqrs[i];
//---
   if(reconstruir)
      return ReconstructVector(X, X_scaled);
   return X_scaled;
  }
//+------------------------------------------------------------------+
//| Undo the robust scaling of a matrix: x * IQR + median            |
//+------------------------------------------------------------------+
matrix RobustScaler::inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!IsReady())
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }
//--- Validate before extracting to avoid out-of-range column access
   matrix X_to_unscale;
   if(solo_escalar_lo_previsto)
     {
      if(!CanExtract(X_scaled))
         return X_scaled;
      X_to_unscale = ExtractMatrixToScale(X_scaled);
     }
   else
      X_to_unscale = X_scaled;

   if(X_to_unscale.Cols() != this.medians.Size())
     {
      LogError(StringFormat("Columnas escaladas %I64u != columnas entrenadas %I64u", X_to_unscale.Cols(), this.medians.Size()), FUNCION_ACTUAL);
      return X_scaled;
     }
//---
   matrix X_unscaled(X_to_unscale.Rows(), X_to_unscale.Cols());
   for(ulong row = 0; row < X_to_unscale.Rows(); row++)
      for(ulong col = 0; col < X_to_unscale.Cols(); col++)
         X_unscaled[row][col] = X_to_unscale[row][col] * this.iqrs[col] + this.medians[col];
//---
   if(reconstruir)
      return ReconstructMatrix(X_scaled, X_unscaled);
   return X_unscaled;
  }
//+------------------------------------------------------------------+
//| Undo the robust scaling of a vector: x * IQR + median            |
//+------------------------------------------------------------------+
vector RobustScaler::inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!IsReady())
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }
//--- Validate before extracting to avoid out-of-range element access
   vector X_to_unscale;
   if(solo_escalar_lo_previsto)
     {
      if(!CanExtract(X_scaled))
         return X_scaled;
      X_to_unscale = ExtractVectorToScale(X_scaled);
     }
   else
      X_to_unscale = X_scaled;

   if(X_to_unscale.Size() != this.medians.Size())
     {
      LogError(StringFormat("Elementos escalados %I64u != elementos entrenados %I64u", X_to_unscale.Size(), this.medians.Size()), FUNCION_ACTUAL);
      return X_scaled;
     }
//---
   vector X_unscaled(X_to_unscale.Size());
   for(ulong i = 0; i < X_to_unscale.Size(); i++)
      X_unscaled[i] = X_to_unscale[i] * this.iqrs[i] + this.medians[i];
//---
   if(reconstruir)
      return ReconstructVector(X_scaled, X_unscaled);
   return X_unscaled;
  }
//+------------------------------------------------------------------+
#endif