//+------------------------------------------------------------------+
//|                                                   ScalerBase.mqh |
//|                                             Copyright 2025, Leo. |
//|                                             https://www.mql5.com |
//+------------------------------------------------------------------+
#property copyright "Copyright 2025, Leo."
#property link      "https://www.mql5.com/en/users/nique_372"
#property strict

#ifndef SCALERBYLEO_SCALER_BASE_MQH
#define SCALERBYLEO_SCALER_BASE_MQH

//+------------------------------------------------------------------+
//| Dependencies                                                     |
//+------------------------------------------------------------------+
#include "..\\..\\MQLArticles\\Utils\\Basic.mqh"
#include "..\\..\\MQLArticles\\Utils\\File.mqh"

//+------------------------------------------------------------------+
//| Constants                                                        |
//+------------------------------------------------------------------+
// Sentinel for the "end line" arguments: read the input up to its last line.
#define SCALER_BASE_FINAL_LINE -1
// Spreads (std, max - min, IQR) below this threshold are replaced by 1.0
// by the concrete scalers so that scaling never divides by ~0.
#define SCALER_BASE_MIN_VALUE 1e-10

//+------------------------------------------------------------------+
//| Scaler Base                                                      |
//| Abstract base for the column scalers in this file. It stores     |
//| which columns are scaled (a custom range, or "all but the last   |
//| N"), offers helpers to extract / put back those columns, and     |
//| reads input data from files or strings before delegating to the  |
//| virtual fit_transform().                                         |
//+------------------------------------------------------------------+
class ScalerBase : public CLoggerBase
  {
protected:
   string            file_name_out;   // Full parameter file name: caller prefix + prefix_file
   string            prefix_file;     // File name ending assigned by each subclass constructor (appended after the caller prefix)
   bool              loaded_scaler;   // Set by load(); checked by the subclasses' fit_transform/transform
   bool              use_custom;      // Flag: true = custom column range, false = exclude trailing columns
   ulong             start_col;       // Custom mode: first column to scale
   ulong             count_cols;      // Custom mode: number of columns to scale
   ulong             excluyed_cols;   // Excluded mode: number of trailing columns left unscaled

   // Persistence hooks implemented by each concrete scaler
   virtual bool      Save() = 0;
   virtual bool      Load(string prefix_name) = 0;

   //--- Helper methods: validate the configured range against the input size
   bool              CheckSizeCustom(const matrix &mtx) const;
   bool              CheckSizeExcluded(const matrix &mtx) const;
   bool              CheckSizeCustom(const vector &v) const;
   bool              CheckSizeExcluded(const vector &v) const;

   //--- Extract the columns/elements to scale, and write scaled values back into a copy of the original
   matrix            ExtractMatrixToScale(const matrix &X) const;
   matrix            ReconstructMatrix(const matrix &X_original, const matrix &X_scaled) const;
   vector            ExtractVectorToScale(const vector &X) const;
   vector            ReconstructVector(const vector &X_original, const vector &X_scaled) const;

public:
                     ScalerBase(void);
                    ~ScalerBase(void) {}

   //--- Column selection
   inline void       SetRangeEscaler(ulong start_col_, ulong count_col_); // Custom range
   inline void       SetRangeEscaler(ulong excluyed_cols_ = 1); // Simpler: the user chooses how many columns, counted from the end, are excluded

   //--- Persistence (prefix_name is combined with the subclass file name ending)
   inline bool       save(string prefix_name);
   inline bool       load(string prefix_name);

   //--- Transform
   //- Fit
   // Matrix
   virtual matrix    fit_transform(const matrix &X, bool save_data) = 0;
   // Transforms a file (the file itself is rewritten)
   bool              fit_transform_file(const string &file_name, ushort sep, int extra_file_flags, bool save_data, uint file_code_page = CP_ACP, int start_line = 1);
   // Transforms a file and returns the matrix
   matrix            fit_transform_by_file(const string& file_name, ushort sep, int file_flags, bool save_data, uint file_code_page = CP_ACP, int start_line = 1, int end_line = SCALER_BASE_FINAL_LINE);
   // Transforms a string
   matrix            fit_transform_by_src(const string& src, ushort sep, bool save_data, int str_start_line = 1, int str_end_line = SCALER_BASE_FINAL_LINE); // Transforms a string

   // Vector
   virtual vector    fit_transform(const vector &X) = 0; // For vectors no data is stored

   //- Normal
   virtual matrix    transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;
   virtual vector    transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;
   virtual matrix    inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;
   virtual vector    inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) = 0;

   // --- Common methods ---
   virtual inline string GetOutputFile() const final { return this.file_name_out; }
  };

//+------------------------------------------------------------------+
//| Default state: excluded mode with 1 trailing column left out,    |
//| no output file name, scaler not loaded.                          |
//+------------------------------------------------------------------+
ScalerBase::ScalerBase(void) : start_col(0), count_cols(0), use_custom(false), excluyed_cols(1), file_name_out(NULL), loaded_scaler(false)
  {
  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
// Selects CUSTOM mode: scale count_col_ columns starting at start_col_.
inline void ScalerBase::SetRangeEscaler(ulong start_col_, ulong count_col_)
  {
   this.use_custom = true;
   this.count_cols = count_col_;
   this.start_col = start_col_;
   LogInfo(StringFormat("Configurado escalado CUSTOM: columnas %I64u a %I64u (%I64u columnas)", start_col_, start_col_ + count_col_ - 1, count_col_), FUNCION_ACTUAL);
  }
//+------------------------------------------------------------------+
// Selects EXCLUDED mode: scale every column except the last excluyed_cols_.
inline void ScalerBase::SetRangeEscaler(ulong excluyed_cols_ = 1)
  {
   this.excluyed_cols = excluyed_cols_;
   this.use_custom = false;
   LogInfo(StringFormat("Configurado escalado EXCLUDED: excluir últimas %I64u columnas", excluyed_cols_), FUNCION_ACTUAL);
  }
//+------------------------------------------------------------------+
//| Range validation                                                 |
//+------------------------------------------------------------------+
// Returns true when the custom range [start_col, start_col + count_cols)
// lies inside the columns of mtx; logs the reason and returns false otherwise.
bool ScalerBase::CheckSizeCustom(const matrix &mtx) const
  {
   const ulong total = mtx.Cols();
   if(start_col >= total)
     {
      LogError(StringFormat("Columna de inicio %I64u >= total columnas %I64u", start_col, total), FUNCION_ACTUAL);
      return false;
     }
   if(start_col + count_cols > total)
     {
      LogError(StringFormat("Rango [%I64u:%I64u] excede columnas disponibles %I64u", start_col, start_col + count_cols - 1, total), FUNCION_ACTUAL);
      return false;
     }
   return true;
  }
//+------------------------------------------------------------------+
// Returns false (and logs) when more columns are excluded than mtx has.
bool ScalerBase::CheckSizeExcluded(const matrix &mtx) const
  {
   if(mtx.Cols() < excluyed_cols)
     {
      // The message now states the condition that is actually tested (strictly greater)
      LogError(StringFormat("Columnas a excluir %I64u > total columnas %I64u", excluyed_cols, mtx.Cols()), FUNCION_ACTUAL);
      return false;
     }
   return true;
  }
//+------------------------------------------------------------------+
// Vector counterpart of CheckSizeCustom(matrix): elements play the role of columns.
bool ScalerBase::CheckSizeCustom(const vector &v) const
  {
   const ulong total = v.Size();
   if(start_col >= total)
     {
      LogError(StringFormat("Columna de inicio %I64u >= tamaño total del vector %I64u", start_col, total), FUNCION_ACTUAL);
      return false;
     }
   if(start_col + count_cols > total)
     {
      LogError(StringFormat("Rango [%I64u:%I64u] excede el tamaño del vector %I64u", start_col, start_col + count_cols - 1, total), FUNCION_ACTUAL);
      return false;
     }
   return true;
  }
//+------------------------------------------------------------------+
// Vector counterpart of CheckSizeExcluded(matrix).
bool ScalerBase::CheckSizeExcluded(const vector &v) const
  {
   if(v.Size() < excluyed_cols)
     {
      LogError(StringFormat("Columnas a excluir %I64u > tamaño del vector: %I64u", excluyed_cols, v.Size()), FUNCION_ACTUAL);
      return false;
     }
   return true;
  }
//+------------------------------------------------------------------+
//| Extraction / reconstruction                                      |
//+------------------------------------------------------------------+
// Returns the sub-matrix holding only the columns that must be scaled.
// X is returned untouched when it already has exactly count_cols columns
// (custom mode), when nothing is excluded (excluded mode), or when the
// configured range does not fit X (an error is logged in that case).
matrix ScalerBase::ExtractMatrixToScale(const matrix &X) const
  {
   matrix result;
   const ulong rows = X.Rows();
   if(use_custom)
     {
      if(X.Cols() == count_cols)
         return X;
      // Not every caller validates first; guard against reading past the last column
      if(!CheckSizeCustom(X))
         return X;
      result.Init(rows, count_cols);
      for(ulong r = 0; r < rows; r++)
         for(ulong c = 0; c < count_cols; c++)
            result[r][c] = X[r][start_col + c];
     }
   else
     {
      if(excluyed_cols == 0)
         return X;
      // Guard against the ulong underflow of Cols() - excluyed_cols
      if(!CheckSizeExcluded(X))
         return X;
      const ulong cols_to_scale = X.Cols() - excluyed_cols;
      result.Init(rows, cols_to_scale);
      for(ulong r = 0; r < rows; r++)
         for(ulong c = 0; c < cols_to_scale; c++)
            result[r][c] = X[r][c];
     }
   return result;
  }
//+------------------------------------------------------------------+
// Returns a copy of X_original whose scaled columns are replaced by X_scaled.
// When both matrices have the same shape X_scaled is returned as is.
// When the shapes are incompatible an error is logged and X_scaled is returned.
matrix ScalerBase::ReconstructMatrix(const matrix &X_original, const matrix &X_scaled) const
  {
   const ulong rows = X_original.Rows();
   const ulong cols = X_original.Cols();
   if(rows == X_scaled.Rows() && cols == X_scaled.Cols())
      return X_scaled;
//--- Number of columns written back and the first destination column
   const ulong n_copy = use_custom ? count_cols : X_scaled.Cols();
   const ulong first = use_custom ? start_col : 0;
   if(rows != X_scaled.Rows() || n_copy > X_scaled.Cols() || first + n_copy > cols)
     {
      LogError(StringFormat("No se puede reconstruir: matriz escalada [%I64u x %I64u] incompatible con la original [%I64u x %I64u]", X_scaled.Rows(), X_scaled.Cols(), rows, cols), FUNCION_ACTUAL);
      return X_scaled;
     }
   matrix result = X_original; // Full copy; only the scaled columns are overwritten
   for(ulong r = 0; r < rows; r++)
      for(ulong c = 0; c < n_copy; c++)
         result[r][first + c] = X_scaled[r][c];
   return result;
  }
//+------------------------------------------------------------------+
// Vector counterpart of ExtractMatrixToScale: returns only the elements to scale.
vector ScalerBase::ExtractVectorToScale(const vector &X) const
  {
   vector result;
   if(use_custom)
     {
      if(X.Size() == count_cols)
         return X;
      if(!CheckSizeCustom(X))
         return X;
      // Extract the specific range
      result.Resize(count_cols);
      for(ulong i = 0; i < count_cols; i++)
         result[i] = X[start_col + i];
     }
   else
     {
      if(excluyed_cols == 0)
         return X;
      if(!CheckSizeExcluded(X))
         return X;
      // Extract everything except the last N elements
      const ulong size_to_scale = X.Size() - excluyed_cols;
      result.Resize(size_to_scale);
      for(ulong i = 0; i < size_to_scale; i++)
         result[i] = X[i];
     }
   return result;
  }
//+------------------------------------------------------------------+
// Vector counterpart of ReconstructMatrix.
vector ScalerBase::ReconstructVector(const vector &X_original, const vector &X_scaled) const
  {
   const ulong total = X_original.Size();
   if(total == X_scaled.Size())
      return X_scaled;
   const ulong n_copy = use_custom ? count_cols : X_scaled.Size();
   const ulong first = use_custom ? start_col : 0;
   if(n_copy > X_scaled.Size() || first + n_copy > total)
     {
      LogError(StringFormat("No se puede reconstruir: vector escalado (%I64u) incompatible con el original (%I64u)", X_scaled.Size(), total), FUNCION_ACTUAL);
      return X_scaled;
     }
   vector result = X_original;
   for(ulong i = 0; i < n_copy; i++)
      result[first + i] = X_scaled[i];
   return result;
  }
//+------------------------------------------------------------------+
//| Persistence entry points                                         |
//+------------------------------------------------------------------+
// Loads the scaler parameters through the subclass Load().
// The scaler is flagged as loaded only when Load() reports success, so a
// failed load no longer leaves the object claiming to hold parameters.
inline bool ScalerBase::load(string prefix_name)
  {
   loaded_scaler = this.Load(prefix_name);
   return loaded_scaler;
  }
//+------------------------------------------------------------------+
// Builds the output file name (prefix_name + prefix_file) and calls the subclass Save().
inline bool ScalerBase::save(string prefix_name)
  {
   this.file_name_out = prefix_name + this.prefix_file;
   return this.Save();
  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
/*
Notas:
- file_flags: puede tener los valores de FILE_ANSI, FILE_COMON, etc..
las banderas (FILE_READ) ya bienen por derfecto
*/
//+------------------------------------------------------------------+
// Reads a delimited text file into a matrix and passes it to fit_transform().
//   file_name      - file to read
//   sep            - field separator (character code)
//   file_flags     - extra FileOpen flags; FILE_READ | FILE_TXT are always added
//   save_data      - forwarded to fit_transform()
//   file_code_page - code page used to open the file
//   start_line     - first 0-based line that holds data (earlier lines are skipped)
//   end_line       - last line to read, or SCALER_BASE_FINAL_LINE to read to the end
// Returns the result of fit_transform(); on an open error or a malformed row
// the rows parsed so far are returned unscaled (empty if none).
matrix ScalerBase::fit_transform_by_file(const string& file_name, ushort sep, int file_flags, bool save_data, uint file_code_page = CP_ACP, int start_line = 1, int end_line = SCALER_BASE_FINAL_LINE)
  {
   ResetLastError();
   matrix m = {};
   const int file_handle = FileOpen(file_name, (file_flags | FILE_READ | FILE_TXT), '\n', file_code_page);
   if(file_handle == INVALID_HANDLE)
     {
      LogCriticalError(StringFormat("Error al abrir el archivo = %s, ultimo erorr = %d", file_name, GetLastError()), FUNCION_ACTUAL);
      return m;
     }
//--- A single loop serves both the "read to the end" and the bounded case
   const bool read_to_end = (end_line == SCALER_BASE_FINAL_LINE);
   int curr_line = 0;
   int curr_row = 0;
   int dt = 0;
   string arr[];
   string line = "";
   while(!FileIsEnding(file_handle))
     {
      line = FileReadString(file_handle);
      if(curr_line >= start_line)
        {
         dt = StringSplit(line, sep, arr);
         // Reject empty rows and rows whose field count differs from the rows
         // already stored: resizing to another column count would corrupt them
         if(dt < 1 || (curr_row > 0 && (ulong)dt != m.Cols()))
           {
            LogError(StringFormat("Linea mal formada = %d:\n%s", curr_line, line), FUNCION_ACTUAL);
            FileClose(file_handle);
            return m;
           }
         m.Resize(curr_row + 1, dt);
         for(int i = 0; i < dt; i++)
            m[curr_row][i] = double(arr[i]);
         curr_row++;
        }
      curr_line++;
      if(!read_to_end && curr_line > end_line)
         break;
     }
   FileClose(file_handle);
   return fit_transform(m, save_data);
  }
//+------------------------------------------------------------------+
// Scales a delimited text file in place: lines before start_line are kept
// verbatim, every later line is parsed, scaled through fit_transform() and
// written back using the same separator that was used for reading.
// Returns false when the file cannot be opened or a row is malformed.
bool ScalerBase::fit_transform_file(const string &file_name, ushort sep, int extra_file_flags, bool save_data, uint file_code_page = CP_ACP, int start_line = 1)
  {
   ResetLastError();
   matrix m = {};
   int file_handle = FileOpen(file_name, (extra_file_flags | FILE_TXT | FILE_READ), '\n', file_code_page);
   if(file_handle == INVALID_HANDLE)
     {
      LogCriticalError(StringFormat("Error al abrir el archivo = %s, ultimo erorr = %d", file_name, GetLastError()), FUNCION_ACTUAL);
      return false;
     }
   int curr_line = 0;
   int curr_row = 0;
   int dt = 0;
   string arr[];
   string line = "";
   string extra[]; // Lines before start_line, written back unchanged
   ArrayResize(extra, 0);
//--- Read up to the end of the file
   while(!FileIsEnding(file_handle))
     {
      line = FileReadString(file_handle);
      if(curr_line >= start_line)
        {
         dt = StringSplit(line, sep, arr);
         if(dt < 1 || (curr_row > 0 && (ulong)dt != m.Cols()))
           {
            LogError(StringFormat("Linea mal formada = %d:\n%s", curr_line, line), FUNCION_ACTUAL);
            FileClose(file_handle);
            return false;
           }
         m.Resize(curr_row + 1, dt);
         for(int i = 0; i < dt; i++)
            m[curr_row][i] = double(arr[i]);
         curr_row++;
        }
      else
        {
         extra[ArrayResize(extra, ArraySize(extra) + 1) - 1] = line;
        }
      curr_line++;
     }
//--- Transform and close
   m = fit_transform(m, save_data);
   FileClose(file_handle);
//--- Rewrite: reopen the same file for writing
   file_handle = FileOpen(file_name, (extra_file_flags | FILE_TXT | FILE_WRITE), 0, file_code_page);
   if(file_handle == INVALID_HANDLE)
     {
      LogCriticalError(StringFormat("Error al abrir el archivo = %s, ultimo erorr = %d", file_name, GetLastError()), FUNCION_ACTUAL);
      return false;
     }
// Leading lines
   for(int i = 0; i < ArraySize(extra); i++)
      FileWrite(file_handle, extra[i]);
// Matrix rows, joined with the caller's separator so the file can be read
// again with the same `sep` and stays consistent with the preserved lines
   const string sep_str = ShortToString(sep);
   const ulong rows = m.Rows();
   const ulong cols = m.Cols();
   for(ulong row = 0; row < rows; row++)
     {
      line = "";
      for(ulong col = 0; col < cols; col++)
        {
         if(col > 0)
            line += sep_str;
         line += string(m[row][col]);
        }
      FileWrite(file_handle, line);
     }
//--- Close the file
   FileClose(file_handle);
   return true;
  }
//+------------------------------------------------------------------+
// Parses delimited text held in a string ('\n' separates rows, `sep` separates
// fields) and passes the resulting matrix to fit_transform().
// Rows str_start_line..str_end_line (0-based) are used; SCALER_BASE_FINAL_LINE
// means "to the last row". On a malformed row the rows parsed so far are
// returned unscaled.
matrix ScalerBase::fit_transform_by_src(const string& src, ushort sep, bool save_data, int str_start_line = 1, int str_end_line = SCALER_BASE_FINAL_LINE)
  {
   matrix m = {};
   string general_arr[];
   const int total_row = StringSplit(src, '\n', general_arr);
   if(total_row <= str_start_line)
     {
      LogError(StringFormat("Numero de filas del string %d es invalida", total_row), FUNCION_ACTUAL);
      return m;
     }
   const bool read_to_end = (str_end_line == SCALER_BASE_FINAL_LINE);
   int curr_row = 0;
   int dt = 0;
   string arr[];
   string line = "";
   for(int r = 0; r < total_row; r++)
     {
      line = general_arr[r];
      if(r >= str_start_line)
        {
         dt = StringSplit(line, sep, arr);
         if(dt < 1 || (curr_row > 0 && (ulong)dt != m.Cols()))
           {
            LogError(StringFormat("Linea mal formada = %d:\n%s", r, line), FUNCION_ACTUAL);
            return m;
           }
         m.Resize(curr_row + 1, dt);
         for(int i = 0; i < dt; i++)
            m[curr_row][i] = double(arr[i]);
         curr_row++;
        }
      if(!read_to_end && r + 1 > str_end_line)
         break;
     }
   return fit_transform(m, save_data);
  }
//+------------------------------------------------------------------+
//| Standardization Scaler                                           |
//| Scales each selected column as (x - mean) / std.                 |
//+------------------------------------------------------------------+
class StandardizationScaler : public ScalerBase
  {
protected:
   vector            mean, std; // Per-column parameters stored by fit_transform(..., true) or Load()

   bool              Save() override;
   bool              Load(string prefix_name) override;

public:
                     StandardizationScaler() : ScalerBase() { this.prefix_file = "_mean_std.csv"; }

   matrix            fit_transform(const matrix &X, bool save_data) override;
   vector            fit_transform(const vector &X) override; // For vectors no data is stored
   matrix            transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   matrix            inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
  };
//+------------------------------------------------------------------+
// Writes mean, std and the column-selection settings to file_name_out
// in the common files folder. Returns false when the file cannot be opened.
bool StandardizationScaler::Save()
  {
// Delete from the same (common) folder the file is written to
   FileDelete(this.file_name_out, FILE_COMMON);
   ResetLastError();
   const int handle = FileOpen(this.file_name_out, FILE_WRITE | FILE_CSV | FILE_COMMON);
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
   FileWrite(handle, vector_to_string(this.mean));
   FileWrite(handle, vector_to_string(this.std));
   FileWrite(handle, count_cols);
   FileWrite(handle, start_col);
   FileWrite(handle, excluyed_cols);
   FileWrite(handle, (int)use_custom);
   FileClose(handle);
   return true;
  }
//+------------------------------------------------------------------+
// Reads back what Save() wrote (same field order) from prefix_name + prefix_file.
// Returns false when the file cannot be opened or holds no usable parameters.
bool StandardizationScaler::Load(string prefix_name)
  {
   this.file_name_out = prefix_name + this.prefix_file;
   ResetLastError();
   const int handle = FileOpen(file_name_out, FILE_READ | FILE_CSV | FILE_COMMON, "\n");
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
   this.mean = string_to_vector(FileReadString(handle));
   this.std = string_to_vector(FileReadString(handle));
   this.count_cols = (ulong)StringToInteger(FileReadString(handle));
   this.start_col = (ulong)StringToInteger(FileReadString(handle));
   this.excluyed_cols = (ulong)StringToInteger(FileReadString(handle));
   this.use_custom = (bool)StringToInteger(FileReadString(handle));
   FileClose(handle);
// One mean and one std per scaled column are required for transform()
   if(this.mean.Size() == 0 || this.mean.Size() != this.std.Size())
     {
      LogError(StringFormat("Invalid scaler data (mean size= %I64u, std size= %I64u) >> Filename= %s", this.mean.Size(), this.std.Size(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
   return true;
  }
//+------------------------------------------------------------------+
// Standardizes the selected elements of X with the mean/std of those same
// elements and returns X with them replaced. Nothing is stored.
// A loaded scaler is not refitted: transform(X) is returned instead.
vector StandardizationScaler::fit_transform(const vector &X)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      return transform(X);
     }
   if(use_custom ? !CheckSizeCustom(X) : !CheckSizeExcluded(X))
      return X;
   vector part = ExtractVectorToScale(X);
   const double mean_val = part.Mean();
   double std_val = part.Std();
   if(std_val < SCALER_BASE_MIN_VALUE)
      std_val = 1.0; // Avoid dividing by ~0
   for(ulong i = 0; i < part.Size(); i++)
      part[i] = (part[i] - mean_val) / std_val;
   return ReconstructVector(X, part);
  }
//+------------------------------------------------------------------+
// Fits per-column mean/std on the selected columns of X, scales them and
// returns X with those columns replaced. The parameters are kept in the
// object only when save_data is true.
// A loaded scaler is not refitted: transform(X) is returned instead.
matrix StandardizationScaler::fit_transform(const matrix &X, bool save_data)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      return transform(X);
     }
   LogInfo(StringFormat("Numero de columnas de entrada: %I64u", X.Cols()), FUNCION_ACTUAL);
   if(use_custom ? !CheckSizeCustom(X) : !CheckSizeExcluded(X))
      return X;
   matrix X_to_scale = ExtractMatrixToScale(X);
   const ulong n_cols = X_to_scale.Cols();
   const ulong n_rows = X_to_scale.Rows();
   LogInfo(StringFormat("Columnas a escalar: %I64u", n_cols), FUNCION_ACTUAL);
   vector mean_cts(n_cols);
   vector std_cts(n_cols);
   for(ulong i = 0; i < n_cols; i++)
     {
      const vector column = X_to_scale.Col(i);
      mean_cts[i] = column.Mean();
      std_cts[i] = column.Std();
      // Avoid division by zero
      if(std_cts[i] < SCALER_BASE_MIN_VALUE)
        {
         LogWarning(StringFormat("Columna %I64u tiene std muy pequeño (%.2e), usando 1.0", i, std_cts[i]), FUNCION_ACTUAL);
         std_cts[i] = 1.0;
        }
     }
   for(ulong row = 0; row < n_rows; row++)
      for(ulong col = 0; col < n_cols; col++)
         X_to_scale[row][col] = (X_to_scale[row][col] - mean_cts[col]) / std_cts[col];
   if(save_data)
     {
      this.mean = mean_cts;
      this.std = std_cts;
     }
//--- The full matrix is always rebuilt here
   return ReconstructMatrix(X, X_to_scale);
  }
//+------------------------------------------------------------------+
// Applies the stored mean/std to X.
//   solo_escalar_lo_previsto - true: X holds all columns and the configured
//                              ones are extracted first; false: X already
//                              holds exactly the columns to scale
//   reconstruir              - true: return X with the scaled columns put back
// Returns X unchanged (and logs) when no parameters are available or the
// column count does not match them.
matrix StandardizationScaler::transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
// Ready when parameters were loaded or stored by fit_transform(..., true)
   if(!loaded_scaler && this.mean.Size() == 0)
     {
      LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
      return X;
     }
// The configured range only has to fit X when columns are extracted from it
   if(solo_escalar_lo_previsto)
     {
      if(use_custom ? !CheckSizeCustom(X) : !CheckSizeExcluded(X))
         return X;
     }
   matrix X_to_scale = solo_escalar_lo_previsto ? ExtractMatrixToScale(X) : X;
   const ulong n_cols = X_to_scale.Cols();
   const ulong n_rows = X_to_scale.Rows();
   if(n_cols != this.mean.Size())
     {
      LogError(StringFormat("Columnas a escalar %I64u != columnas entrenadas %I64u", n_cols, this.mean.Size()), FUNCION_ACTUAL);
      return X;
     }
   for(ulong row = 0; row < n_rows; row++)
      for(ulong col = 0; col < n_cols; col++)
         X_to_scale[row][col] = (X_to_scale[row][col] - this.mean[col]) / this.std[col];
   return (reconstruir) ? ReconstructMatrix(X, X_to_scale) : X_to_scale;
  }
//+------------------------------------------------------------------+
// Vector counterpart of transform(matrix): element i uses mean[i] / std[i].
vector StandardizationScaler::transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler && this.mean.Size() == 0)
     {
      LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
      return X;
     }
   if(solo_escalar_lo_previsto)
     {
      if(use_custom ? !CheckSizeCustom(X) : !CheckSizeExcluded(X))
         return X;
     }
   vector X_to_scale = solo_escalar_lo_previsto ? ExtractVectorToScale(X) : X;
   const ulong n = X_to_scale.Size();
   if(n != this.mean.Size())
     {
      LogError(StringFormat("Elementos a escalar %I64u != elementos entrenados %I64u", n, this.mean.Size()), FUNCION_ACTUAL);
      return X;
     }
   for(ulong i = 0; i < n; i++)
      X_to_scale[i] = (X_to_scale[i] - this.mean[i]) / this.std[i];
   return (reconstruir) ? ReconstructVector(X, X_to_scale) : X_to_scale;
  }
//+------------------------------------------------------------------+
// Undoes transform(): x * std + mean on each selected column.
// The two flags have the same meaning as in transform().
matrix StandardizationScaler::inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler && this.mean.Size() == 0)
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }
   if(solo_escalar_lo_previsto)
     {
      if(use_custom ? !CheckSizeCustom(X_scaled) : !CheckSizeExcluded(X_scaled))
         return X_scaled;
     }
   matrix X_to_unscale = (solo_escalar_lo_previsto) ? ExtractMatrixToScale(X_scaled) : X_scaled;
   const ulong n_cols = X_to_unscale.Cols();
   const ulong n_rows = X_to_unscale.Rows();
   if(n_cols != this.mean.Size())
     {
      LogError(StringFormat("Columnas escaladas %I64u != columnas entrenadas %I64u", n_cols, this.mean.Size()), FUNCION_ACTUAL);
      return X_scaled;
     }
   for(ulong row = 0; row < n_rows; row++)
      for(ulong col = 0; col < n_cols; col++)
         X_to_unscale[row][col] = X_to_unscale[row][col] * this.std[col] + this.mean[col];
   return (reconstruir) ? ReconstructMatrix(X_scaled, X_to_unscale) : X_to_unscale;
  }
//+------------------------------------------------------------------+
// Vector counterpart of inverse_transform(matrix).
vector StandardizationScaler::inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler && this.mean.Size() == 0)
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }
   if(solo_escalar_lo_previsto)
     {
      if(use_custom ? !CheckSizeCustom(X_scaled) : !CheckSizeExcluded(X_scaled))
         return X_scaled;
     }
   vector X_to_unscale = (solo_escalar_lo_previsto) ? ExtractVectorToScale(X_scaled) : X_scaled;
   const ulong n = X_to_unscale.Size();
   if(n != this.mean.Size())
     {
      LogError(StringFormat("Elementos escalados %I64u != elementos entrenados %I64u", n, this.mean.Size()), FUNCION_ACTUAL);
      return X_scaled;
     }
   for(ulong i = 0; i < n; i++)
      X_to_unscale[i] = X_to_unscale[i] * this.std[i] + this.mean[i];
   return (reconstruir) ? ReconstructVector(X_scaled, X_to_unscale) : X_to_unscale;
  }
//+------------------------------------------------------------------+
//| MaxMin Scaler                                                    |
//| Scales each selected column as (x - min) / (max - min).          |
//+------------------------------------------------------------------+
class MaxMinScaler : public ScalerBase
  {
protected:
   vector            min_vals, max_vals; // Per-column parameters stored by fit_transform(..., true) or Load()

   bool              Save() override;
   bool              Load(string prefix_name) override;

public:
                     MaxMinScaler() : ScalerBase() { this.prefix_file = "_min_max.csv"; }

   vector            fit_transform(const vector &X) override;
   matrix            fit_transform(const matrix &X, bool save_data) override;
   matrix            transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   matrix            inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
  };
//+------------------------------------------------------------------+
// Writes min, max and the column-selection settings to file_name_out
// in the common files folder. Returns false when the file cannot be opened.
bool MaxMinScaler::Save()
  {
// Delete from the same (common) folder the file is written to
   FileDelete(this.file_name_out, FILE_COMMON);
   ResetLastError();
   const int handle = FileOpen(this.file_name_out, FILE_WRITE | FILE_CSV | FILE_COMMON);
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
   FileWrite(handle, vector_to_string(this.min_vals));
   FileWrite(handle, vector_to_string(this.max_vals));
   FileWrite(handle, count_cols);
   FileWrite(handle, start_col);
   FileWrite(handle, excluyed_cols);
   FileWrite(handle, (int)use_custom);
   FileClose(handle);
   return true;
  }
//+------------------------------------------------------------------+
// Reads back what Save() wrote (same field order) from prefix_name + prefix_file.
// Returns false when the file cannot be opened or holds no usable parameters.
bool MaxMinScaler::Load(string prefix_name)
  {
   this.file_name_out = prefix_name + this.prefix_file;
   ResetLastError();
   const int handle = FileOpen(file_name_out, FILE_READ | FILE_CSV | FILE_COMMON, "\n");
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
   this.min_vals = string_to_vector(FileReadString(handle));
   this.max_vals = string_to_vector(FileReadString(handle));
   this.count_cols = (ulong)StringToInteger(FileReadString(handle));
   this.start_col = (ulong)StringToInteger(FileReadString(handle));
   this.excluyed_cols = (ulong)StringToInteger(FileReadString(handle));
   this.use_custom = (bool)StringToInteger(FileReadString(handle));
   FileClose(handle);
// One min and one max per scaled column are required for transform()
   if(this.min_vals.Size() == 0 || this.min_vals.Size() != this.max_vals.Size())
     {
      LogError(StringFormat("Invalid scaler data (min size= %I64u, max size= %I64u) >> Filename= %s", this.min_vals.Size(), this.max_vals.Size(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
   return true;
  }
//+------------------------------------------------------------------+
// Min-max scales the selected elements of X with the min/max of those same
// elements and returns X with them replaced. Nothing is stored.
// A loaded scaler is not refitted: transform(X) is returned instead.
vector MaxMinScaler::fit_transform(const vector &X)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      return transform(X);
     }
   if(use_custom ? !CheckSizeCustom(X) : !CheckSizeExcluded(X))
      return X;
   vector part = ExtractVectorToScale(X);
   const double low = part.Min();
   double high = part.Max();
   if(high - low < SCALER_BASE_MIN_VALUE)
      high = low + 1.0; // Degenerate range: use a range of 1.0
   const double range = high - low;
   for(ulong i = 0; i < part.Size(); i++)
      part[i] = (part[i] - low) / range;
   return ReconstructVector(X, part);
  }
//+------------------------------------------------------------------+
// Fits per-column min/max on the selected columns of X, scales them to
// (x - min) / (max - min) and returns X with those columns replaced.
// The parameters are kept in the object only when save_data is true.
// A loaded scaler is not refitted: transform(X) is returned instead.
matrix MaxMinScaler::fit_transform(const matrix &X, bool save_data)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      return transform(X);
     }
   LogInfo(StringFormat("Numero de columnas de entrada: %I64u", X.Cols()), FUNCION_ACTUAL);
   if(use_custom ? !CheckSizeCustom(X) : !CheckSizeExcluded(X))
      return X;
   matrix X_to_scale = ExtractMatrixToScale(X);
   const ulong n_cols = X_to_scale.Cols();
   const ulong n_rows = X_to_scale.Rows();
   LogInfo(StringFormat("Columnas a escalar: %I64u", n_cols), FUNCION_ACTUAL);
   vector min_vals_cts(n_cols);
   vector max_vals_cts(n_cols);
   for(ulong i = 0; i < n_cols; i++)
     {
      const vector column = X_to_scale.Col(i);
      min_vals_cts[i] = column.Min();
      max_vals_cts[i] = column.Max();
      // Degenerate range: fall back to a range of 1.0 to avoid dividing by ~0
      if(fabs(max_vals_cts[i] - min_vals_cts[i]) < SCALER_BASE_MIN_VALUE)
        {
         LogWarning(StringFormat("Columna %I64u tiene rango muy pequeño (%.2e), usando rango 1.0", i, max_vals_cts[i] - min_vals_cts[i]), FUNCION_ACTUAL);
         max_vals_cts[i] = min_vals_cts[i] + 1.0;
        }
     }
   for(ulong row = 0; row < n_rows; row++)
      for(ulong col = 0; col < n_cols; col++)
         X_to_scale[row][col] = (X_to_scale[row][col] - min_vals_cts[col]) / (max_vals_cts[col] - min_vals_cts[col]);
   if(save_data)
     {
      this.min_vals = min_vals_cts;
      this.max_vals = max_vals_cts;
     }
//--- The full matrix is always rebuilt here
   return ReconstructMatrix(X, X_to_scale);
  }
//+------------------------------------------------------------------+
// Applies the stored min/max to X.
//   solo_escalar_lo_previsto - true: X holds all columns and the configured
//                              ones are extracted first; false: X already
//                              holds exactly the columns to scale
//   reconstruir              - true: return X with the scaled columns put back
// Returns X unchanged (and logs) when no parameters are available or the
// column count does not match them.
matrix MaxMinScaler::transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
// Ready when parameters were loaded or stored by fit_transform(..., true)
   if(!loaded_scaler && this.min_vals.Size() == 0)
     {
      LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
      return X;
     }
// The configured range only has to fit X when columns are extracted from it
   if(solo_escalar_lo_previsto)
     {
      if(use_custom ? !CheckSizeCustom(X) : !CheckSizeExcluded(X))
         return X;
     }
   matrix X_to_scale = solo_escalar_lo_previsto ? ExtractMatrixToScale(X) : X;
   const ulong n_cols = X_to_scale.Cols();
   const ulong n_rows = X_to_scale.Rows();
   if(n_cols != this.min_vals.Size())
     {
      LogError(StringFormat("Columnas a escalar %I64u != columnas entrenadas %I64u", n_cols, this.min_vals.Size()), FUNCION_ACTUAL);
      return X;
     }
   for(ulong row = 0; row < n_rows; row++)
      for(ulong col = 0; col < n_cols; col++)
         X_to_scale[row][col] = (X_to_scale[row][col] - this.min_vals[col]) / (this.max_vals[col] - this.min_vals[col]);
   return (reconstruir) ? ReconstructMatrix(X, X_to_scale) : X_to_scale;
  }
//+------------------------------------------------------------------+
// Vector counterpart of transform(matrix): element i uses min_vals[i] / max_vals[i].
vector MaxMinScaler::transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler && this.min_vals.Size() == 0)
     {
      LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL);
      return X;
     }
   if(solo_escalar_lo_previsto)
     {
      if(use_custom ? !CheckSizeCustom(X) : !CheckSizeExcluded(X))
         return X;
     }
   vector X_to_scale = solo_escalar_lo_previsto ? ExtractVectorToScale(X) : X;
   const ulong n = X_to_scale.Size();
   if(n != this.min_vals.Size())
     {
      LogError(StringFormat("Elementos a escalar %I64u != elementos entrenados %I64u", n, this.min_vals.Size()), FUNCION_ACTUAL);
      return X;
     }
   for(ulong i = 0; i < n; i++)
      X_to_scale[i] = (X_to_scale[i] - this.min_vals[i]) / (this.max_vals[i] - this.min_vals[i]);
   return (reconstruir) ? ReconstructVector(X, X_to_scale) : X_to_scale;
  }
//+------------------------------------------------------------------+
// Undoes transform(): x * (max - min) + min on each selected column.
// The two flags have the same meaning as in transform().
matrix MaxMinScaler::inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler && this.min_vals.Size() == 0)
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }
   if(solo_escalar_lo_previsto)
     {
      if(use_custom ? !CheckSizeCustom(X_scaled) : !CheckSizeExcluded(X_scaled))
         return X_scaled;
     }
   matrix X_to_unscale = (solo_escalar_lo_previsto) ? ExtractMatrixToScale(X_scaled) : X_scaled;
   const ulong n_cols = X_to_unscale.Cols();
   const ulong n_rows = X_to_unscale.Rows();
   if(n_cols != this.min_vals.Size())
     {
      LogError(StringFormat("Columnas escaladas %I64u != columnas entrenadas %I64u", n_cols, this.min_vals.Size()), FUNCION_ACTUAL);
      return X_scaled;
     }
   for(ulong row = 0; row < n_rows; row++)
      for(ulong col = 0; col < n_cols; col++)
         X_to_unscale[row][col] = X_to_unscale[row][col] * (this.max_vals[col] - this.min_vals[col]) + this.min_vals[col];
   return (reconstruir) ? ReconstructMatrix(X_scaled, X_to_unscale) : X_to_unscale;
  }
//+------------------------------------------------------------------+
// Vector counterpart of inverse_transform(matrix).
vector MaxMinScaler::inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false)
  {
   if(!loaded_scaler && this.min_vals.Size() == 0)
     {
      LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL);
      return X_scaled;
     }
   if(solo_escalar_lo_previsto)
     {
      if(use_custom ? !CheckSizeCustom(X_scaled) : !CheckSizeExcluded(X_scaled))
         return X_scaled;
     }
   vector X_to_unscale = (solo_escalar_lo_previsto) ? ExtractVectorToScale(X_scaled) : X_scaled;
   const ulong n = X_to_unscale.Size();
   if(n != this.min_vals.Size())
     {
      LogError(StringFormat("Elementos escalados %I64u != elementos entrenados %I64u", n, this.min_vals.Size()), FUNCION_ACTUAL);
      return X_scaled;
     }
   for(ulong i = 0; i < n; i++)
      X_to_unscale[i] = X_to_unscale[i] * (this.max_vals[i] - this.min_vals[i]) + this.min_vals[i];
   return (reconstruir) ? ReconstructVector(X_scaled, X_to_unscale) : X_to_unscale;
  }
//+------------------------------------------------------------------+
//| Robust Scaler                                                    |
//| Keeps a median and an inter-quartile range per scaled column.    |
//+------------------------------------------------------------------+
class RobustScaler : public ScalerBase
  {
protected:
   vector            medians, iqrs; // Per-column parameters written by Save() and read by Load()

   bool              Save() override;
   bool              Load(string prefix_name) override;

public:
                     RobustScaler() : ScalerBase() { this.prefix_file = "_median_iqr.csv"; }

   vector            fit_transform(const vector &X) override;
   matrix            fit_transform(const matrix &X, bool save_data) override;
   matrix            transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   matrix            inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
   vector            inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) override;
  };
//+------------------------------------------------------------------+
// Writes medians, IQRs and the column-selection settings to file_name_out
// in the common files folder. Returns false when the file cannot be opened.
bool RobustScaler::Save()
  {
// Delete from the same (common) folder the file is written to
   FileDelete(this.file_name_out, FILE_COMMON);
   ResetLastError();
   const int handle = FileOpen(this.file_name_out, FILE_WRITE | FILE_CSV | FILE_COMMON);
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
   FileWrite(handle, vector_to_string(this.medians));
   FileWrite(handle, vector_to_string(this.iqrs));
   FileWrite(handle, count_cols);
   FileWrite(handle, start_col);
   FileWrite(handle, excluyed_cols);
   FileWrite(handle, (int)use_custom);
   FileClose(handle);
   return true;
  }
//+------------------------------------------------------------------+
// Reads back what Save() wrote (same field order) from prefix_name + prefix_file.
// Returns false when the file cannot be opened or holds no usable parameters.
bool RobustScaler::Load(string prefix_name)
  {
   this.file_name_out = prefix_name + this.prefix_file;
   ResetLastError();
   const int handle = FileOpen(file_name_out, FILE_READ | FILE_CSV | FILE_COMMON, "\n");
   if(handle == INVALID_HANDLE)
     {
      LogFatalError(StringFormat("Invalid handle Err= %d >> Filename= %s", GetLastError(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
   this.medians = string_to_vector(FileReadString(handle));
   this.iqrs = string_to_vector(FileReadString(handle));
   this.count_cols = (ulong)StringToInteger(FileReadString(handle));
   this.start_col = (ulong)StringToInteger(FileReadString(handle));
   this.excluyed_cols = (ulong)StringToInteger(FileReadString(handle));
   this.use_custom = (bool)StringToInteger(FileReadString(handle));
   FileClose(handle);
// One median and one IQR per scaled column are required
   if(this.medians.Size() == 0 || this.medians.Size() != this.iqrs.Size())
     {
      LogError(StringFormat("Invalid scaler data (medians size= %I64u, iqrs size= %I64u) >> Filename= %s", this.medians.Size(), this.iqrs.Size(), this.file_name_out), FUNCION_ACTUAL);
      return false;
     }
   return true;
  }
//+------------------------------------------------------------------+
// Scales the selected elements of X as (x - median) / IQR, where the median
// and the 25th/75th percentiles come from those same elements, and returns X
// with them replaced. Nothing is stored.
// A loaded scaler is not refitted: transform(X) is returned instead.
vector RobustScaler::fit_transform(const vector &X)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      return transform(X);
     }
   if(use_custom ? !CheckSizeCustom(X) : !CheckSizeExcluded(X))
      return X;
   vector part = ExtractVectorToScale(X);
   const double median_val = part.Median();
   const double q75 = part.Percentile(75);
   const double q25 = part.Percentile(25);
   double iqr_val = q75 - q25;
   if(fabs(iqr_val) < SCALER_BASE_MIN_VALUE)
      iqr_val = 1.00; // Degenerate spread: avoid dividing by ~0
   for(ulong i = 0; i < part.Size(); i++)
      part[i] = (part[i] - median_val) / iqr_val;
   return ReconstructVector(X, part);
  }
//+------------------------------------------------------------------+
matrix RobustScaler::fit_transform(const matrix &X, bool save_data)
  {
   if(loaded_scaler)
     {
      LogWarning("Este es un escalador cargado >> no es necesario ajustarlo a los nuevos datos, llame a otra instancia de una clase", FUNCION_ACTUAL);
      return transform(X);
     }
   LogInfo(StringFormat("Numero de columnas de entrada: %I64u", X.Cols()), FUNCION_ACTUAL);
//--- if(use_custom) { if(!CheckSizeCustom(X)) return X; } else { if(!CheckSizeExcluded(X)) return X; } //--- matrix X_to_scale = ExtractMatrixToScale(X); const ulong X_to_scale_cols = X_to_scale.Cols(); const ulong X_to_scale_rows = X_to_scale.Rows(); LogInfo(StringFormat("Columnas a escalar: %I64u", X_to_scale_cols), FUNCION_ACTUAL); //--- vector medians_cts(X_to_scale_cols); vector iqrs_cts(X_to_scale_cols); //--- for(ulong i = 0; i < X_to_scale_cols; i++) { vector col = X_to_scale.Col(i); medians_cts[i] = col.Median(); double q75 = col.Percentile(75); double q25 = col.Percentile(25); iqrs_cts[i] = q75 - q25; if(fabs(iqrs_cts[i]) < SCALER_BASE_MIN_VALUE) { LogWarning(StringFormat("Columna %I64u tiene IQR muy pequeño (%.2e), usando 1.0", i, iqrs_cts[i]), FUNCION_ACTUAL); iqrs_cts[i] = 1.0; } } //--- for(ulong row = 0; row < X_to_scale_rows; row++) for(ulong col = 0; col < X_to_scale_cols; col++) X_to_scale[row][col] = (X_to_scale[row][col] - medians_cts[col]) / iqrs_cts[col]; //--- if(save_data) { this.medians = medians_cts; this.iqrs = iqrs_cts; } //--- Aqui siempre se reconstruye return ReconstructMatrix(X, X_to_scale); } //+------------------------------------------------------------------+ //| | //+------------------------------------------------------------------+ matrix RobustScaler::transform(const matrix &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) { if(!loaded_scaler) { LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL); return X; } //--- if(use_custom) { if(!CheckSizeCustom(X)) return X; } else { if(!CheckSizeExcluded(X)) return X; } //--- matrix X_to_scale = solo_escalar_lo_previsto ? 
ExtractMatrixToScale(X) : X; const ulong X_to_scale_cols = X_to_scale.Cols(); const ulong X_to_scale_rows = X_to_scale.Rows(); //--- if(X_to_scale_cols != this.medians.Size()) { LogError(StringFormat("Columnas a escalar %I64u != columnas entrenadas %u", X_to_scale_cols, this.medians.Size()), FUNCION_ACTUAL); return X; } //--- for(ulong row = 0; row < X_to_scale_rows; row++) for(ulong col = 0; col < X_to_scale_cols; col++) X_to_scale[row][col] = (X_to_scale[row][col] - this.medians[col]) / this.iqrs[col]; //--- return (reconstruir) ? ReconstructMatrix(X, X_to_scale) : X_to_scale; } //+------------------------------------------------------------------+ vector RobustScaler::transform(const vector &X, bool solo_escalar_lo_previsto = false, bool reconstruir = false) { if(!loaded_scaler) { LogError("Primero llame a fit_transform o load() antes de transform", FUNCION_ACTUAL); return X; } //--- vector X_to_scale = solo_escalar_lo_previsto ? ExtractVectorToScale(X) : X; const ulong X_to_scale_size = X_to_scale.Size(); if(X_to_scale_size != this.medians.Size()) { LogError(StringFormat("Elementos a escalar %I64u != elementos entrenados %u", X_to_scale_size, this.medians.Size()), FUNCION_ACTUAL); return X; } //--- for(ulong i = 0; i < X_to_scale_size; i++) X_to_scale[i] = (X_to_scale[i] - this.medians[i]) / this.iqrs[i]; //--- return (reconstruir) ? ReconstructVector(X, X_to_scale) : X_to_scale; } //+------------------------------------------------------------------+ //| | //+------------------------------------------------------------------+ matrix RobustScaler::inverse_transform(const matrix &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) { if(!loaded_scaler) { LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL); return X_scaled; } //--- matrix X_to_unscale = (solo_escalar_lo_previsto) ? 
ExtractMatrixToScale(X_scaled) : X_scaled; const ulong X_to_unscale_cols = X_to_unscale.Cols(); const ulong X_to_unscale_rows = X_to_unscale.Rows(); //--- if(X_to_unscale_cols != this.medians.Size()) { LogError(StringFormat("Columnas escaladas %I64u != columnas entrenadas %u", X_to_unscale_cols, this.medians.Size()), FUNCION_ACTUAL); return X_scaled; } //--- for(ulong row = 0; row < X_to_unscale_rows; row++) for(ulong col = 0; col < X_to_unscale_cols; col++) X_to_unscale[row][col] = X_to_unscale[row][col] * this.iqrs[col] + this.medians[col]; //--- return (reconstruir) ? ReconstructMatrix(X_scaled, X_to_unscale) : X_to_unscale; } //+------------------------------------------------------------------+ vector RobustScaler::inverse_transform(const vector &X_scaled, bool solo_escalar_lo_previsto = false, bool reconstruir = false) { if(!loaded_scaler) { LogError("Escalador no entrenado. Llame primero a fit_transform", FUNCION_ACTUAL); return X_scaled; } //--- vector X_to_unscale = (solo_escalar_lo_previsto) ? ExtractVectorToScale(X_scaled) : X_scaled; const ulong X_to_unscale_size = X_to_unscale.Size(); //--- if(X_to_unscale_size != this.medians.Size()) { LogError(StringFormat("Elementos escalados %I64u != elementos entrenados %u", X_to_unscale_size, this.medians.Size()), FUNCION_ACTUAL); return X_scaled; } //--- for(ulong i = 0; i < X_to_unscale_size; i++) X_to_unscale[i] = X_to_unscale[i] * this.iqrs[i] + this.medians[i]; //--- return (reconstruir) ? ReconstructVector(X_scaled, X_to_unscale) : X_to_unscale; } //+------------------------------------------------------------------+ #endif // SCALERBYLEO_SCALER_BASE_MQH //+------------------------------------------------------------------+