ExpressEvalByLeo/Src/BoleanEval/Tokenizer.mqh
Nique_372 68d060365e
2026-05-14 16:54:23 -05:00

454 lines
12 KiB
MQL5

//+------------------------------------------------------------------+
//| Tokenizer.mqh |
//| Copyright 2026, Niquel Mendoza. |
//| https://www.mql5.com/es/users/nique_372 |
//+------------------------------------------------------------------+
#property copyright "Copyright 2026, Niquel Mendoza."
#property link "https://www.mql5.com/es/users/nique_372"
#property strict
#ifndef EXPRESSEVALBYLEO_SRC_BOOLEAN_EVAL_MQH
#define EXPRESSEVALBYLEO_SRC_BOOLEAN_EVAL_MQH
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
#include "Defines.mqh"
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Tokenizer for boolean expressions.
// Extends CTokenizerBase and adds an error code specific to boolean tokenizing.
struct CBoleanOpsTokenizer : public CTokenizerBase
  {
private:
   // Code of the last tokenizing error; WRONG_VALUE means "no error recorded".
   ENUM_OPS_TOKENIZER_BOOLEAN_ERR m_last_err;

public:
   // Starts with no error recorded.
                     CBoleanOpsTokenizer() : m_last_err(WRONG_VALUE) { }
                    ~CBoleanOpsTokenizer() { }

   // Splits `text` into tokens stored in `tokns`.
   // Returns false on failure; the error is then available through LastError().
   bool              Tokenize(const string& text, TokenOpsBoolean& tokns[]);

   // Raw code of the last error (WRONG_VALUE when there is none).
   __forceinline ENUM_OPS_TOKENIZER_BOOLEAN_ERR LastError() const
     {
      return m_last_err;
     }

   // Human-readable description of the last error, including line and column in `text`.
   string            FormatLastError(const string& text) const;

   // Debug helper: prints the type and string value of every token.
   static void       PrintValues(const TokenOpsBoolean& tokens[]);
  };
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Splits a boolean expression into tokens.
//
// Parameters:
//   text  - expression to tokenize.
//   tokns - output array; resized to the exact number of tokens on success.
//
// Returns true on success. On failure returns false, stores the error code in
// m_last_err and the offending character position in m_err_pos (see FormatLastError).
//
// Recognized lexemes: "!=", "==", "<", "<=", ">", ">=", "(", ")", quoted strings,
// raw strings (leading 'R'), '#' custom function calls, '@' built-in function
// calls, '$' variables, numbers (optionally starting with '-'), boolean literals
// starting with 't' / 'f', and logical operators starting with 'o'.
//
// NOTE(review): AST_OPS_CHECK_RESIZE is defined outside this file; it presumably
// advances current_size and grows tokns using reserve_size, so those local names
// must be kept as they are.
bool CBoleanOpsTokenizer::Tokenize(const string &text, TokenOpsBoolean& tokns[])
  {
//--- Reset the scanner state and clear any previous error
   m_len = StringLen(text);
   m_pos = 0;
   m_err_pos = 0;
   m_last_err = WRONG_VALUE;
//--- Initial capacity of the output array
   int reserve_size = 16;
   ArrayResize(tokns, reserve_size);
   int current_size = 0;
//---
   while(m_pos < m_len)
     {
      //--- Skip whitespace and control characters
      if(text[m_pos] < 33)
        {
         m_pos++;
         continue;
        }
      //---
      switch(text[m_pos])
        {
         //--- Not equal: "!="
         case '!':
           {
            m_pos++;
            if(m_pos < m_len && text[m_pos] == '=')
              {
               tokns[current_size].type = BOOLEAN_OPS_TOKEN_TYPE_OPERADOR_BOLEANO | int(BOOLEAN_OP_TYPE_NO_IGUAL) << AST_LOGIC_EXTRA_TYPE_START_BIT;
              }
            else
              {
               m_last_err = OPS_TOKENIZER_BOOLEAN_ERR_OP_NO_IGUAL_MAL_FORMADO;
               m_err_pos = m_pos - 1; // position of the '!'
               return false;
              }
            //---
            AST_OPS_CHECK_RESIZE
            //---
            break;
           }
         //--- Quoted string
         case '"':
           {
            tokns[current_size].type = BOOLEAN_OPS_TOKEN_TYPE_VALUE_STRING;
            if(!CGenericParser::UnescapeString(text, m_len, m_pos, (int&)m_last_err, m_err_pos, tokns[current_size]))
               return false;
            AST_OPS_CHECK_RESIZE
            break;
           }
         //--- Custom function call
         case '#':
           {
            tokns[current_size].type = BOOLEAN_OPS_TOKEN_TYPE_VALUE_CUSTOM_FUNCTION_CALL;
            if(!CGenericParser::ParseFunction(text, m_len, m_pos, (int&)m_last_err, m_err_pos, tokns[current_size]))
              {
               return false;
              }
            AST_OPS_CHECK_RESIZE
            //---
            break;
           }
         //--- Variable reference
         case '$':
           {
            tokns[current_size].type = BOOLEAN_OPS_TOKEN_TYPE_VALUE_VARIABLE;
            CGenericParser::ParseVariable(text, m_len, m_pos, (int&)m_last_err, m_err_pos, tokns[current_size]);
            // Every other parser call aborts on failure. m_last_err was reset to
            // WRONG_VALUE above and any earlier failure has already returned, so a
            // different value here can only have been written by ParseVariable.
            if(m_last_err != WRONG_VALUE)
               return false;
            AST_OPS_CHECK_RESIZE
            //---
            break;
           }
         //--- Opening parenthesis
         case '(':
           {
            tokns[current_size].type = BOOLEAN_OPS_TOKEN_TYPE_PARA_INI;
            AST_OPS_CHECK_RESIZE
            break;
           }
         //--- Closing parenthesis
         case ')':
           {
            tokns[current_size].type = BOOLEAN_OPS_TOKEN_TYPE_PARA_END;
            AST_OPS_CHECK_RESIZE
            break;
           }
         //--- Negative number
         case '-':
           {
            tokns[current_size].type = BOOLEAN_OPS_TOKEN_TYPE_VALUE_NUMBER;
            // NOTE(review): m_pos is passed both as an in/out argument and as "m_pos++";
            // the last argument receives the position of the '-' sign. Confirm the
            // callee's contract before reordering anything in this call.
            if(!CGenericParser::ParseNumberNoHex(text, m_len, m_pos, (int&)m_last_err, m_err_pos, tokns[current_size], m_pos++))
               return false;
            AST_OPS_CHECK_RESIZE
            break;
           }
         //--- Less than / less than or equal
         case '<':
           {
            tokns[current_size].type = BOOLEAN_OPS_TOKEN_TYPE_OPERADOR_BOLEANO;
            if(m_pos + 1 < m_len && text[m_pos + 1] == '=')
              {
               m_pos++;
               tokns[current_size].type |= int(BOOLEAN_OP_TYPE_MENOR_IGUAL) << AST_LOGIC_EXTRA_TYPE_START_BIT;
              }
            else
              {
               tokns[current_size].type |= int(BOOLEAN_OP_TYPE_MENOR) << AST_LOGIC_EXTRA_TYPE_START_BIT;
              }
            //---
            AST_OPS_CHECK_RESIZE
            //---
            break;
           }
         //--- Equality: "=="
         case '=':
           {
            m_pos++;
            if(m_pos < m_len && text[m_pos] == '=')
              {
               tokns[current_size].type = BOOLEAN_OPS_TOKEN_TYPE_OPERADOR_BOLEANO | int(BOOLEAN_OP_TYPE_IGUAL) << AST_LOGIC_EXTRA_TYPE_START_BIT;
              }
            else
              {
               m_last_err = OPS_TOKENIZER_BOOLEAN_ERR_OP_IGUAL_MAL_FORMADO;
               m_err_pos = m_pos - 1; // position of the first '='
               return false;
              }
            //---
            AST_OPS_CHECK_RESIZE
            //---
            break;
           }
         //--- Greater than / greater than or equal
         case '>':
           {
            tokns[current_size].type = BOOLEAN_OPS_TOKEN_TYPE_OPERADOR_BOLEANO;
            if(m_pos + 1 < m_len && text[m_pos + 1] == '=')
              {
               m_pos++;
               tokns[current_size].type |= int(BOOLEAN_OP_TYPE_MAYOR_IGUAL) << AST_LOGIC_EXTRA_TYPE_START_BIT;
              }
            else
              {
               tokns[current_size].type |= int(BOOLEAN_OP_TYPE_MAYOR) << AST_LOGIC_EXTRA_TYPE_START_BIT;
              }
            //---
            AST_OPS_CHECK_RESIZE
            //---
            break;
           }
         //--- Built-in function call
         case '@':
           {
            // The built-in functions include all the Math* and String* ones,
            // plus @sop and @mop, which stand for "string operations" and
            // "math operations (math tokenizer)". Examples:
            // @mop(10 * 20 * 30 * ($val & 20))
            // @sop(10 * "ab")
            tokns[current_size].type = BOOLEAN_OPS_TOKEN_TYPE_VALUE_BUILD_IN_FUNCTION_CALL;
            if(!CGenericParser::ParseFunction(text, m_len, m_pos, (int&)m_last_err, m_err_pos, tokns[current_size]))
              {
               return false;
              }
            AST_OPS_CHECK_RESIZE
            //---
            break;
           }
         //--- Raw string
         case 'R':
           {
            tokns[current_size].type = BOOLEAN_OPS_TOKEN_TYPE_VALUE_STRING;
            if(!CGenericParser::ParseRawString(text, m_len, m_pos, (int&)m_last_err, m_err_pos, tokns[current_size]))
              {
               return false;
              }
            //---
            AST_OPS_CHECK_RESIZE
            break;
           }
         //--- Boolean literal "true" (any word starting with 't' is accepted)
         case 't':
           {
            tokns[current_size].type = BOOLEAN_OPS_TOKEN_TYPE_VALUE_BOOLEAN | int(1) << AST_LOGIC_EXTRA_TYPE_START_BIT;
            AST_OPS_CHECK_RESIZE
            //--- Consume the rest of the word and stop ON its last character, so the
            //    shared m_pos++ below lands on the delimiter instead of skipping it.
            //    This keeps a following ')' or operator, and never indexes past m_len.
            while(m_pos + 1 < m_len)
              {
               const ushort next_ch = text[m_pos + 1];
               const bool is_word = (next_ch >= 'a' && next_ch <= 'z') || (next_ch >= 'A' && next_ch <= 'Z') ||
                                    (next_ch >= '0' && next_ch <= '9') || next_ch == '_';
               if(!is_word)
                  break;
               m_pos++;
              }
            //---
            break;
           }
         //--- Boolean literal "false" (any word starting with 'f' is accepted)
         case 'f':
           {
            tokns[current_size].type = BOOLEAN_OPS_TOKEN_TYPE_VALUE_BOOLEAN;
            AST_OPS_CHECK_RESIZE
            //--- Same word scan as for 't': stop on the last character of the word
            while(m_pos + 1 < m_len)
              {
               const ushort next_ch = text[m_pos + 1];
               const bool is_word = (next_ch >= 'a' && next_ch <= 'z') || (next_ch >= 'A' && next_ch <= 'Z') ||
                                    (next_ch >= '0' && next_ch <= '9') || next_ch == '_';
               if(!is_word)
                  break;
               m_pos++;
              }
            //---
            break;
           }
         //--- Logical operator: 'o' followed by the operator name
         case 'o':
           {
            m_pos++;
            //--- A lone 'o' at the end of the input has no operator letter to read
            if(m_pos >= m_len)
              {
               m_last_err = OPS_TOKENIZER_BOOLEAN_ERR_OP_LOGICO_N_MAL_FORMADO;
               m_err_pos = m_pos - 1;
               return false;
              }
            //--- Select the operator; anything other than X / A / N falls back to OR
            ENUM_AST_LOGIC_OPERATOR op_type = AST_LOGIC_OPERATOR_OR;
            const ushort op = text[m_pos];
            if(op == 'X') // Xor | Xnor
              {
               //--- A second character is needed to tell Xor from Xnor
               if(m_pos + 1 >= m_len)
                 {
                  m_last_err = OPS_TOKENIZER_BOOLEAN_ERR_OP_LOGICO_X_MAL_FORMADO;
                  m_err_pos = m_pos - 1;
                  return false;
                 }
               //---
               op_type = text[m_pos + 1] == 'o' ? AST_LOGIC_OPERATOR_XOR : AST_LOGIC_OPERATOR_XNOR;
              }
            else
               if(op == 'A') // And
                  op_type = AST_LOGIC_OPERATOR_AND;
               else
                  if(op == 'N') // Nand | Nor
                    {
                     //--- A second character is needed to tell Nand from Nor
                     if(m_pos + 1 >= m_len)
                       {
                        m_last_err = OPS_TOKENIZER_BOOLEAN_ERR_OP_LOGICO_N_MAL_FORMADO;
                        m_err_pos = m_pos - 1;
                        return false;
                       }
                     //---
                     op_type = text[m_pos + 1] == 'a' ? AST_LOGIC_OPERATOR_NAND : AST_LOGIC_OPERATOR_NOR;
                    }
            //--- Advance by the per-operator length from the external lookup table
            m_pos += g_arr_ast_logic_op_len_for_parser[op_type];
            //---
            tokns[current_size].type = BOOLEAN_OPS_TOKEN_TYPE_OPERADOR_LOGICO | int(op_type) << AST_LOGIC_EXTRA_TYPE_START_BIT;
            //---
            AST_OPS_CHECK_RESIZE
            //---
            break;
           }
         //--- Anything else must be the start of a number
         default:
           {
            if(IsDigit(text[m_pos]))
              {
               tokns[current_size].type = BOOLEAN_OPS_TOKEN_TYPE_VALUE_NUMBER;
               if(!CGenericParser::ParseNumber(text, m_len, m_pos, (int&)m_last_err, m_err_pos, tokns[current_size]))
                  return false;
               //---
               AST_OPS_CHECK_RESIZE
               break;
              }
            else
              {
               m_last_err = OPS_TOKENIZER_BOOLEAN_ERR_CARACTER_INVALIDO;
               m_err_pos = m_pos;
               return false;
              }
           }
        }
      //--- Move past the last character of the token just read
      m_pos++;
     }
//--- Trim the array to the number of tokens actually produced
   ArrayResize(tokns, current_size);
//---
   return true;
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Builds a human-readable description of the last tokenizer error.
//
// Parameters:
//   text - the expression that was passed to Tokenize(); used to convert the
//          stored error position into a line and column.
//
// Returns "Not errors found" when no error is recorded (m_last_err == WRONG_VALUE);
// otherwise the error message followed by " [Line L, Col C]".
string CBoleanOpsTokenizer::FormatLastError(const string& text) const
  {
//--- Nothing to report
   if(m_last_err == WRONG_VALUE)
     {
      return "Not errors found";
     }
//--- The low 24 bits hold the error code; the bits from
//    AST_LOGIC_ERR_TYPE_START_BIT upwards hold the error category
   const uint r_err = m_last_err & 0xFFFFFF;
   const int type = m_last_err >> AST_LOGIC_ERR_TYPE_START_BIT;
//--- Translate the stored character position into line / column
   int line = 0, col = 0;
   GetCharPosLocation(m_err_pos, text, col, line);
//---
   string error_msg = "";
//--- Errors in the external category are formatted by the shared parser
   if(type == AST_LOGIC_ERR_TYPE_EXTERNAL)
     {
      CGenericParser::FormatErr(r_err, error_msg);
     }
   else
     {
      switch(r_err)
        {
         case OPS_TOKENIZER_BOOLEAN_ERR_OP_IGUAL_MAL_FORMADO:
            error_msg = "Malformed equality operator, expected '=='";
            break;
         case OPS_TOKENIZER_BOOLEAN_ERR_OP_LOGICO_X_MAL_FORMADO:
            // Raised when "oX" is not followed by another character (Xor | Xnor)
            error_msg = "Malformed logical operator starting with 'oX', expected 'oXor' or 'oXnor'";
            break;
         case OPS_TOKENIZER_BOOLEAN_ERR_OP_LOGICO_N_MAL_FORMADO:
            // Kept generic on purpose, but lists every operator the tokenizer produces
            error_msg = "Malformed logical operator starting with 'o', expected 'oAnd', 'oOr', 'oXor', 'oXnor', 'oNand', or 'oNor'";
            break;
         case OPS_TOKENIZER_BOOLEAN_ERR_CARACTER_INVALIDO:
            error_msg = "Invalid character in expression";
            break;
         case OPS_TOKENIZER_BOOLEAN_ERR_OP_NO_IGUAL_MAL_FORMADO:
            error_msg = "Malformed inequality operator, expected '!='";
            break;
         default:
            error_msg = "Unknown error";
            break;
        }
     }
//---
   return error_msg + StringFormat(" [Line %d, Col %d]", line, col);
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Debug helper: prints a header line, then one line per token with the
// name of its base type (low 16 bits of `type`) and its string value.
static void CBoleanOpsTokenizer::PrintValues(const TokenOpsBoolean &tokens[])
  {
   const int total = ArraySize(tokens);
   Print(" TYPE | VALUE");
//---
   int idx = 0;
   while(idx < total)
     {
      // Strip the extra bits stored above the base token type
      const int base_type = tokens[idx].type & 0xFFFF;
      const string type_name = EnumToString(ENUM_AST_BOOLEAN_OPS_TOKEN_TYPE(base_type));
      PrintFormat("%s | %s", type_name, tokens[idx].vs);
      idx++;
     }
  }
//+------------------------------------------------------------------+
#endif // EXPRESSEVALBYLEO_SRC_BOOLEAN_EVAL_MQH