SimPHash/Src/PerfectHash.mqh
2026-06-29 07:41:51 -05:00

728 lines
22 KiB
MQL5

//+------------------------------------------------------------------+
//| PerfectHash.mqh |
//| Copyright 2026, Niquel Mendoza. |
//| https://www.mql5.com/ |
//+------------------------------------------------------------------+
#property copyright "Copyright 2026, Niquel Mendoza."
#property link "https://www.mql5.com/"
#property strict
#ifndef SIMPHASH_SRC_PERFECTHASH_MQH
#define SIMPHASH_SRC_PERFECTHASH_MQH
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
#include "Def.mqh"
#include "Random\\All.mqh"
#include "Hash\\All.mqh"
#include <TSN\\ExtraCodes\\WinApiExt.mqh>
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
namespace TSN
{
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Perfect-hash table generator.
// As far as this file shows: Init() reads and parses a YAML description,
// BuildInternal() turns it into keys / pre-hashes / positions and configures
// m_perfect_hash, RunAlg() runs the algorithm and, on success, Save() emits an
// MQL5 include file (and optionally binary dumps through SaveBin()).
class CPerfectHashGenerator : public CLoggerBase
{
private:
//--- Parser config
// YAML parser holding the raw file bytes and the parsed tree
CYamlParser m_yml;
// Perfect-hash algorithm implementation (fed with ulong pre-hashes)
CPerfectHashByLeo<ulong> m_perfect_hash;
//--- Settings read from the YAML root in BuildInternal()
// Read from YAML key "max_attepms" and passed to m_perfect_hash.MaxValSeed()
ulong m_i_max_attemps;
// Optional namespace wrapped around the generated code ("" = none)
string m_i_nampespace;
// Prefix used for the generated table names (<prefix>_seeds, _hashes, ...)
string m_table_prefix;
IHashGenerator* m_hash; // string key -> hash (deleted in the destructor)
IRandomGenerator* m_random; // random generator (deleted in the destructor)
// Final hasher; handed to m_perfect_hash via Hasher1() in BuildInternal().
// NOTE(review): presumably owned/deleted by m_perfect_hash (DeleteHashers(true)) - confirm
IHashGeneratorWSeed* m_hash_pf_1;
// "#property copyright" / "#property link" values written to the output file
string m_copyright;
string m_link;
// Name of the generated lookup function
string m_func_name;
// Path of the generated include file
string m_file_name_out;
// Names of the #define constants holding the bucket count / final table size
string m_bucket_size_def_name;
string m_final_table_size_def_name;
// Optional include-guard macro name ("" = no guard)
string m_guard;
// Literal emitted for empty slots / lookup misses when values are stored
string m_invalid_value;
// When true the generated function is wrapped in a /* ... */ comment
bool m_comment_func;
// When true the map values themselves are emitted instead of indices
bool m_use_map_value;
//--- In
string m_keys[]; // Keys
int m_pos[]; // Index associated with each key
ulong m_hashes[]; // Hashes (sized to the final table in BuildInternal())
// Number of entries in the YAML "map" node
int m_table_size;
//---
// Hash of each key as produced by m_hash; input of the perfect-hash run
ulong m_prev_hashes[];
//--- Out
ulong m_out_seeds[]; // Seeds
// Per final-table slot: the associated position, or -1 for an empty slot
int m_out_slots[];
//---
bool BuildInternal();
bool Save();
bool SaveBin(CYmlNode& bin);
//--- Alg
// Declared here; no definition is visible in this part of the file
bool TryBuildWithSeed(const ulong seed);
//--- Code generation
// Declared here; no definition is visible in this part of the file
void CodeAddFuncBlock(string& data);
public:
CPerfectHashGenerator(void);
~CPerfectHashGenerator(void);
//---
// Loads the YAML file at file_name_yaml and prepares the generator
bool Init(const string& file_name_yaml);
//---
// Runs the perfect-hash algorithm and saves the result; false on failure
bool RunAlg();
};
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Default constructor.
// Puts every pointer and scalar member into a known state. The original only
// reset m_random and m_hash, leaving m_hash_pf_1 and the counters/flags
// undefined until BuildInternal() ran. Initializers follow declaration order.
CPerfectHashGenerator::CPerfectHashGenerator()
   : m_i_max_attemps(0),
     m_hash(NULL),
     m_random(NULL),
     m_hash_pf_1(NULL),
     m_comment_func(false),
     m_use_map_value(false),
     m_table_size(0)
  {
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Destructor: releases the two helper objects this class deletes itself
// (random generator first, then the key hasher). m_hash_pf_1 is intentionally
// not touched here; it is handed to m_perfect_hash in BuildInternal().
CPerfectHashGenerator::~CPerfectHashGenerator()
  {
   if(NULL != m_random)
     {
      delete m_random;
     }
   if(NULL != m_hash)
     {
      delete m_hash;
     }
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Loads a YAML configuration file, parses it and prepares the generator.
//   file_name_yaml - path of the YAML file, opened through kernel32::CreateFileW
// Returns true when the file was read, parsed and BuildInternal() succeeded;
// false (after logging) otherwise.
bool CPerfectHashGenerator::Init(const string &file_name_yaml)
  {
//--- Open the file for reading
   HANDLE handle = kernel32::CreateFileW(file_name_yaml, GENERIC_READ, SHARE_READ, 0, OPEN_EXISTING, 0, 0);
   if(handle == INVALID_HANDLE)
     {
      LogError(StringFormat("Al abrir el archivo = %s, last err = %d",
                            file_name_yaml, kernel32::GetLastError()), FUNCION_ACTUAL);
      return false;
     }
//--- Query the size; start from 0 so a failed query cannot leave garbage behind
   long Size = 0;
   kernel32::GetFileSizeEx(handle, Size);
// The buffer length is an int: reject sizes that cannot be represented
   if(Size < 0 || Size > INT_MAX)
     {
      kernel32::CloseHandle(handle);
      LogError("Longitud de archivo YAML invalida: " + string(Size), FUNCION_ACTUAL);
      return false;
     }
//--- Read the whole file into the parser buffer
   uint Read = 0;
   m_yml.m_len = (int)Size;
   ArrayResize(m_yml.m_yml, m_yml.m_len);
   kernel32::ReadFile(handle, m_yml.m_yml, (uint)Size, Read, 0);
   kernel32::CloseHandle(handle);
//--- Anything shorter than 5 bytes is treated as an empty document
   if(Read < 5)
     {
      LogError("YAML vacio", FUNCION_ACTUAL);
      return false;
     }
// On a short read expose only the bytes that were actually filled
   if((int)Read < m_yml.m_len)
     {
      m_yml.m_len = (int)Read;
      ArrayResize(m_yml.m_yml, m_yml.m_len);
     }
//--- Parse
   LogInfo("Parseando YAML", FUNCION_ACTUAL);
   if(!m_yml.Parse())
     {
      LogError("Fallo al parsear YAML, mensaje del parser: ", FUNCION_ACTUAL);
      m_yml.ErrorInfo();
      return false;
     }
//--- Turn the parsed tree into generator state
   return BuildInternal();
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Reads the parsed YAML tree and prepares everything the algorithm needs:
// general settings, the random generator, the key hasher, the key / pre-hash /
// position arrays taken from the "map" node, and the m_perfect_hash setup.
// Returns false (after logging) when a required node is missing or invalid.
//
// Fixes over the previous version:
//  * the "map" type check used `flag & ARR | OBJ`, which parses as
//    `(flag & ARR) | OBJ` and therefore could never be zero;
//  * the final hasher was allocated before several early returns and leaked
//    on those paths - it is now created right before it is handed over;
//  * m_random / m_hash from a previous Init() are released before re-creation;
//  * the fill loops never write past the arrays sized from map.Size().
bool CPerfectHashGenerator::BuildInternal(void)
  {
//---
//m_yml.PrintCintaTypes();
//---
   CYmlNode root = m_yml.GetRoot();
//--- General settings
// NOTE(review): the key is spelled "max_attepms"; kept as-is because existing
// YAML files may rely on it - confirm before renaming.
   m_i_max_attemps = int(root["max_attepms"].ToInt(1000));
   m_i_nampespace = root["namespace"].ToString("");
   m_file_name_out = root["file_name_out"].ToString("C:\\Test.mqh");
   m_table_prefix = root["table_prefix"].ToString("g_table");
   m_copyright = root["copyright"].ToString("MyName");
   m_link = root["link"].ToString("my_org.com");
   m_func_name = root["func_name"].ToString("Hash");
   m_bucket_size_def_name = root["def_bucket_size_name"].ToString("TABLE_SIZE_BUCKETS");
   m_use_map_value = root["map_use_value"].ToBool(false);
   m_final_table_size_def_name = root["def_name_table_size"].ToString("TABLE_SIZE_FINAL");
   m_invalid_value = root["invalid_value"].ToString("NULL");
   m_comment_func = root["comment_funct"].ToBool(false);
   m_guard = root["guard_name"].ToString("");
//--- Random generator
   CYmlNode random = root["random"];
   if(!random.IsObject())
     {
      LogError("Key 'random' no existe o no es un objeto", FUNCION_ACTUAL);
      return false;
     }
// Release a generator left over from a previous Init()
   if(m_random != NULL)
     {
      delete m_random;
      m_random = NULL;
     }
   if(CEnumRegSimpH::GetValNoRef<ENUM_PHASH_RANDOM>(random["type"].ToString(""), PHASH_RANDOM_MATH_XOSHIRO256) == PHASH_RANDOM_MATH_XOSHIRO256)
     {
      m_random = new CRandomX256();
     }
   else
     {
      m_random = new CRandomGeneratorMRand();
     }
   if(!m_random.Init(random["config"]))
      return false;
//--- Key hasher (string -> hash)
   CYmlNode hash = root["hash"];
   if(!hash.IsObject())
     {
      LogError("Key 'hash' no existe o no es un objeto", FUNCION_ACTUAL);
      return false;
     }
// Release a hasher left over from a previous Init()
   if(m_hash != NULL)
     {
      delete m_hash;
      m_hash = NULL;
     }
   if(CEnumRegSimpH::GetValNoRef<ENUM_TSN_PHASH_TYPE>(hash["type"].ToString(""), TSN_PHASH_TYPE_FNV1A_64) == TSN_PHASH_TYPE_FNV1A_64)
     {
      m_hash = new CHashFnv1A64();
     }
   else
     {
      m_hash = new CHashXXHash();
     }
   if(!m_hash.Init(hash["config"]))
      return false;
//--- Map: must be an array or an object
   CYmlNode map = root["map"];
   if((map.GetFlag() & (TSN_YAML_VTYPE_ARR_F | TSN_YAML_VTYPE_OBJ_F)) == 0)
     {
      LogError("No se encontro el campo 'map' en yaml o este no es un objeto o array", FUNCION_ACTUAL);
      return false;
     }
   m_table_size = map.Size();
   if(m_table_size < 1)
     {
      LogError("Map tiene menos de un elemento", FUNCION_ACTUAL);
      return false;
     }
//--- Size the input arrays (keys, pre-hashes, positions)
   ArrayResize(m_keys, m_table_size);
   ArrayResize(m_prev_hashes, m_table_size);
   ArrayResize(m_pos, m_table_size);
   int k = 0;
//---
   if(map.IsObject())
     {
      // key: value pairs
      CYmlIteratorObj it = map.BeginObj();
      if(m_use_map_value)
        {
         LogInfo("Usando map values como valores", FUNCION_ACTUAL);
         // The position is the entry order; the value is fetched later by that order
         while(it.IsValid() && k < m_table_size)
           {
            m_keys[k] = it.Key();
            m_prev_hashes[k] = m_hash.Hash(m_keys[k]);
            m_pos[k] = k;
            k++;
            it.Next();
           }
        }
      else
        {
         LogInfo("Usando map values como indices", FUNCION_ACTUAL);
         // The value of each entry is the index the key maps to
         while(it.IsValid() && k < m_table_size)
           {
            m_keys[k] = it.Key();
            m_prev_hashes[k] = m_hash.Hash(m_keys[k]);
            m_pos[k] = int(it.Val().ToInt(0));
            k++;
            it.Next();
           }
        }
     }
   else
     {
      if(m_use_map_value)
        {
         LogError("No se puede usar map value en un array, se infiere indice..", FUNCION_ACTUAL);
         return false;
        }
      // Array of keys: each key maps to its own position in the array
      CYmlIteratorArray it = map.BeginArr();
      while(it.IsValid() && k < m_table_size)
        {
         m_keys[k] = it.Val().ToString();
         m_prev_hashes[k] = m_hash.Hash(m_keys[k]);
         m_pos[k] = k;
         k++;
         it.Next();
        }
     }
//--- Final hasher: created only now, once no early return can leak it.
// It is handed to m_perfect_hash right below.
   if(CEnumRegSimpH::GetValNoRef<ENUM_HASH_UL1_TYPE>(root["final_hash"].ToString(""), HASH_UL1_SPLITMIX) == HASH_UL1_SPLITMIX)
     {
      m_hash_pf_1 = new CHash1SplitMix();
     }
   else
     {
      m_hash_pf_1 = new CHash1MurMur();
     }
//--- Perfect hash setup
   m_perfect_hash.DeleteHashers(true);
   m_perfect_hash.MaxValSeed(m_i_max_attemps);
   m_perfect_hash.Hasher0(new CHashGeneratorBasis());
   m_perfect_hash.Hasher1(m_hash_pf_1); // m_perfect_hash is the owner from here on
   CYmlNode perfect_hash = root["perfect_hash"];
   m_perfect_hash.InitAlg(m_table_size, perfect_hash["load_factor"].ToDouble(0.80),
                          int(perfect_hash["elements_por_bucket"].ToInt(6)));
//--- Pre-fill the output tables with their "empty" markers
   ArrayResize(m_out_slots, m_perfect_hash.m_final_table_size);
   ArrayInitialize(m_out_slots, -1); // Invalid indices
   ArrayResize(m_hashes, m_perfect_hash.m_final_table_size);
   ArrayInitialize(m_hashes, 0ULL); // Invalid hashes
//---
   LogInfo(StringFormat("Perfect hash inicado\nNumero de intentos por bucket = %d, Tamaño de tabla = %d",
                        m_i_max_attemps, m_table_size), FUNCION_ACTUAL);
   LogInfo(StringFormat("Numero de buckets = %d | Tamaño de la tabla final = %d",
                        m_perfect_hash.m_buckets_size, m_perfect_hash.m_final_table_size), FUNCION_ACTUAL);
   return true;
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Appends a three-line MQL5 comment banner (CRLF line endings) to the string
// variable `data`. The expansion already ends with ';', so it is used as a
// stand-alone statement without a trailing semicolon.
#define PHASH_CODE_ADD_FBLOCK(data) \
data += "//+------------------------------------------------------------------+\r\n" + \
"//| |\r\n" + \
"//+------------------------------------------------------------------+\r\n";
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Writes the generated perfect hash as an MQL5 include file at m_file_name_out
// and, when the YAML root has a "bin" object, the binary dumps via SaveBin().
// The file contains: header, hasher includes, optional include guard, the two
// size #defines, the seeds table, the hashes table, either the values table
// (m_use_map_value) or the index table, and the lookup function.
// Returns false (after logging) when the tables are not ready, the file cannot
// be created or written, or SaveBin() fails.
//
// Fixes over the previous version:
//  * the last row of the index table had its condition inverted: an empty slot
//    indexed m_keys[-1] and a valid slot was written as -1;
//  * the value type was taken from map.AtObj(0) even when "map" is an array;
//  * empty / unsized output tables are rejected instead of indexing [-1];
//  * a short or failed write is reported instead of being logged as success.
bool CPerfectHashGenerator::Save(void)
  {
//--- Table sizes, validated before anything is created on disk
   const int n_buckets = m_perfect_hash.m_buckets_size;
   const int n_slots = m_perfect_hash.m_final_table_size;
   if(n_buckets < 1 || n_slots < 1 || ArraySize(m_out_seeds) < n_buckets
      || ArraySize(m_hashes) < n_slots || ArraySize(m_out_slots) < n_slots)
     {
      LogError("Tablas de salida vacias o incompletas, no se puede guardar", FUNCION_ACTUAL);
      return false;
     }
   const int last_bucket = n_buckets - 1;
   const int last_slot = n_slots - 1;
//--- Output file
   HANDLE handle = kernel32::CreateFileW(m_file_name_out, GENERIC_WRITE, 0, 0, CREATE_ALWAYS, 0, 0);
   if(handle == INVALID_HANDLE)
     {
      LogCriticalError(StringFormat("Fallo al crear archivo:\n'%s'\nUltimo err in kernel32 = %d",
                                    m_file_name_out, kernel32::GetLastError()), FUNCION_ACTUAL);
      return false;
     }
//--- Header
   string data = "//+-------------------------------------------------------------------+\r\n" +
                 "//| Include generado por la herramienta PerfectHash SimPHash |\r\n" +
                 "//| Esta heramienta forma parte del ecositema TSN |\r\n" +
                 "//| Repositorio: https://forge.mql5.io/nique_372/SimPHash |\r\n" +
                 "//+-------------------------------------------------------------------+\r\n" +
                 StringFormat("#property copyright \"%s\"\r\n", m_copyright) +
                 StringFormat("#property link \"%s\"\r\n", m_link) +
                 "#property strict\r\n" + "\r\n";
   data += "\r\n";
//--- Includes required by the two hashers
   PHASH_CODE_ADD_FBLOCK(data)
   m_hash.AddIncludes(data);
   m_hash_pf_1.AddIncludes(data);
//--- Optional include guard (closed at the very end)
   if(m_guard.Length())
     {
      data += "#ifndef " + m_guard + "\r\n";
      data += "#define " + m_guard + "\r\n";
      data += "\r\n";
     }
//--- Size defines
   PHASH_CODE_ADD_FBLOCK(data)
   data += "#define " + m_final_table_size_def_name + " (" + string(n_slots) + "ULL)\r\n";
   data += "#define " + m_bucket_size_def_name + " (" + string(n_buckets) + "ULL)\r\n";
   data += "\r\n";
//--- Code section
   PHASH_CODE_ADD_FBLOCK(data)
//--- Optional namespace
   const bool is_n = m_i_nampespace != "";
   if(is_n)
      data += StringFormat("namespace %s\r\n{\r\n", m_i_nampespace);
//--- Seeds table (one seed per bucket; the last row has no trailing comma)
   const string table_seeds_name = m_table_prefix + "_seeds";
   data += "const ulong " + table_seeds_name + "[" + m_bucket_size_def_name + "] = \r\n";
   data += "{\r\n";
   for(int i = 0; i < last_bucket; i++)
     {
      data += string(m_out_seeds[i]) + "ULL,\r\n";
     }
   data += string(m_out_seeds[last_bucket]) + "ULL\r\n";
   data += "};\r\n";
   data += "\r\n";
//--- Hash table (one stored hash per final slot)
// NOTE(review): the trailing comment indexes m_keys with the slot value; when
// map values are user-supplied indices that value may not be a key position -
// confirm against CPerfectHashByLeo::RunWValue.
   const string table_hash_name = m_table_prefix + "_hashes";
   data += "const ulong " + table_hash_name + "[" + m_final_table_size_def_name + "] = \r\n";
   data += "{\r\n";
   for(int i = 0; i < last_slot; i++)
     {
      data += string(m_hashes[i]) + "ULL," + (m_out_slots[i] == -1 ? (" // invalid") : (" // " + m_keys[m_out_slots[i]])) + "\r\n";
     }
   data += string(m_hashes[last_slot]) + "ULL\r\n";
   data += "};\r\n";
   data += "\r\n";
//--- Values / index table
   const string table_slots_name = m_table_prefix + "_tindex";
   CYmlNode map = m_yml.GetRoot()["map"];
// The value type is only meaningful (and only used) when values are emitted;
// in that mode "map" is guaranteed to be an object by BuildInternal()
   const string type_str_v = m_use_map_value ? map.AtObj(0).TypeToMqlStr() : "";
   const string table_values_nmae = m_table_prefix + "_values";
//---
   if(m_use_map_value) // Values table
     {
      data += StringFormat("const %s %s[%s] = \r\n", type_str_v, table_values_nmae
                           , m_final_table_size_def_name);
      data += "{\r\n";
      //--- All rows but the last carry a trailing comma
      for(int i = 0; i < last_slot; i++)
        {
         const int ri = m_out_slots[i]; // position of both the value and its key
         if(ri == -1)
            data += m_invalid_value + ",\r\n";
         else
            data += map.AtObj(ri).ToString() + ", // " + m_keys[ri] + "\r\n";
        }
      //--- Last row
      const int tail = m_out_slots[last_slot];
      if(tail != -1)
         data += string(map.AtObj(tail).ToString()) + " // " + m_keys[tail] + "\r\n";
      else
         data += m_invalid_value + "\r\n";
      //---
      data += "};\r\n";
     }
   else // Index table
     {
      data += "const int " + table_slots_name + "[" + m_final_table_size_def_name + "] = \r\n";
      data += "{\r\n";
      for(int i = 0; i < last_slot; i++)
        {
         const int ri = m_out_slots[i];
         if(ri == -1)
            data += "-1, \r\n";
         else
            data += string(ri) + ", // " + m_keys[ri] + "\r\n";
        }
      //--- Last row: a valid slot prints its index and key, an empty one prints -1
      const int tail = m_out_slots[last_slot];
      if(tail != -1)
         data += string(tail) + " // " + m_keys[tail] + "\r\n";
      else
         data += "-1\r\n";
      //---
      data += "};\r\n";
     }
//--- Lookup function (optionally emitted inside a block comment)
   data += "\r\n";
   if(m_comment_func)
      data += "/*\r\n";
//--- Signature
   if(m_use_map_value) // Returns the value directly
     {
      data += StringFormat("%s %s(const string& key)\r\n", type_str_v, m_func_name);
     }
   else
     {
      data += "<return-type> " + m_func_name + "(const string& key)\r\n";
     }
   data += "{\r\n";
//--- Key hash code, emitted by the key hasher
   m_hash.BuildString(data);
//--- Bucket selection
   data += StringFormat(" const int seed_index = int(key_hash %% %s);\r\n", m_bucket_size_def_name);
//--- Seeded final hash code, emitted by the final hasher
   m_hash_pf_1.BuildString(data, table_seeds_name);
//--- Final slot
   data += StringFormat(" const int fi = int(h %% %s);\r\n", m_final_table_size_def_name);
//--- Return statement
   if(m_use_map_value) // Direct value, no index indirection
     {
      data += StringFormat(" return %s[fi] == key_hash ? %s[fi] : %s;\r\n",
                           table_hash_name, table_values_nmae, m_invalid_value);
     }
   else
     {
      // Goes through the index table; placeholders are left for the user to fill in
      data += StringFormat(" return %s[fi] == key_hash ? <your table>[%s[fi]] : <return in case fail>;\r\n",
                           table_hash_name, table_slots_name);
     }
//--- Close the function
   data += "}\r\n";
   if(m_comment_func)
      data += "*/\r\n";
//--- Close the namespace
   if(is_n)
      data += "}\r\n";
//--- Close the include guard
   if(m_guard.Length())
      data += "#endif // " + m_guard + "\r\n";
//--- Convert to bytes and write the whole file in one call
   uchar buff[];
   int l = StringToCharArray(data, buff, 0, StringLen(data));
//---
   uint writen = 0;
   kernel32::WriteFile(handle, buff, sizeof(uchar) * l, writen, NULL);
   kernel32::CloseHandle(handle);
// `writen` starts at 0, so a failed call is caught here as well as a short write
   if(l < 0 || writen != (uint)l)
     {
      LogCriticalError(StringFormat("Fallo al escribir archivo:\n'%s'\nUltimo err in kernel32 = %d",
                                    m_file_name_out, kernel32::GetLastError()), FUNCION_ACTUAL);
      return false;
     }
//---
   LogInfo(StringFormat("Perfect hash contruido existosamente revise el archivo:\n%s",
                        m_file_name_out), FUNCION_ACTUAL);
//--- Optional binary dumps
   CYmlNode bin = m_yml.GetRoot()["bin"];
   if(bin.IsObject())
     {
      LogInfo("Guardando info extra", FUNCION_ACTUAL);
      if(!SaveBin(bin))
         return false;
     }
   else
     {
      LogCaution("No se esta guardando info extra", FUNCION_ACTUAL);
     }
//---
   return true;
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Dumps the generated tables to four binary files whose paths come from the
// `bin` YAML node:
//   bin["seeds"]  - m_out_seeds, one ulong per bucket
//   bin["hash"]   - m_hashes, one ulong per final-table slot
//   bin["values"] - per slot either the map value (m_use_map_value) or the
//                   int from m_out_slots; strings are stored as [len][bytes]
//   bin["meta"]   - two ints: bucket count, then final table size
// Returns false (after logging) as soon as one of the files cannot be created.
//
// Fixes over the previous version:
//  * the REAL branch iterated m_table_size slots instead of the final table
//    size, so the values file was shorter than the other tables;
//  * an unsupported value type no longer produces an empty file silently.
bool CPerfectHashGenerator::SaveBin(CYmlNode& bin)
  {
//---
   uint writen = 0;
   const int tsize = m_perfect_hash.m_final_table_size;
   const int bsize = m_perfect_hash.m_buckets_size;
//--- Seeds
   HANDLE handle = kernel32::CreateFileW(bin["seeds"].ToString(""), GENERIC_WRITE, 0, 0, CREATE_ALWAYS, 0, 0);
   if(handle == INVALID_HANDLE)
     {
      LogCriticalError(StringFormat("Fallo al crear archivo:\n'%s'\nUltimo err in kernel32 = %d",
                                    bin["seeds"].ToString(""), kernel32::GetLastError()), FUNCION_ACTUAL);
      return false;
     }
   kernel32::WriteFile(handle, m_out_seeds, sizeof(ulong) * bsize, writen, NULL);
   kernel32::CloseHandle(handle);
//--- Hashes
   handle = kernel32::CreateFileW(bin["hash"].ToString(""), GENERIC_WRITE, 0, 0, CREATE_ALWAYS, 0, 0);
   if(handle == INVALID_HANDLE)
     {
      LogCriticalError(StringFormat("Fallo al crear archivo:\n'%s'\nUltimo err in kernel32 = %d",
                                    bin["hash"].ToString(""), kernel32::GetLastError()), FUNCION_ACTUAL);
      return false;
     }
   kernel32::WriteFile(handle, m_hashes, sizeof(ulong) * tsize, writen, NULL);
   kernel32::CloseHandle(handle);
//--- Values
   handle = kernel32::CreateFileW(bin["values"].ToString(""), GENERIC_WRITE, 0, 0, CREATE_ALWAYS, 0, 0);
   if(handle == INVALID_HANDLE)
     {
      LogCriticalError(StringFormat("Fallo al crear archivo:\n'%s'\nUltimo err in kernel32 = %d",
                                    bin["values"].ToString(""), kernel32::GetLastError()), FUNCION_ACTUAL);
      return false;
     }
//---
   if(m_use_map_value)
     {
      CYmlNode root = m_yml.GetRoot();
      CYmlNode map = root["map"];
      // The type of the first entry decides the layout of the whole file
      const int type = map.AtObj(0).GetType();
      switch(type)
        {
         case TSN_YAML_VTYPE_STRING:
         case TSN_YAML_VTYPE_USTRING:
         case TSN_YAML_VTYPE_STR_MULTILINE:
           {
            // Layout: [len][string][len][string]...
            uchar in_v[];
            const int in_v_l = root["invalid_value"].ToStringUArray(in_v);
            //---
            for(int i = 0; i < tsize; i++)
              {
               const int k = m_out_slots[i];
               if(k == -1)
                 {
                  // Empty slot: store the configured invalid value
                  kernel32::WriteFile(handle, in_v_l, sizeof(int), writen, NULL);
                  kernel32::WriteFile(handle, in_v, sizeof(uchar) * in_v_l, writen, NULL);
                 }
               else
                 {
                  // Occupied slot: store the map value at that position
                  uchar data[];
                  const int l = map.AtObj(k).ToStringUArray(data);
                  kernel32::WriteFile(handle, l, sizeof(int), writen, NULL);
                  kernel32::WriteFile(handle, data, sizeof(uchar) * l, writen, NULL);
                 }
              }
            break;
           }
         case TSN_YAML_VTYPE_INTEGER:
           {
            const long in_v = root["invalid_value"].ToInt(-1);
            for(int i = 0; i < tsize; i++)
              {
               const int k = m_out_slots[i];
               long v = k == -1 ? in_v : map.AtObj(k).ToInt(0);
               kernel32::WriteFile(handle, v, sizeof(long), writen, NULL);
              }
            break;
           }
         case TSN_YAML_VTYPE_REAL:
           {
            const double in_v = root["invalid_value"].ToDouble(0.00);
            // One entry per final-table slot, like every other branch
            for(int i = 0; i < tsize; i++)
              {
               const int k = m_out_slots[i];
               double v = k == -1 ? in_v : map.AtObj(k).ToDouble(0.00);
               kernel32::WriteFile(handle, v, sizeof(double), writen, NULL);
              }
            break;
           }
         case TSN_YAML_VTYPE_BOOL:
           {
            const bool in_v = root["invalid_value"].ToBool(false);
            for(int i = 0; i < tsize; i++)
              {
               const int k = m_out_slots[i];
               bool v = k == -1 ? in_v : map.AtObj(k).ToBool(false);
               kernel32::WriteFile(handle, v, sizeof(bool), writen, NULL);
              }
            break;
           }
         default:
           {
            // Nothing is written for other types; make that visible in the log
            LogCaution("Tipo de valor no soportado en 'map', el archivo de valores queda vacio", FUNCION_ACTUAL);
            break;
           }
        }
     }
   else
     {
      // Index mode: dump the slot table as-is
      kernel32::WriteFile(handle, m_out_slots, sizeof(int) * tsize, writen, NULL);
     }
//---
   kernel32::CloseHandle(handle);
//--- Meta
   handle = kernel32::CreateFileW(bin["meta"].ToString(""), GENERIC_WRITE, 0, 0, CREATE_ALWAYS, 0, 0);
   if(handle == INVALID_HANDLE)
     {
      LogCriticalError(StringFormat("Fallo al crear archivo:\n'%s'\nUltimo err in kernel32 = %d",
                                    bin["meta"].ToString(""), kernel32::GetLastError()), FUNCION_ACTUAL);
      return false;
     }
   kernel32::WriteFile(handle, bsize, sizeof(int), writen, NULL); // buckets size
   kernel32::WriteFile(handle, tsize, sizeof(int), writen, NULL); // final table size
   kernel32::CloseHandle(handle);
//---
   return true;
  }
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Runs the perfect-hash algorithm over the prepared pre-hashes and positions.
// On success the result is written out through Save() and its result is
// returned; on failure an error is logged and false is returned.
bool CPerfectHashGenerator::RunAlg(void)
  {
   const bool built = m_perfect_hash.RunWValue(m_prev_hashes, m_pos, m_out_seeds, m_out_slots, m_hashes);
   if(!built)
     {
      LogError("No se pudo generar el hash perfecto", FUNCION_ACTUAL);
      return false;
     }
//--- Tables are ready: emit the output files
   return Save();
  }
}
//+------------------------------------------------------------------+
#endif // SIMPHASH_SRC_PERFECTHASH_MQH