PerfectHashByLeo/Src/Main.mqh
Nique_372 c82ef3872b
2026-06-28 19:10:47 -05:00

488 lines
14 KiB
MQL5

//+------------------------------------------------------------------+
//| Main.mqh |
//| Copyright 2026, Niquel Mendoza. |
//| https://www.mql5.com/ |
//+------------------------------------------------------------------+
#property copyright "Copyright 2026, Niquel Mendoza."
#property link "https://www.mql5.com/"
#property strict
#ifndef PERFECTHASHBYLEO_SRC_MAIN_MQH
#define PERFECTHASHBYLEO_SRC_MAIN_MQH
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
#include "Defines.mqh"
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
namespace TSN
{
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
template <typename TKey>
class CPerfectHashByLeo
{
public:
// Solo lectura
int m_final_table_size; // Tamaño de la tabla final (valores) [m]
int m_buckets_size; // Tamaño de buckets inciales
private:
//--- Parametros
ulong m_max_attems; // Maximo valor de la semilla (intentos.. casi)
//--- Hashesers
bool m_delete_hashers;
CPerfectHashHasher<TKey>* m_hasher_0;
CPerfectHashHasherWSeed<TKey>* m_hasher_1;
//--- Arrays internos
long m_buckets[]; // Buckets (size=m_buckets_size)
bool m_ocupped[]; // Ocuapdos en la tabla final (size=m_final_table_size)
int m_slots_oc_buckket[TSN_PHBL_MAX_SIZE_BUCKETS]; // Slots ocupados por buckets..
// Buckets count
struct BucketPos
{
int array_pos[TSN_PHBL_MAX_SIZE_BUCKETS];
};
BucketPos m_pos[];
int m_pos_size;
int m_pos_reserve;
//---
int m_table_size;
//---
void Sort(int left, int right);
public:
CPerfectHashByLeo(void);
~CPerfectHashByLeo(void);
//--- Getters\Setters
// Delete hashers
void DeleteHashers(bool v) { m_delete_hashers = v; }
__forceinline bool DeleteHashers() const { return m_delete_hashers; }
// Hash 0
CPerfectHashHasher<TKey>* Hasher0() { return m_hasher_0; }
void Hasher0(CPerfectHashHasher<TKey>* v) { m_hasher_0 = v; }
// Hash 1
CPerfectHashHasherWSeed<TKey>* Hasher1() { return m_hasher_1; }
void Hasher1(CPerfectHashHasherWSeed<TKey>* v) { m_hasher_1 = v; }
// Max attemps
void MaxValSeed(ulong v) { m_max_attems = v; }
__forceinline ulong MaxValSeed() { return m_max_attems; }
//--- Final getters
__forceinline int BuckestSize() const { return m_buckets_size; }
__forceinline int FinalTableSize() const { return m_final_table_size; }
//---
// Completo
void InitAlg(const int size_keys, double factor_f, int elementos_por_bucket);
// Mas bajo nivel
void InitAlg(const int bucket_size, const int final_table_size);
//--- Run
bool Run(const TKey& keys[], ulong& out_seeds[]);
template <typename TValue>
bool RunWValue(const TKey & keys[], const TValue& in_value[], ulong & out_seeds[],
TValue& out_value[], TKey& out_keys[]);
// Tamaño de out_seeds = m_buckets_size
};
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
template <typename TKey> CPerfectHashByLeo::CPerfectHashByLeo()
: m_hasher_0(NULL), m_hasher_1(NULL), m_delete_hashers(false), m_final_table_size(0),
m_buckets_size(0), m_max_attems(1500)
{
//---
m_pos_reserve = 16; // Solo para 16 buckets.. luego crece..
ArrayResize(m_pos, m_pos_reserve, m_pos_reserve);
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
template <typename TKey> CPerfectHashByLeo::~CPerfectHashByLeo()
{
if(m_delete_hashers)
{
if(m_hasher_0 != NULL)
delete m_hasher_0;
if(m_hasher_1 != NULL)
delete m_hasher_1;
}
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
template <typename TKey>
void CPerfectHashByLeo::Sort(int left, int right)
{
//---
if(left >= right)
return;
//---
int pivotIndex = (left + right) >> 1;
int pivotValue = TSN_PHBL_GET_COUNT(m_buckets[pivotIndex]);
int i = left, j = right;
while(i <= j)
{
while(TSN_PHBL_GET_COUNT(m_buckets[i]) > pivotValue)
i++;
while(TSN_PHBL_GET_COUNT(m_buckets[j]) < pivotValue)
j--;
if(i <= j)
{
long temp = m_buckets[i];
m_buckets[i] = m_buckets[j];
m_buckets[j] = temp;
i++;
j--;
}
}
//---
Sort(left, j);
Sort(i, right);
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
template <typename TKey>
void CPerfectHashByLeo::InitAlg(const int size_keys, double factor_f, int elementos_por_bucket)
{
//--- Prev
const int prev_bucket_size = m_buckets_size;
const int prev_fs = m_final_table_size;
//--- Calc
m_buckets_size = int(size_keys / (double)elementos_por_bucket);
m_final_table_size = int(size_keys / factor_f);
//--- Resize
if(m_buckets_size >= prev_bucket_size)
{
ArrayResize(m_buckets, m_buckets_size, m_buckets_size);
}
if(m_final_table_size >= prev_fs)
{
ArrayResize(m_ocupped, m_final_table_size, m_final_table_size);
}
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
template <typename TKey>
void CPerfectHashByLeo::InitAlg(const int bucket_size, const int final_table_size)
{
//--- Prev
const int prev_bucket_size = m_buckets_size;
const int prev_fs = m_final_table_size;
//--- Calc
m_buckets_size = bucket_size;
m_final_table_size = final_table_size;
//--- Resize
if(bucket_size >= prev_bucket_size)
{
ArrayResize(m_buckets, m_buckets_size, m_buckets_size);
}
if(final_table_size >= prev_fs)
{
ArrayResize(m_ocupped, m_final_table_size, m_final_table_size);
}
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
template <typename TKey>
bool CPerfectHashByLeo::Run(const TKey & keys[], ulong & out_seeds[])
{
//---
m_table_size = ArraySize(keys); // Tamaño real de keys...
//--- Inicializacion
// Arrays check
if(m_table_size >= m_final_table_size)
{
m_final_table_size = m_table_size;
ArrayResize(m_ocupped, m_final_table_size, m_final_table_size);
}
// Out seed
ArrayResize(out_seeds, m_buckets_size);
// Pos
m_pos_size = 0;
//--- Relleno
for(int i = 0; i < m_buckets_size; i++)
{
m_buckets[i] = long(0x7FFFF) << TSN_PHBL_BIT_START_I_BUCKET | long(i) << TSN_PHBL_BIT_START_POS_I;
}
ArrayInitialize(m_ocupped, false);
ArrayInitialize(m_slots_oc_buckket, -1);
//--- Hash inicial
// 12 bit = num | 16 bit = bucket start | 32 bit (posicion en array pos de este key)
for(int i = 0; i < m_table_size; i++)
{
const int p = int(m_hasher_0.Hash(keys[i]) % m_buckets_size);
const long lv = m_buckets[p];
int sib = TSN_PHBL_GET_INDEX(lv);
if(sib != 0x7FFFF)
{
sib = m_pos_size++; // Obtenemos nuevo indice..
if(m_pos_size >= m_pos_reserve)
{
m_pos_reserve += (m_pos_size >> 1) + 1; // 50 % mas..
ArrayResize(m_pos, m_pos_reserve, m_pos_reserve);
}
}
//---
int curr = TSN_PHBL_GET_COUNT(lv); // Tamaño actual
m_pos[sib].array_pos[curr++] = i; // Nueva posicion a este array (vista)
//--- Final empaquetamos todo
m_buckets[p] = TSN_PHBL_EMPAQUETAR(curr, sib, TSN_PHBL_GET_POS(lv));
}
//--- Ordenamos buckets..
Sort(0, m_buckets_size - 1);
//--- Algoritmo...
for(int bi = 0; bi < m_buckets_size; bi++)
{
//---
const long full_v = m_buckets[bi];
const int bucket_size = TSN_PHBL_GET_COUNT(full_v);
if(bucket_size < 1)
continue;
//---
const int start_pos = TSN_PHBL_GET_POS(full_v);
const int pos_index = TSN_PHBL_GET_INDEX(full_v);
//--- Iteramos por este bucket...
ulong d = 0;
while(true)
{
bool ok = true;
//---
for(int k = 0; k < bucket_size && ok; k++)
{
//---
const int key_index = m_pos[pos_index].array_pos[k]; // Indice de la clave
// Posicion final..
const int pos = int(m_hasher_1.Hash(keys[key_index], d) % m_final_table_size);
//--- Verificar si este slot ya se ocupa
if(m_ocupped[pos])
{
ok = false;
break;
}
//---
for(int j = 0; j < k; j++)
{
if(m_slots_oc_buckket[j] == pos)
{
ok = false;
break;
}
}
//---
m_slots_oc_buckket[k] = pos;
}
//---
if(ok)
{
out_seeds[start_pos] = d; // guardar seed
for(int k = 0; k < bucket_size; k++)
m_ocupped[m_slots_oc_buckket[k]] = true; // marcar ocupadas
break; // Salimos..
}
//---
d++;
if(d >= m_max_attems)
{
return false;
}
}
}
//---
return true;
}
//+------------------------------------------------------------------+
//| |
//+------------------------------------------------------------------+
// Notas:
// out_value y out keys requieren iniclaicion previa con el tamaño de final tabla size..
template <typename TKey>
template <typename TValue>
bool CPerfectHashByLeo::RunWValue(const TKey & keys[], const TValue& in_value[], ulong & out_seeds[],
TValue& out_value[], TKey& out_keys[])
{
// tamaño de in_value = keys
//---
m_table_size = ArraySize(keys); // Tamaño real de keys...
//--- Inicializacion
// Arrays check
if(m_table_size >= m_final_table_size)
{
m_final_table_size = m_table_size;
ArrayResize(m_ocupped, m_final_table_size, m_final_table_size);
}
// Out seed
ArrayResize(out_seeds, m_buckets_size);
ArrayInitialize(out_seeds, 0);
// ArrayResize(out_value, m_final_table_size);
// Pos
m_pos_size = 0;
//--- Relleno
for(int i = 0; i < m_buckets_size; i++)
{
m_buckets[i] = long(0x7FFFF) << TSN_PHBL_BIT_START_I_BUCKET | long(i) << TSN_PHBL_BIT_START_POS_I;
}
ArrayInitialize(m_ocupped, false);
ArrayInitialize(m_slots_oc_buckket, -1);
//--- Hash inicial
// 12 bit = num | 16 bit = bucket start | 32 bit (posicion en array pos de este key)
for(int i = 0; i < m_table_size; i++)
{
const int p = int(m_hasher_0.Hash(keys[i]) % (ulong)m_buckets_size);
const long lv = m_buckets[p];
int sib = TSN_PHBL_GET_INDEX(lv);
if(sib == 0x7FFFF) // Si aun no tiene inciado su sib.. le otorgamos uno..
{
sib = m_pos_size++; // Obtenemos nuevo indice..
if(m_pos_size >= m_pos_reserve)
{
m_pos_reserve += (m_pos_size >> 1) + 1; // 50 % mas..
ArrayResize(m_pos, m_pos_reserve, m_pos_reserve);
}
}
//---
int curr = TSN_PHBL_GET_COUNT(lv); // Tamaño actual
m_pos[sib].array_pos[curr++] = i; // Nueva posicion a este array (vista)
//--- Final empaquetamos todo
m_buckets[p] = TSN_PHBL_EMPAQUETAR(curr, sib, TSN_PHBL_GET_POS(lv));
}
//--- Ordenamos buckets..
Sort(0, m_buckets_size - 1);
//--- Algoritmo...
for(int bi = 0; bi < m_buckets_size; bi++)
{
//---
const long full_v = m_buckets[bi];
const int bucket_size = TSN_PHBL_GET_COUNT(full_v);
if(bucket_size < 1)
continue;
//---
const int start_pos = TSN_PHBL_GET_POS(full_v);
const int pos_index = TSN_PHBL_GET_INDEX(full_v);
//--- Iteramos por este bucket...
ulong d = 0;
while(true)
{
bool ok = true;
//---
for(int k = 0; k < bucket_size && ok; k++)
{
//---
const int key_index = m_pos[pos_index].array_pos[k]; // Indice de la clave
//Print("bi=", bi, " pos_index=", pos_index, " bucket_size=", bucket_size,
// " k=", k, " key_index=", key_index, " table_size=", m_table_size);
// Posicion final..
const int pos = int(m_hasher_1.Hash(keys[key_index], d) % (ulong)m_final_table_size);
//--- Verificar si este slot ya se ocupa
if(m_ocupped[pos])
{
ok = false;
break;
}
//---
for(int j = 0; j < k; j++)
{
if(m_slots_oc_buckket[j] == pos)
{
ok = false;
break;
}
}
//---
m_slots_oc_buckket[k] = pos;
}
//---
if(ok)
{
out_seeds[start_pos] = d; // guardar seed
for(int k = 0; k < bucket_size; k++)
{
const int ra = m_slots_oc_buckket[k];
m_ocupped[ra] = true; // marcar ocupadas
const int fi = m_pos[pos_index].array_pos[k];
out_value[ra] = in_value[fi];
out_keys[ra] = keys[fi];
}
break; // Salimos..
}
//---
d++;
if(d >= m_max_attems)
{
return false;
}
}
}
//---
return true;
}
//--- End namespace
}
//+------------------------------------------------------------------+
#endif // PERFECTHASHBYLEO_SRC_MAIN_MQH