//+------------------------------------------------------------------+
//| mql5/Experts/Advisors/DualEA/Include/PolicyUpdater.mqh           |
//+------------------------------------------------------------------+
//+------------------------------------------------------------------+
//| PolicyUpdater.mqh |
//| Purpose: Auto-updating ML policy system |
//| Features: Learn from trades, update policy.json automatically |
//+------------------------------------------------------------------+
#ifndef __POLICYUPDATER_MQH__
#define __POLICYUPDATER_MQH__
#include "LearningBridge.mqh"
#include <Files/File.mqh>
#include <Generic/HashMap.mqh>
// Policy entry for a strategy/symbol/timeframe combination.
// Holds the tunable trade parameters plus the learning statistics
// used to adapt them over time. NOTE: MQL5 does not zero-initialize
// numeric struct members — callers must set every field before use.
struct PolicyEntry
{
   string   strategy;
   string   symbol;
   int      timeframe;              // Timeframe in minutes (60=H1, 240=H4, 1440=D1)
   double   probability;            // ML-predicted win rate
   double   sl_scale;               // SL multiplier
   double   tp_scale;               // TP multiplier
   double   trail_atr_mult;         // Trailing stop ATR multiplier
   double   min_confidence;         // Minimum confidence threshold

   // Learning metrics
   int      total_trades;
   int      winning_trades;
   double   total_profit;
   datetime last_update;

   // Advanced metrics (P0 improvement)
   double   gross_profit;           // Sum of winning trade amounts
   double   gross_loss;             // Sum of losing trade amounts (absolute value)
   double   expectancy;             // (win_rate * avg_win) - (loss_rate * avg_loss)
   double   profit_factor;          // gross_profit / gross_loss
   double   r_multiple_avg;         // Average R-multiple per trade
   double   max_drawdown_pct;       // Peak-to-trough drawdown, percent of peak equity
   double   sharpe_20;              // Sharpe ratio over the recent-PnL buffer (<=20 trades)
   double   peak_equity;            // Running equity peak, for drawdown calculation
   double   current_equity;         // Running equity curve

   // Trade history buffer for advanced stats (last 20 trades, FIFO)
   double   recent_pnls[20];
   int      recent_count;           // Number of valid entries in recent_pnls

   // Regime-aware tracking
   string   market_regime;          // "trending", "ranging", "volatile"
   double   regime_performance[3];  // Performance per regime index
};
//+------------------------------------------------------------------+
//| Policy Updater - ML-driven policy learning                       |
//| Learns per strategy/symbol/timeframe parameters from recorded    |
//| trade outcomes and persists them to policy.json (common folder). |
//+------------------------------------------------------------------+
class CPolicyUpdater
{
private:
   CLearningBridge*  m_learning;                 // Optional ML bridge; learning is skipped when NULL

   // Parallel-array policy storage (MQL5 HashMap doesn't support structs)
   string            m_policy_keys[];
   PolicyEntry       m_policy_values[];

   string            m_policy_file_path;         // Path relative to the common files folder
   int               m_update_interval_minutes;  // Timed auto-update interval
   datetime          m_last_update;              // Time of last successful file write
   bool              m_auto_update_enabled;

   // Statistics
   int               m_total_updates;            // Write attempts
   int               m_successful_updates;       // Writes that succeeded

   // Real-time update trigger
   int               m_trades_since_update;
   int               m_trades_update_threshold;  // Write policy after N trades

   // Linear scan for a policy key; returns array index or -1 when absent.
   int FindPolicyIndex(const string key)
   {
      for(int i = 0; i < ArraySize(m_policy_keys); i++)
      {
         if(m_policy_keys[i] == key)
            return i;
      }
      return -1;
   }

   // Copy the stored policy for key into out_policy; false if not found.
   bool GetPolicy(const string key, PolicyEntry &out_policy)
   {
      int idx = FindPolicyIndex(key);
      if(idx < 0)
         return false;
      out_policy = m_policy_values[idx];
      return true;
   }

   // Insert a new policy or overwrite the existing one under key.
   void SetPolicy(const string key, const PolicyEntry &policy)
   {
      int idx = FindPolicyIndex(key);
      if(idx >= 0)
      {
         m_policy_values[idx] = policy;
         return;
      }
      int size = ArraySize(m_policy_keys);
      ArrayResize(m_policy_keys, size + 1);
      ArrayResize(m_policy_values, size + 1);
      m_policy_keys[size]   = key;
      m_policy_values[size] = policy;
   }

   // Canonical storage key for a strategy/symbol/timeframe combination.
   string GetPolicyKey(const string strategy, const string symbol, const int timeframe)
   {
      return StringFormat("%s_%s_%d", strategy, symbol, timeframe);
   }

   // Set every field of a policy entry to safe defaults.
   // FIX: MQL5 does not zero-initialize numeric struct members; the old
   // InitializeDefaultPolicies() left the advanced metrics (gross_profit,
   // peak_equity, recent_count, ...) uninitialized. All creation paths now
   // go through this helper.
   void ResetPolicy(PolicyEntry &policy)
   {
      policy.strategy         = "";
      policy.symbol           = "";
      policy.timeframe        = 0;
      policy.probability      = 0.5;
      policy.sl_scale         = 1.0;
      policy.tp_scale         = 1.2;
      policy.trail_atr_mult   = 2.0;
      policy.min_confidence   = 0.5;
      policy.total_trades     = 0;
      policy.winning_trades   = 0;
      policy.total_profit     = 0.0;
      policy.last_update      = TimeCurrent();
      policy.gross_profit     = 0.0;
      policy.gross_loss       = 0.0;
      policy.expectancy       = 0.0;
      policy.profit_factor    = 1.0;
      policy.r_multiple_avg   = 0.0;
      policy.max_drawdown_pct = 0.0;
      policy.sharpe_20        = 0.0;
      policy.peak_equity      = 0.0;
      policy.current_equity   = 0.0;
      policy.recent_count     = 0;
      policy.market_regime    = "";
      ArrayInitialize(policy.recent_pnls, 0.0);
      ArrayInitialize(policy.regime_performance, 0.0);
   }

   // Learn optimal parameters from trade history with advanced metrics.
   // No-op until more than 5 trades are recorded, or when no learning
   // bridge is attached.
   void LearnFromTrades(PolicyEntry &policy)
   {
      if(m_learning == NULL)
         return;
      if(policy.total_trades <= 5)
         return;

      double actual_win_rate = (double)policy.winning_trades / policy.total_trades;
      // Blend the measured win rate into the ML probability (EMA, 30% new data)
      policy.probability = policy.probability * 0.7 + actual_win_rate * 0.3;

      // Profit factor: gross_profit / gross_loss; 999 is a "no losses yet" sentinel
      if(policy.gross_loss > 0)
         policy.profit_factor = policy.gross_profit / policy.gross_loss;
      else if(policy.gross_profit > 0)
         policy.profit_factor = 999.0;

      // Expectancy per trade: (win_rate * avg_win) - (loss_rate * avg_loss)
      double loss_rate     = 1.0 - actual_win_rate;
      int    losing_trades = policy.total_trades - policy.winning_trades;
      double avg_win  = policy.winning_trades > 0 ? policy.gross_profit / policy.winning_trades : 0;
      double avg_loss = losing_trades > 0 ? policy.gross_loss / losing_trades : 0;
      policy.expectancy = (actual_win_rate * avg_win) - (loss_rate * avg_loss);

      // Sharpe ratio over the recent-PnL buffer (population std-dev, >=5 samples)
      if(policy.recent_count >= 5)
      {
         double mean_pnl = 0, variance = 0;
         for(int i = 0; i < policy.recent_count; i++)
            mean_pnl += policy.recent_pnls[i];
         mean_pnl /= policy.recent_count;
         for(int i = 0; i < policy.recent_count; i++)
            variance += MathPow(policy.recent_pnls[i] - mean_pnl, 2);
         variance /= policy.recent_count;
         double std_dev = MathSqrt(variance);
         if(std_dev > 0)
            policy.sharpe_20 = mean_pnl / std_dev;
      }

      // Adjust SL/TP based on profit factor (more robust than raw profit)
      if(policy.profit_factor > 1.5 && policy.expectancy > 0)
      {
         // Profitable with good profit factor - can be slightly more aggressive
         policy.sl_scale = MathMin(1.5, policy.sl_scale * 1.02);
         policy.tp_scale = MathMax(1.0, policy.tp_scale * 1.02);
      }
      else if(policy.profit_factor < 1.0 || policy.expectancy < 0)
      {
         // Poor profit factor or negative expectancy - tighten up
         policy.sl_scale = MathMax(0.7, policy.sl_scale * 0.98);
         policy.tp_scale = MathMin(2.0, policy.tp_scale * 0.98);
      }

      // Adjust confidence threshold based on Sharpe and expectancy
      if(policy.sharpe_20 > 0.5 && policy.expectancy > 0)
      {
         // Good risk-adjusted returns - can lower threshold
         policy.min_confidence = MathMax(0.3, policy.min_confidence * 0.99);
      }
      else if(policy.sharpe_20 < 0.0 || policy.max_drawdown_pct > 10.0)
      {
         // Negative Sharpe or high drawdown - raise threshold
         policy.min_confidence = MathMin(0.8, policy.min_confidence * 1.01);
      }
      policy.last_update = TimeCurrent();
   }

   // Serialize all policies to JSON using a write-temp / backup / rename
   // sequence so readers never observe a partially written file.
   // Returns true on success.
   bool WritePolicyFile()
   {
      string temp_path   = m_policy_file_path + ".tmp";
      string backup_path = m_policy_file_path + ".backup";
      int handle = FileOpen(temp_path, FILE_WRITE|FILE_TXT|FILE_COMMON|FILE_ANSI);
      if(handle == INVALID_HANDLE)
      {
         PrintFormat("❌ PolicyUpdater: Failed to open temp file for writing: %s (Error: %d)",
                     temp_path, GetLastError());
         return false;
      }

      // Version counter incremented on each write.
      // NOTE: function-local static, so it restarts whenever the EA reloads.
      static int policy_version = 1;
      policy_version++;

      // JSON header with version metadata
      FileWriteString(handle, "{\n");
      FileWriteString(handle, StringFormat(" \"version\": \"1.0.%d\",\n", policy_version));
      FileWriteString(handle, " \"schema_version\": \"2.0\",\n");
      FileWriteString(handle, StringFormat(" \"last_updated\": \"%s\",\n", TimeToString(TimeCurrent())));
      FileWriteString(handle, StringFormat(" \"total_policies\": %d,\n", ArraySize(m_policy_keys)));
      FileWriteString(handle, " \"write_mode\": \"atomic\",\n");
      FileWriteString(handle, " \"policies\": [\n");

      // Write each policy entry as one JSON object
      int count = 0;
      for(int i = 0; i < ArraySize(m_policy_keys); i++)
      {
         PolicyEntry policy = m_policy_values[i];
         if(count > 0)
            FileWriteString(handle, ",\n");
         string entry = StringFormat(
            " {\n"
            " \"strategy\": \"%s\",\n"
            " \"symbol\": \"%s\",\n"
            " \"timeframe\": %d,\n"
            " \"probability\": %.4f,\n"
            " \"sl_scale\": %.2f,\n"
            " \"tp_scale\": %.2f,\n"
            " \"trail_atr_mult\": %.1f,\n"
            " \"min_confidence\": %.2f,\n"
            " \"total_trades\": %d,\n"
            " \"winning_trades\": %d,\n"
            " \"win_rate\": %.2f,\n"
            " \"profit_factor\": %.2f,\n"
            " \"expectancy\": %.2f,\n"
            " \"sharpe_20\": %.2f,\n"
            " \"max_drawdown_pct\": %.2f\n"
            " }",
            policy.strategy, policy.symbol, policy.timeframe,
            policy.probability, policy.sl_scale, policy.tp_scale,
            policy.trail_atr_mult, policy.min_confidence,
            policy.total_trades, policy.winning_trades,
            policy.total_trades > 0 ? (double)policy.winning_trades/policy.total_trades : 0.0,
            policy.profit_factor,
            policy.expectancy,
            policy.sharpe_20,
            policy.max_drawdown_pct
         );
         FileWriteString(handle, entry);
         count++;
      }
      FileWriteString(handle, "\n ]\n");
      FileWriteString(handle, "}\n");
      FileClose(handle);

      // Back up the current live file (line-by-line copy) before replacing it
      if(FileIsExist(m_policy_file_path, FILE_COMMON))
      {
         int backup_handle = FileOpen(backup_path, FILE_WRITE|FILE_TXT|FILE_COMMON|FILE_ANSI);
         if(backup_handle != INVALID_HANDLE)
         {
            int old_handle = FileOpen(m_policy_file_path, FILE_READ|FILE_TXT|FILE_COMMON|FILE_ANSI);
            if(old_handle != INVALID_HANDLE)
            {
               while(!FileIsEnding(old_handle))
               {
                  string line = FileReadString(old_handle);
                  FileWriteString(backup_handle, line + "\n");
               }
               FileClose(old_handle);
            }
            FileClose(backup_handle);
         }
      }

      // Replace the live file with the freshly written temp file
      FileDelete(m_policy_file_path, FILE_COMMON);
      FileMove(temp_path, FILE_COMMON, m_policy_file_path, FILE_COMMON|FILE_REWRITE);
      PrintFormat("✅ PolicyUpdater: Atomically updated policy file with %d entries (version %d)", count, policy_version);
      return true;
   }

public:
   // learning may be NULL (parameter adaptation is then disabled);
   // update_interval_minutes controls the timed auto-update cadence.
   CPolicyUpdater(CLearningBridge* learning, int update_interval_minutes = 60)
   {
      m_learning                = learning;
      m_policy_file_path        = "DualEA\\policy.json";
      m_update_interval_minutes = update_interval_minutes;
      m_last_update             = 0;
      m_auto_update_enabled     = true;
      m_total_updates           = 0;
      m_successful_updates      = 0;

      // Real-time update configuration
      m_trades_since_update     = 0;
      m_trades_update_threshold = 5; // Update policy file after every 5 trades

      // Initialize default policies for common combinations
      InitializeDefaultPolicies();
      // Write initial policy file
      if(WritePolicyFile())
         m_successful_updates++;
      PrintFormat("🎯 PolicyUpdater initialized: auto-update every %d minutes, real-time updates every %d trades",
                  m_update_interval_minutes, m_trades_update_threshold);
   }

   // Seed a policy entry for every strategy/symbol/timeframe combination
   // with conservative defaults.
   void InitializeDefaultPolicies()
   {
      string strategies[] = {
         "MovingAverageStrategy", "ADXStrategy", "RSIStrategy", "BollingerBandsStrategy",
         "MACDStrategy", "StochasticStrategy", "IchimokuStrategy", "MomentumStrategy"
      };
      string symbols[] = {"US500", "UK100", "GER40", "EURUSD", "GBPUSD", "USDJPY"};
      int timeframes[] = {60, 240, 1440}; // H1, H4, D1
      for(int s = 0; s < ArraySize(strategies); s++)
      {
         for(int sym = 0; sym < ArraySize(symbols); sym++)
         {
            for(int tf = 0; tf < ArraySize(timeframes); tf++)
            {
               PolicyEntry policy;
               ResetPolicy(policy); // FIX: also initializes the advanced metrics
               policy.strategy  = strategies[s];
               policy.symbol    = symbols[sym];
               policy.timeframe = timeframes[tf];
               // Conservative defaults (sl/tp/trail come from ResetPolicy)
               policy.probability    = 0.55;
               policy.min_confidence = 0.45; // Lower than before to allow more signals
               string key = GetPolicyKey(policy.strategy, policy.symbol, policy.timeframe);
               SetPolicy(key, policy);
            }
         }
      }
      PrintFormat("✅ Initialized %d default policy entries", ArraySize(m_policy_keys));
   }

   // Record one closed trade's outcome, update all metrics, re-learn the
   // policy parameters, and trigger a real-time file write every N trades.
   void RecordTradeOutcome(const string strategy, const string symbol, const int timeframe,
                           bool won, double profit)
   {
      string key = GetPolicyKey(strategy, symbol, timeframe);
      PolicyEntry policy;
      if(!GetPolicy(key, policy))
      {
         // First trade for this combination: create a fresh, fully initialized entry
         ResetPolicy(policy);
         policy.strategy  = strategy;
         policy.symbol    = symbol;
         policy.timeframe = timeframe;
      }

      // Accumulate gross profit/loss for the profit-factor calculation
      if(profit > 0)
         policy.gross_profit += profit;
      else
         policy.gross_loss += MathAbs(profit);

      // Update equity curve and running peak for drawdown tracking
      policy.current_equity += profit;
      if(policy.current_equity > policy.peak_equity)
         policy.peak_equity = policy.current_equity;
      if(policy.peak_equity > 0)
      {
         double current_dd = (policy.peak_equity - policy.current_equity) / policy.peak_equity * 100.0;
         if(current_dd > policy.max_drawdown_pct)
            policy.max_drawdown_pct = current_dd;
      }

      // FIFO buffer of the last 20 trade PnLs for the Sharpe calculation
      if(policy.recent_count < 20)
      {
         policy.recent_pnls[policy.recent_count] = profit;
         policy.recent_count++;
      }
      else
      {
         for(int i = 0; i < 19; i++)
            policy.recent_pnls[i] = policy.recent_pnls[i+1];
         policy.recent_pnls[19] = profit;
      }

      // Core trade statistics
      policy.total_trades++;
      if(won)
         policy.winning_trades++;
      policy.total_profit += profit;

      // Adapt parameters from the updated statistics, then persist in memory
      LearnFromTrades(policy);
      SetPolicy(key, policy);

      // Real-time policy write after every N recorded trades
      m_trades_since_update++;
      if(m_trades_since_update >= m_trades_update_threshold)
      {
         m_total_updates++; // FIX: keep attempt counter consistent with CheckAndUpdate()
         if(WritePolicyFile())
         {
            m_successful_updates++;
            m_last_update = TimeCurrent();
            PrintFormat("🔄 PolicyUpdater: Real-time update after %d trades", m_trades_since_update);
         }
         m_trades_since_update = 0;
      }
   }

   // Alias wrapper to avoid method signature confusion with other classes
   void UpdatePolicyFromTrade(const string strategy, const string symbol, const int timeframe,
                              bool won, double profit)
   {
      RecordTradeOutcome(strategy, symbol, timeframe, won, profit);
   }

   // Auto-update policy file if the configured interval has elapsed.
   void CheckAndUpdate()
   {
      if(!m_auto_update_enabled)
         return;
      datetime now = TimeCurrent();
      if(now - m_last_update < m_update_interval_minutes * 60)
         return;
      m_total_updates++;
      if(WritePolicyFile())
      {
         m_successful_updates++;
         m_last_update = now;
         PrintFormat("🔄 PolicyUpdater: Auto-updated policy file (%d/%d successful updates)",
                     m_successful_updates, m_total_updates);
      }
   }

   // Force an immediate policy file write regardless of timers/thresholds.
   void ForceUpdate()
   {
      m_total_updates++;
      if(WritePolicyFile())
         m_successful_updates++;
   }

   // Copy the policy for a specific combination into policy; false if unknown.
   bool GetPolicy(const string strategy, const string symbol, const int timeframe,
                  PolicyEntry &policy)
   {
      string key = GetPolicyKey(strategy, symbol, timeframe);
      return GetPolicy(key, policy);
   }

   // Enable/disable the timed auto-update mechanism.
   void SetAutoUpdate(bool enabled)
   {
      m_auto_update_enabled = enabled;
      PrintFormat("PolicyUpdater: Auto-update %s", enabled ? "ENABLED" : "DISABLED");
   }

   // Report counters: stored policies, write attempts, successful writes.
   void GetStatistics(int &total_policies, int &total_updates, int &successful_updates)
   {
      total_policies     = ArraySize(m_policy_keys);
      total_updates      = m_total_updates;
      successful_updates = m_successful_updates;
   }

   // Print a human-readable learning/update summary to the Experts log.
   void PrintReport()
   {
      PrintFormat("\n=== 📈 Policy Learning Report ===");
      PrintFormat("Total Policies: %d", ArraySize(m_policy_keys));
      PrintFormat("Total Updates: %d", m_total_updates);
      PrintFormat("Successful Updates: %d (%.1f%%)", m_successful_updates,
                  m_total_updates > 0 ? (double)m_successful_updates/m_total_updates*100 : 0);
      PrintFormat("Auto-Update: %s (interval: %d minutes)",
                  m_auto_update_enabled ? "ON" : "OFF", m_update_interval_minutes);
   }
};
#endif