//+------------------------------------------------------------------+
//|                                                 MLPredictor.mqh  |
//|                          Machine Learning Prediction Module      |
//|                          Neural Network & Ensemble Learning      |
//+------------------------------------------------------------------+
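//--- Typical usage (a sketch; MLPrediction and ManagedTradeV71 are
//--- assumed to be defined in DataTypes_v71.mqh):
//---
//---   CMLPredictor ml;
//---   ml.Initialize(100, 100, false);        // lookback, min samples, deep learning
//---   MLPrediction p = ml.Predict(_Symbol);
//---   if(p.confidence > 0.6)
//---      PrintFormat("signal %s, confidence %.2f",
//---                  EnumToString(p.direction), p.confidence);
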
#ifndef ML_PREDICTOR_MQH
#define ML_PREDICTOR_MQH

#include "DataTypes_v71.mqh"
#include <Math\Stat\Math.mqh>

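//+------------------------------------------------------------------+
//| Read a single value from an indicator handle. In MQL5 the        |
//| built-in indicator functions (iMA, iRSI, iATR, ...) return       |
//| handles rather than values as they did in MQL4, so the call      |
//| sites below wrap them with this helper. A minimal sketch:        |
//| creating and releasing a handle on every call is slow, and a     |
//| fresh handle may not have finished calculating; production code  |
//| should cache handles per symbol/period instead.                  |
//+------------------------------------------------------------------+
double GetIndicatorValue(int handle, int shift = 0)
  {
   if(handle == INVALID_HANDLE)
      return 0.0;

   double buffer[1];
   double value = (CopyBuffer(handle, 0, shift, 1, buffer) == 1) ? buffer[0] : 0.0;
   IndicatorRelease(handle);
   return value;
  }
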
//+------------------------------------------------------------------+
//| ML Model Types                                                   |
//+------------------------------------------------------------------+
enum ENUM_ML_MODEL
  {
   ML_RANDOM_FOREST  = 0,
   ML_NEURAL_NETWORK = 1,
   ML_GRADIENT_BOOST = 2,
   ML_ENSEMBLE       = 3
  };

//+------------------------------------------------------------------+
//| Neural Network Layer                                             |
//+------------------------------------------------------------------+
//--- MQL5 allows only the first dimension of a dynamic array to be
//--- dynamic, so the original "double weights[][]" cannot compile;
//--- the weight matrix is stored flattened in row-major order.
struct NeuralLayer
  {
   double            weights[];      // [output_size x input_size], weights[row*input_size+col]
   double            biases[];
   double            activations[];
   int               input_size;
   int               output_size;
  };

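//+------------------------------------------------------------------+
//| One feature vector. Used as the row type for 2-D training data,  |
//| since "double x[][]" with both dimensions dynamic is not valid   |
//| MQL5. This row struct is one common workaround, assumed here;    |
//| rows are passed to functions expecting double &arr[] as row.v.   |
//+------------------------------------------------------------------+
struct FeatureRow
  {
   double            v[];
  };
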
//+------------------------------------------------------------------+
//| ML Predictor Class                                               |
//+------------------------------------------------------------------+
class CMLPredictor
  {
private:
   //--- Configuration
   int               m_lookback_period;
   int               m_min_training_size;
   bool              m_use_deep_learning;
   double            m_learning_rate;
   ENUM_ML_MODEL     m_model_type;

   //--- Neural network architecture
   NeuralLayer       m_layers[];
   int               m_layer_count;
   int               m_input_features;
   int               m_output_classes;

   //--- Training data (one FeatureRow per example)
   FeatureRow        m_training_features[];
   double            m_training_labels[];
   int               m_training_count;

   //--- Model performance
   double            m_accuracy;
   double            m_precision;
   double            m_recall;
   double            m_f1_score;
   datetime          m_last_update;

   //--- Feature scaling
   double            m_feature_mean[];
   double            m_feature_std[];

   //--- Ensemble models
   struct TreeNode
     {
      int               feature_index;
      double            threshold;
      int               left_child;
      int               right_child;
      double            prediction;
      bool              is_leaf;
     };

   struct DecisionTree
     {
      TreeNode          nodes[];
      int               node_count;
      double            feature_importance[];
     };

   DecisionTree      m_trees[];
   int               m_tree_count;

   //--- Helper methods
   void              InitializeNeuralNetwork(int hidden_count, const int &layer_sizes[]);
   double            ActivationFunction(double x, bool derivative = false);
   void              ForwardPass(double &inputs[], double &outputs[]);
   void              BackPropagation(double &inputs[], double &targets[]);
   void              UpdateWeights();

   //--- Feature engineering
   void              ExtractAdvancedFeatures(string symbol, double &features[]);
   void              NormalizeFeatures(double &features[]);
   void              CalculateFeatureImportance();

   //--- Ensemble methods
   void              TrainRandomForest();
   void              TrainGradientBoosting();
   double            PredictWithTree(DecisionTree &tree, double &features[]);

   //--- Model persistence
   string            m_model_path;

public:
                     CMLPredictor();
                    ~CMLPredictor();

   //--- Initialization
   bool              Initialize(int lookback, int min_training, bool use_deep);
   void              SetModelType(ENUM_ML_MODEL type) { m_model_type = type; }
   void              SetLearningRate(double rate)     { m_learning_rate = rate; }

   //--- Training
   bool              Train(FeatureRow &features[], double &labels[]);
   bool              UpdateModel(ManagedTradeV71 &trades[]);
   void              AddTrainingExample(double &features[], double label);

   //--- Prediction
   MLPrediction      Predict(string symbol);
   double            PredictProbability(double &features[], ENUM_ORDER_TYPE direction);
   void              PredictBatch(string &symbols[], MLPrediction &predictions[]);

   //--- Feature analysis
   double            GetFeatureImportance(int feature_index);
   void              GetTopFeatures(int &indices[], double &importances[], int count);

   //--- Model evaluation
   double            GetAccuracy()  { return m_accuracy; }
   double            GetPrecision() { return m_precision; }
   double            GetRecall()    { return m_recall; }
   double            GetF1Score()   { return m_f1_score; }
   void              EvaluateModel(FeatureRow &test_features[], double &test_labels[]);

   //--- Model management
   bool              SaveModel(string filename);
   bool              LoadModel(string filename);
   datetime          GetLastUpdateTime() { return m_last_update; }

   //--- Real-time learning
   void              OnlineUpdate(double &features[], double actual_result);
   void              AdaptToMarketRegime(ENUM_MARKET_REGIME regime);
  };

//+------------------------------------------------------------------+
//| Constructor                                                      |
//+------------------------------------------------------------------+
CMLPredictor::CMLPredictor()
  {
   m_lookback_period   = 100;
   m_min_training_size = 100;
   m_use_deep_learning = false;
   m_learning_rate     = 0.01;
   m_model_type        = ML_ENSEMBLE;
   m_layer_count       = 0;
   m_input_features    = 20;        // must match ExtractAdvancedFeatures()
   m_output_classes    = 3;         // Buy, Sell, Hold
   m_training_count    = 0;
   m_accuracy          = 0;
   m_precision         = 0;
   m_recall            = 0;
   m_f1_score          = 0;
   m_last_update       = 0;
   m_tree_count        = 100;       // Default forest size
   m_model_path        = "MLModels\\";
  }

//+------------------------------------------------------------------+
//| Destructor                                                       |
//+------------------------------------------------------------------+
CMLPredictor::~CMLPredictor()
  {
   // Nothing to free explicitly: MQL5 releases dynamic arrays automatically
  }

//+------------------------------------------------------------------+
//| Initialize ML predictor                                          |
//+------------------------------------------------------------------+
bool CMLPredictor::Initialize(int lookback, int min_training, bool use_deep)
  {
   m_lookback_period   = lookback;
   m_min_training_size = min_training;
   m_use_deep_learning = use_deep;

   //--- Initialize feature scaling arrays
   ArrayResize(m_feature_mean, m_input_features);
   ArrayResize(m_feature_std, m_input_features);
   ArrayInitialize(m_feature_mean, 0);
   ArrayInitialize(m_feature_std, 1);

   //--- Initialize neural network if deep learning is enabled
   if(m_use_deep_learning)
     {
      int hidden_count  = 3;
      int layer_sizes[] = {64, 32, 16};   // Architecture: 20->64->32->16->3
      InitializeNeuralNetwork(hidden_count, layer_sizes);
     }

   //--- Initialize training data arrays
   ArrayResize(m_training_features, m_lookback_period);
   ArrayResize(m_training_labels, m_lookback_period);

   //--- Initialize ensemble
   if(m_model_type == ML_ENSEMBLE || m_model_type == ML_RANDOM_FOREST)
     {
      ArrayResize(m_trees, m_tree_count);
     }

   Print("MLPredictor initialized: Model=", EnumToString(m_model_type),
         ", DeepLearning=", m_use_deep_learning);

   return true;
  }

//+------------------------------------------------------------------+
//| Initialize neural network architecture                           |
//+------------------------------------------------------------------+
void CMLPredictor::InitializeNeuralNetwork(int hidden_count, const int &layer_sizes[])
  {
   m_layer_count = hidden_count + 1;   // Hidden layers + output layer
   ArrayResize(m_layers, m_layer_count);

   int prev_size = m_input_features;

   //--- Initialize each layer
   for(int i = 0; i < m_layer_count; i++)
     {
      int current_size = (i < hidden_count) ? layer_sizes[i] : m_output_classes;

      m_layers[i].input_size  = prev_size;
      m_layers[i].output_size = current_size;

      //--- Initialize weights with Xavier (Glorot) initialization:
      //--- std = sqrt(2 / (fan_in + fan_out))
      ArrayResize(m_layers[i].weights, current_size * prev_size);

      double xavier_std = MathSqrt(2.0 / (prev_size + current_size));

      for(int j = 0; j < current_size; j++)
        {
         for(int k = 0; k < prev_size; k++)
           {
            //--- Box-Muller transform: MQL5 has no scalar normal-variate
            //--- function, so one is derived from two uniform draws
            double u1 = (MathRand() + 1.0) / 32769.0;
            double u2 = (MathRand() + 1.0) / 32769.0;
            double gauss = MathSqrt(-2.0 * MathLog(u1)) * MathCos(2.0 * M_PI * u2);
            m_layers[i].weights[j * prev_size + k] = gauss * xavier_std;
           }
        }

      //--- Initialize biases
      ArrayResize(m_layers[i].biases, current_size);
      ArrayInitialize(m_layers[i].biases, 0);

      //--- Initialize activations
      ArrayResize(m_layers[i].activations, current_size);

      prev_size = current_size;
     }
  }

//+------------------------------------------------------------------+
//| Make prediction for symbol                                       |
//+------------------------------------------------------------------+
MLPrediction CMLPredictor::Predict(string symbol)
  {
   MLPrediction prediction;
   prediction.prediction_time = TimeCurrent();
   prediction.model_version   = "1.0";
   prediction.direction       = ORDER_TYPE_BUY;   // Default: no trade
   prediction.confidence      = 0;

   //--- Extract features
   double features[];
   ExtractAdvancedFeatures(symbol, features);

   //--- Normalize features
   NormalizeFeatures(features);

   //--- Make prediction based on model type
   double outputs[];

   switch(m_model_type)
     {
      case ML_NEURAL_NETWORK:
        {
         ArrayResize(outputs, m_output_classes);
         ForwardPass(features, outputs);

         //--- Find class with highest probability
         int    best_class = 0;
         double max_prob   = outputs[0];

         for(int i = 1; i < m_output_classes; i++)
           {
            if(outputs[i] > max_prob)
              {
               max_prob   = outputs[i];
               best_class = i;
              }
           }

         //--- Convert to trading signal
         if(best_class == 0)        // Buy class
           {
            prediction.direction  = ORDER_TYPE_BUY;
            prediction.confidence = max_prob;
           }
         else if(best_class == 1)   // Sell class
           {
            prediction.direction  = ORDER_TYPE_SELL;
            prediction.confidence = max_prob;
           }
         else                       // Hold: direction stays at the default
           {
            prediction.confidence = 0;   // No trade
           }
        }
        break;

      case ML_RANDOM_FOREST:
      case ML_GRADIENT_BOOST:        // Boosting currently falls back to the forest
      case ML_ENSEMBLE:
        {
         //--- Aggregate predictions from all trees by majority vote
         double buy_votes  = 0;
         double sell_votes = 0;

         for(int i = 0; i < m_tree_count; i++)
           {
            double tree_pred = PredictWithTree(m_trees[i], features);
            if(tree_pred > 0.5)
               buy_votes++;
            else if(tree_pred < -0.5)
               sell_votes++;
           }

         //--- Determine direction; require a 60% supermajority of trees
         if(buy_votes > sell_votes && buy_votes > m_tree_count * 0.6)
           {
            prediction.direction  = ORDER_TYPE_BUY;
            prediction.confidence = buy_votes / m_tree_count;
           }
         else if(sell_votes > buy_votes && sell_votes > m_tree_count * 0.6)
           {
            prediction.direction  = ORDER_TYPE_SELL;
            prediction.confidence = sell_votes / m_tree_count;
           }
         else
           {
            prediction.confidence = 0;   // No clear signal
           }
        }
        break;
     }

   //--- Additional predictions
   prediction.expected_return = prediction.confidence * 2.5;   // Simplified: assumes a 2.5R target
   prediction.volatility      = features[9];                   // ATR feature (z-scored)

   //--- Calculate stops from the current ATR
   double atr = GetIndicatorValue(iATR(symbol, PERIOD_CURRENT, 14));
   double current_price = (prediction.direction == ORDER_TYPE_BUY) ?
                          SymbolInfoDouble(symbol, SYMBOL_ASK) :
                          SymbolInfoDouble(symbol, SYMBOL_BID);

   if(prediction.direction == ORDER_TYPE_BUY)
     {
      prediction.stop_loss   = current_price - atr * 2;
      prediction.take_profit = current_price + atr * 4;
     }
   else
     {
      prediction.stop_loss   = current_price + atr * 2;
      prediction.take_profit = current_price - atr * 4;
     }

   //--- Optimal size from the Kelly criterion f* = (b*p - q) / b,
   //--- with win probability p = confidence, q = 1 - p and odds b = 2.5
   double kelly_fraction = (prediction.confidence * 2.5 - (1 - prediction.confidence)) / 2.5;
   kelly_fraction = MathMax(0, MathMin(0.25, kelly_fraction * 0.3));   // Fractional Kelly, capped at 25%
   prediction.optimal_size = kelly_fraction;

   return prediction;
  }

//+------------------------------------------------------------------+
//| Extract advanced features for ML                                 |
//+------------------------------------------------------------------+
void CMLPredictor::ExtractAdvancedFeatures(string symbol, double &features[])
  {
   ArrayResize(features, m_input_features);

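   //--- Feature map (indices are fixed; m_input_features = 20):
   //---   0-2   return, range, position in range    3-5   EMA distances
   //---   6-8   RSI, CCI, momentum                   9-10  ATR, std-dev
   //---   11    relative volume                      12-13 market structure
   //---   14-16 hour, weekday, day of month          17    spread
   //---   18    RSI-based sentiment                  19    return vs. EURUSD
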
   //--- Price-based features
   double close = iClose(symbol, PERIOD_CURRENT, 0);
   double open  = iOpen(symbol, PERIOD_CURRENT, 0);
   double high  = iHigh(symbol, PERIOD_CURRENT, 0);
   double low   = iLow(symbol, PERIOD_CURRENT, 0);

   features[0] = (close - open) / open * 100;              // Return
   features[1] = (high - low) / low * 100;                 // Range
   features[2] = (close - low) / (high - low + 0.00001);   // Position in range

   //--- Moving averages (handles wrapped by GetIndicatorValue)
   double ma_5  = GetIndicatorValue(iMA(symbol, PERIOD_CURRENT, 5, 0, MODE_EMA, PRICE_CLOSE));
   double ma_20 = GetIndicatorValue(iMA(symbol, PERIOD_CURRENT, 20, 0, MODE_EMA, PRICE_CLOSE));
   double ma_50 = GetIndicatorValue(iMA(symbol, PERIOD_CURRENT, 50, 0, MODE_EMA, PRICE_CLOSE));

   features[3] = (close - ma_5) / ma_5 * 100;
   features[4] = (ma_5 - ma_20) / ma_20 * 100;
   features[5] = (ma_20 - ma_50) / ma_50 * 100;

   //--- Momentum indicators
   double rsi      = GetIndicatorValue(iRSI(symbol, PERIOD_CURRENT, 14, PRICE_CLOSE));
   double cci      = GetIndicatorValue(iCCI(symbol, PERIOD_CURRENT, 14, PRICE_TYPICAL));
   double momentum = GetIndicatorValue(iMomentum(symbol, PERIOD_CURRENT, 14, PRICE_CLOSE));

   features[6] = rsi / 100.0;
   features[7] = cci / 200.0;               // Normalize CCI
   features[8] = (momentum - 100) / 100.0;

   //--- Volatility
   double atr    = GetIndicatorValue(iATR(symbol, PERIOD_CURRENT, 14));
   double stddev = GetIndicatorValue(iStdDev(symbol, PERIOD_CURRENT, 20, 0, MODE_SMA, PRICE_CLOSE));

   features[9]  = atr / close * 100;
   features[10] = stddev / close * 100;

   //--- Volume
   long volume     = iVolume(symbol, PERIOD_CURRENT, 0);
   long avg_volume = 0;
   for(int i = 1; i <= 20; i++)
     {
      avg_volume += iVolume(symbol, PERIOD_CURRENT, i);
     }
   avg_volume /= 20;

   features[11] = (avg_volume > 0) ? (double)volume / avg_volume : 1.0;

   //--- Market structure
   int higher_highs = 0;
   int lower_lows   = 0;
   for(int i = 1; i < 10; i++)
     {
      if(iHigh(symbol, PERIOD_CURRENT, i) > iHigh(symbol, PERIOD_CURRENT, i + 1))
         higher_highs++;
      if(iLow(symbol, PERIOD_CURRENT, i) < iLow(symbol, PERIOD_CURRENT, i + 1))
         lower_lows++;
     }

   features[12] = higher_highs / 10.0;
   features[13] = lower_lows / 10.0;

   //--- Time-based features
   MqlDateTime time;
   TimeToStruct(TimeCurrent(), time);

   features[14] = time.hour / 24.0;
   features[15] = time.day_of_week / 7.0;
   features[16] = time.day / 31.0;

   //--- Microstructure
   long spread  = SymbolInfoInteger(symbol, SYMBOL_SPREAD);
   features[17] = spread / 100.0;

   //--- Sentiment (simplified): RSI distance from the 50 midline
   features[18] = (rsi - 50) / 50.0;

   //--- Relative strength vs. the broad market, using EURUSD as proxy
   //--- (a daily return differential, not a true correlation)
   if(symbol != "EURUSD")
     {
      double symbol_return = (close - iClose(symbol, PERIOD_D1, 1)) / iClose(symbol, PERIOD_D1, 1);
      double market_return = (iClose("EURUSD", PERIOD_D1, 0) - iClose("EURUSD", PERIOD_D1, 1)) /
                             iClose("EURUSD", PERIOD_D1, 1);
      features[19] = symbol_return - market_return;
     }
   else
     {
      features[19] = 0;
     }
  }

//+------------------------------------------------------------------+
//| Normalize features using z-score                                 |
//+------------------------------------------------------------------+
void CMLPredictor::NormalizeFeatures(double &features[])
  {
   for(int i = 0; i < m_input_features; i++)
     {
      if(m_feature_std[i] > 0)
        {
         features[i] = (features[i] - m_feature_mean[i]) / m_feature_std[i];
        }
     }
  }

//+------------------------------------------------------------------+
//| Forward pass through neural network                              |
//+------------------------------------------------------------------+
void CMLPredictor::ForwardPass(double &inputs[], double &outputs[])
  {
   //--- Start with input features
   double current_inputs[];
   ArrayCopy(current_inputs, inputs);

   //--- Pass through each layer
   for(int layer = 0; layer < m_layer_count; layer++)
     {
      //--- Calculate activations for this layer
      for(int neuron = 0; neuron < m_layers[layer].output_size; neuron++)
        {
         double sum = m_layers[layer].biases[neuron];

         for(int input = 0; input < m_layers[layer].input_size; input++)
           {
            //--- Weights are flattened row-major: row = neuron, col = input
            sum += current_inputs[input] *
                   m_layers[layer].weights[neuron * m_layers[layer].input_size + input];
           }

         //--- Apply activation function
         m_layers[layer].activations[neuron] = ActivationFunction(sum);
        }

      //--- Current layer outputs become next layer inputs
      ArrayResize(current_inputs, m_layers[layer].output_size);
      ArrayCopy(current_inputs, m_layers[layer].activations);
     }

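   //--- Softmax over the output layer so its values behave like class
   //--- probabilities, as documented at ActivationFunction(). This step
   //--- is an addition: the original returned raw leaky-ReLU outputs,
   //--- leaving confidence unbounded.
   if(m_layer_count == 0)
      return;   // nothing to do for non-network models

   int    out_layer = m_layer_count - 1;
   int    max_idx   = ArrayMaximum(m_layers[out_layer].activations);
   double max_act   = m_layers[out_layer].activations[max_idx];
   double exp_sum   = 0;
   for(int i = 0; i < m_layers[out_layer].output_size; i++)
     {
      //--- Subtract the max activation for numerical stability
      m_layers[out_layer].activations[i] = MathExp(m_layers[out_layer].activations[i] - max_act);
      exp_sum += m_layers[out_layer].activations[i];
     }
   for(int i = 0; i < m_layers[out_layer].output_size; i++)
      m_layers[out_layer].activations[i] /= exp_sum;
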
   //--- Copy final layer outputs
   ArrayResize(outputs, m_output_classes);
   ArrayCopy(outputs, m_layers[m_layer_count - 1].activations);
  }

//+------------------------------------------------------------------+
//| Activation function: leaky ReLU (the output layer additionally   |
//| receives a softmax inside ForwardPass)                           |
//+------------------------------------------------------------------+
double CMLPredictor::ActivationFunction(double x, bool derivative)
  {
   if(derivative)
     {
      return (x > 0) ? 1 : 0.01;       // Leaky ReLU derivative
     }
   else
     {
      return (x > 0) ? x : 0.01 * x;   // Leaky ReLU
     }
  }

//+------------------------------------------------------------------+
//| Update model with new trading results                            |
//+------------------------------------------------------------------+
bool CMLPredictor::UpdateModel(ManagedTradeV71 &trades[])
  {
   int trade_count = ArraySize(trades);
   if(trade_count < m_min_training_size)
      return false;

   //--- Prepare training data from closed trades
   FeatureRow new_features[];
   double     new_labels[];
   int        example_count = 0;

   ArrayResize(new_features, trade_count);
   ArrayResize(new_labels, trade_count);

   for(int i = 0; i < trade_count; i++)
     {
      //--- Only use closed trades for training (profit == 0 is treated
      //--- as still open; break-even exits are also skipped by this test)
      if(trades[i].profit == 0)
         continue;

      //--- Extract features (note: taken from the current market state,
      //--- a simplification; ideally they would be captured at entry)
      ArrayResize(new_features[example_count].v, m_input_features);
      ExtractAdvancedFeatures(trades[i].symbol, new_features[example_count].v);

      //--- Label: positive for profit, negative for loss, weighted by
      //--- the R-multiple and clamped to [-1, 1]
      if(trades[i].profit > 0)
        {
         new_labels[example_count] = MathMin(1.0, trades[i].r_multiple / 2.0);
        }
      else
        {
         new_labels[example_count] = MathMax(-1.0, trades[i].r_multiple / 2.0);
        }

      example_count++;
     }

   if(example_count < m_min_training_size)
      return false;

   //--- Resize to actual count
   ArrayResize(new_features, example_count);
   ArrayResize(new_labels, example_count);

   //--- Update feature statistics
   for(int i = 0; i < m_input_features; i++)
     {
      double sum    = 0;
      double sum_sq = 0;

      for(int j = 0; j < example_count; j++)
        {
         sum    += new_features[j].v[i];
         sum_sq += new_features[j].v[i] * new_features[j].v[i];
        }

      m_feature_mean[i] = sum / example_count;
      //--- Guard against tiny negative variances from rounding error
      m_feature_std[i]  = MathSqrt(MathMax(0, sum_sq / example_count -
                                              m_feature_mean[i] * m_feature_mean[i]));
     }

   //--- Normalize features
   for(int i = 0; i < example_count; i++)
     {
      NormalizeFeatures(new_features[i].v);
     }

   //--- Train model
   bool success = Train(new_features, new_labels);

   if(success)
     {
      m_last_update = TimeCurrent();

      //--- Evaluate on the same data (in-sample; a held-out set would
      //--- give a less optimistic estimate)
      EvaluateModel(new_features, new_labels);

      Print("ML Model updated: Examples=", example_count,
            ", Accuracy=", DoubleToString(m_accuracy, 2),
            ", F1=", DoubleToString(m_f1_score, 2));
     }

   return success;
  }

//+------------------------------------------------------------------+
//| Train the model                                                  |
//+------------------------------------------------------------------+
bool CMLPredictor::Train(FeatureRow &features[], double &labels[])
  {
   int sample_count = ArraySize(features);

   if(sample_count < m_min_training_size)
      return false;

   //--- Store training data
   m_training_count = sample_count;
   ArrayResize(m_training_features, sample_count);
   ArrayCopy(m_training_labels, labels);

   for(int i = 0; i < sample_count; i++)
     {
      ArrayResize(m_training_features[i].v, m_input_features);
      ArrayCopy(m_training_features[i].v, features[i].v);
     }

   //--- Train based on model type
   switch(m_model_type)
     {
      case ML_NEURAL_NETWORK:
        {
         //--- Mini-batch gradient descent
         int epochs     = 100;
         int batch_size = 32;

         for(int epoch = 0; epoch < epochs; epoch++)
           {
            for(int batch = 0; batch < sample_count; batch += batch_size)
              {
               int batch_end = MathMin(batch + batch_size, sample_count);

               for(int i = batch; i < batch_end; i++)
                 {
                  //--- Forward pass
                  double outputs[];
                  ForwardPass(m_training_features[i].v, outputs);

                  //--- One-hot targets derived from the scalar label
                  double targets[];
                  ArrayResize(targets, m_output_classes);
                  ArrayInitialize(targets, 0);

                  if(m_training_labels[i] > 0.5)
                     targets[0] = 1;         // Buy
                  else if(m_training_labels[i] < -0.5)
                     targets[1] = 1;         // Sell
                  else
                     targets[2] = 1;         // Hold

                  //--- Backpropagation (accumulates gradients)
                  BackPropagation(m_training_features[i].v, targets);
                 }

               //--- Update weights after each mini-batch
               UpdateWeights();
              }

            //--- Decay learning rate
            m_learning_rate *= 0.99;
           }
        }
        break;

      case ML_RANDOM_FOREST:
      case ML_ENSEMBLE:
         TrainRandomForest();
         break;

      case ML_GRADIENT_BOOST:
         TrainGradientBoosting();
         break;
     }

   return true;
  }

//+------------------------------------------------------------------+
//| Train random forest                                              |
//+------------------------------------------------------------------+
void CMLPredictor::TrainRandomForest()
  {
   //--- Train each tree on a bootstrap sample (sampling with replacement)
   for(int tree = 0; tree < m_tree_count; tree++)
     {
      //--- Bootstrap sampling
      int        sample_size = m_training_count;
      FeatureRow bootstrap_features[];
      double     bootstrap_labels[];

      ArrayResize(bootstrap_features, sample_size);
      ArrayResize(bootstrap_labels, sample_size);

      for(int i = 0; i < sample_size; i++)
        {
         int idx = MathRand() % m_training_count;
         ArrayResize(bootstrap_features[i].v, m_input_features);
         ArrayCopy(bootstrap_features[i].v, m_training_features[idx].v);
         bootstrap_labels[i] = m_training_labels[idx];
        }

      //--- Build tree (simplified)
      m_trees[tree].node_count = 0;
      ArrayResize(m_trees[tree].nodes, 100);   // Max 100 nodes
      ArrayResize(m_trees[tree].feature_importance, m_input_features);
      ArrayInitialize(m_trees[tree].feature_importance, 0);

      //--- Create root node
      TreeNode root;
      root.is_leaf       = false;
      root.feature_index = MathRand() % m_input_features;
      root.threshold     = 0;                  // Simplified
      root.left_child    = -1;
      root.right_child   = -1;

      //--- For simplicity each tree is a single leaf predicting the
      //--- majority class of its bootstrap sample
      int count_positive = 0;

      for(int i = 0; i < sample_size; i++)
        {
         if(bootstrap_labels[i] > 0)
            count_positive++;
        }

      root.prediction = (count_positive > sample_size / 2) ? 1 : -1;
      root.is_leaf    = true;

      m_trees[tree].nodes[0]   = root;
      m_trees[tree].node_count = 1;
     }
  }

//+------------------------------------------------------------------+
//| Simplified tree prediction                                       |
//+------------------------------------------------------------------+
double CMLPredictor::PredictWithTree(DecisionTree &tree, double &features[])
  {
   //--- Simplified: with single-leaf trees the root holds the prediction
   if(tree.node_count > 0)
      return tree.nodes[0].prediction;

   return 0;
  }

//+------------------------------------------------------------------+
//| Placeholder for backpropagation                                  |
//+------------------------------------------------------------------+
void CMLPredictor::BackPropagation(double &inputs[], double &targets[])
  {
   //--- Simplified placeholder: a full implementation would compute the
   //--- output error against the targets and propagate gradients back
   //--- through each layer for UpdateWeights() to apply
  }

//+------------------------------------------------------------------+
//| Placeholder for weight updates                                   |
//+------------------------------------------------------------------+
void CMLPredictor::UpdateWeights()
  {
   //--- Simplified placeholder: a full implementation would apply the
   //--- accumulated gradients scaled by m_learning_rate
  }

//+------------------------------------------------------------------+
//| Placeholder for gradient boosting                                |
//+------------------------------------------------------------------+
void CMLPredictor::TrainGradientBoosting()
  {
   //--- Simplified: fall back to the random forest until true
   //--- stage-wise boosting is implemented
   TrainRandomForest();
  }

//+------------------------------------------------------------------+
//| Evaluate model performance                                       |
//+------------------------------------------------------------------+
void CMLPredictor::EvaluateModel(FeatureRow &test_features[], double &test_labels[])
  {
   int test_count = ArraySize(test_features);
   if(test_count == 0) return;

   int true_positive  = 0;
   int true_negative  = 0;
   int false_positive = 0;
   int false_negative = 0;

   for(int i = 0; i < test_count; i++)
     {
      //--- Make prediction (simplified: re-predicts from the live chart
      //--- rather than from test_features[i])
      MLPrediction pred = Predict(_Symbol);

      //--- Compare with actual
      bool predicted_buy = (pred.confidence > 0.6 && pred.direction == ORDER_TYPE_BUY);
      bool actual_buy    = (test_labels[i] > 0.5);

      if(predicted_buy && actual_buy)
         true_positive++;
      else if(!predicted_buy && !actual_buy)
         true_negative++;
      else if(predicted_buy && !actual_buy)
         false_positive++;
      else
         false_negative++;
     }

   //--- Calculate metrics
   m_accuracy = (double)(true_positive + true_negative) / test_count;

   if(true_positive + false_positive > 0)
      m_precision = (double)true_positive / (true_positive + false_positive);
   else
      m_precision = 0;

   if(true_positive + false_negative > 0)
      m_recall = (double)true_positive / (true_positive + false_negative);
   else
      m_recall = 0;

   if(m_precision + m_recall > 0)
      m_f1_score = 2 * m_precision * m_recall / (m_precision + m_recall);
   else
      m_f1_score = 0;
  }

//+------------------------------------------------------------------+
//| Save model to file                                               |
//+------------------------------------------------------------------+
bool CMLPredictor::SaveModel(string filename)
  {
   string full_path = m_model_path + filename;
   int handle = FileOpen(full_path, FILE_WRITE|FILE_BIN);

   if(handle == INVALID_HANDLE)
      return false;

   //--- Save model parameters
   FileWriteInteger(handle, (int)m_model_type);
   FileWriteInteger(handle, m_input_features);
   FileWriteInteger(handle, m_output_classes);
   FileWriteDouble(handle, m_learning_rate);

   //--- Save feature scaling
   for(int i = 0; i < m_input_features; i++)
     {
      FileWriteDouble(handle, m_feature_mean[i]);
      FileWriteDouble(handle, m_feature_std[i]);
     }

   //--- Save model-specific data (forest trees are not serialized yet)
   if(m_model_type == ML_NEURAL_NETWORK && m_use_deep_learning)
     {
      //--- Save network architecture
      FileWriteInteger(handle, m_layer_count);

      for(int layer = 0; layer < m_layer_count; layer++)
        {
         FileWriteInteger(handle, m_layers[layer].input_size);
         FileWriteInteger(handle, m_layers[layer].output_size);

         //--- Save weights (flattened row-major matrix)
         int weight_count = m_layers[layer].output_size * m_layers[layer].input_size;
         for(int i = 0; i < weight_count; i++)
           {
            FileWriteDouble(handle, m_layers[layer].weights[i]);
           }

         //--- Save biases
         for(int i = 0; i < m_layers[layer].output_size; i++)
           {
            FileWriteDouble(handle, m_layers[layer].biases[i]);
           }
        }
     }

   FileClose(handle);
   return true;
  }

//+------------------------------------------------------------------+
//| Load model from file                                             |
//+------------------------------------------------------------------+
bool CMLPredictor::LoadModel(string filename)
  {
   string full_path = m_model_path + filename;

   if(!FileIsExist(full_path))
      return false;

   int handle = FileOpen(full_path, FILE_READ|FILE_BIN);

   if(handle == INVALID_HANDLE)
      return false;

   //--- Load model parameters
   m_model_type     = (ENUM_ML_MODEL)FileReadInteger(handle);
   m_input_features = FileReadInteger(handle);
   m_output_classes = FileReadInteger(handle);
   m_learning_rate  = FileReadDouble(handle);

   //--- Load feature scaling
   ArrayResize(m_feature_mean, m_input_features);
   ArrayResize(m_feature_std, m_input_features);

   for(int i = 0; i < m_input_features; i++)
     {
      m_feature_mean[i] = FileReadDouble(handle);
      m_feature_std[i]  = FileReadDouble(handle);
     }

   //--- Load model-specific data
   if(m_model_type == ML_NEURAL_NETWORK)
     {
      //--- Load network architecture
      m_layer_count = FileReadInteger(handle);
      ArrayResize(m_layers, m_layer_count);

      for(int layer = 0; layer < m_layer_count; layer++)
        {
         m_layers[layer].input_size  = FileReadInteger(handle);
         m_layers[layer].output_size = FileReadInteger(handle);

         //--- Load weights (flattened row-major matrix)
         int weight_count = m_layers[layer].output_size * m_layers[layer].input_size;
         ArrayResize(m_layers[layer].weights, weight_count);
         for(int i = 0; i < weight_count; i++)
           {
            m_layers[layer].weights[i] = FileReadDouble(handle);
           }

         //--- Load biases
         ArrayResize(m_layers[layer].biases, m_layers[layer].output_size);
         for(int i = 0; i < m_layers[layer].output_size; i++)
           {
            m_layers[layer].biases[i] = FileReadDouble(handle);
           }

         //--- Initialize activations
         ArrayResize(m_layers[layer].activations, m_layers[layer].output_size);
        }
     }

   FileClose(handle);
   return true;
  }

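//+------------------------------------------------------------------+
//| Minimal bodies for the remaining declared methods so the header  |
//| compiles stand-alone (MQL5 rejects declared methods that lack a  |
//| body). These are placeholder sketches, not the intended full     |
//| implementations.                                                 |
//+------------------------------------------------------------------+
void CMLPredictor::AddTrainingExample(double &features[], double label)
  {
   //--- Append one example to the stored training set
   int n = ArraySize(m_training_features);
   ArrayResize(m_training_features, n + 1);
   ArrayResize(m_training_features[n].v, m_input_features);
   ArrayCopy(m_training_features[n].v, features);
   ArrayResize(m_training_labels, n + 1);
   m_training_labels[n] = label;
   m_training_count     = n + 1;
  }

double CMLPredictor::PredictProbability(double &features[], ENUM_ORDER_TYPE direction)
  {
   //--- Placeholder: class probability from the network outputs
   if(m_layer_count == 0)
      return 0.0;
   double outputs[];
   ArrayResize(outputs, m_output_classes);
   ForwardPass(features, outputs);
   return (direction == ORDER_TYPE_BUY) ? outputs[0] : outputs[1];
  }

void CMLPredictor::PredictBatch(string &symbols[], MLPrediction &predictions[])
  {
   int count = ArraySize(symbols);
   ArrayResize(predictions, count);
   for(int i = 0; i < count; i++)
      predictions[i] = Predict(symbols[i]);
  }

double CMLPredictor::GetFeatureImportance(int feature_index)
  {
   //--- Average the per-tree importances (all zero until
   //--- CalculateFeatureImportance is implemented)
   double sum   = 0;
   int    trees = ArraySize(m_trees);
   for(int t = 0; t < trees; t++)
      if(feature_index < ArraySize(m_trees[t].feature_importance))
         sum += m_trees[t].feature_importance[feature_index];
   return (trees > 0) ? sum / trees : 0;
  }

void CMLPredictor::GetTopFeatures(int &indices[], double &importances[], int count)
  {
   //--- Placeholder: returns the first `count` features in index order
   count = MathMin(count, m_input_features);
   ArrayResize(indices, count);
   ArrayResize(importances, count);
   for(int i = 0; i < count; i++)
     {
      indices[i]     = i;
      importances[i] = GetFeatureImportance(i);
     }
  }

void CMLPredictor::CalculateFeatureImportance()
  {
   //--- Placeholder: would accumulate split-gain statistics per feature
  }

void CMLPredictor::OnlineUpdate(double &features[], double actual_result)
  {
   //--- Placeholder: queue the observation for the next batch retrain
   AddTrainingExample(features, actual_result);
  }

void CMLPredictor::AdaptToMarketRegime(ENUM_MARKET_REGIME regime)
  {
   //--- Placeholder: model-specific regime adaptation would go here
  }
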
#endif // ML_PREDICTOR_MQH