# intelligent-trading-bot/common/classifier_nn.py
# 2026-01-21 13:10:46 +01:00
#
# 161 lines
# 4.9 KiB
# Python

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from keras.optimizers import *
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.regularizers import *
from keras.callbacks import *
def train_predict_nn(df_X, df_y, df_X_test, model_config: dict):
    """
    Train model with the specified hyper-parameters and return its predictions for the test data.
    """
    # Fit on the training frame, then immediately score the held-out frame
    trained_pair = train_nn(df_X, df_y, model_config)
    return predict_nn(trained_pair, df_X_test, model_config)
def train_nn(df_X, df_y, model_config: dict):
    """
    Train model with the specified hyper-parameters and return this model (and scaler if any).

    :param df_X: training features (DataFrame-like with .values and .shape)
    :param df_y: training labels/targets (DataFrame or Series)
    :param model_config: dict with optional "params" section (is_scale,
        is_regression, layers) and "train" section (learning_rate, n_epochs, bs, es)
    :return: tuple (model, scaler); scaler is None when scaling is disabled
    """
    params = model_config.get("params", {})
    is_scale = params.get("is_scale", True)
    is_regression = params.get("is_regression", False)

    #
    # Scale
    #
    if is_scale:
        scaler = StandardScaler()
        scaler.fit(df_X)
        X_train = scaler.transform(df_X)
    else:
        scaler = None
        X_train = df_X.values

    y_train = df_y.values

    #
    # Create model
    #
    n_features = X_train.shape[1]
    layers = params.get("layers")  # List of ints: units per hidden layer
    if not layers:
        # Default width; guard against Dense(0) when there are fewer than 4 features
        layers = [max(1, n_features // 4)]
    if not isinstance(layers, list):
        layers = [layers]

    train_conf = model_config.get("train", {})
    learning_rate = train_conf.get("learning_rate")
    n_epochs = train_conf.get("n_epochs")
    batch_size = train_conf.get("bs")

    # Topology
    model = Sequential()
    # Possible activations: sigmoid, relu, tanh, selu, elu, exponential
    # Possible regularization: kernel_regularizer=l2(0.001)
    for i, out_features in enumerate(layers):
        if i == 0:
            # input_dim is meaningful only on the first layer; Keras infers the
            # input size of every subsequent layer from the previous one.
            model.add(Dense(out_features, activation='sigmoid', input_dim=n_features))
        else:
            model.add(Dense(out_features, activation='sigmoid'))
        #model.add(Dropout(rate=0.5))

    # Output head and loss depend on the task type
    if is_regression:
        model.add(Dense(units=1))  # Linear output for regression
        model.compile(
            loss='mean_squared_error',
            optimizer=Adam(learning_rate=learning_rate),
            metrics=[
                tf.keras.metrics.MeanAbsoluteError(name="mean_absolute_error"),
                tf.keras.metrics.MeanAbsolutePercentageError(name="mean_absolute_percentage_error"),
                tf.keras.metrics.R2Score(name="r2_score"),
            ],
        )
    else:
        model.add(Dense(units=1, activation='sigmoid'))  # Probability output for binary classification
        model.compile(
            loss='binary_crossentropy',
            optimizer=Adam(learning_rate=learning_rate),
            metrics=[
                tf.keras.metrics.AUC(name="auc"),
                tf.keras.metrics.Precision(name="precision"),
                tf.keras.metrics.Recall(name="recall"),
            ],
        )
    #model.summary()

    # Default arguments for early stopping
    es_args = dict(
        monitor="loss",  # val_loss loss
        min_delta=0.00001,  # Minimum change qualified as improvement
        patience=5,  # Number of epochs with no improvements
        verbose=0,
        mode='auto',
    )
    es_args.update(train_conf.get("es", {}))  # Overwrite default values with those explicitly specified in config
    es = EarlyStopping(**es_args)

    #
    # Train
    #
    model.fit(
        X_train,
        y_train,
        batch_size=batch_size,
        epochs=n_epochs,
        #validation_split=0.05,
        #validation_data=(X_validate, y_validate),
        #class_weight={0: 1, 1: 20},
        callbacks=[es],
        verbose=1,
    )

    return (model, scaler)
def predict_nn(models: tuple, df_X_test, model_config: dict):
    """
    Use the model(s) to make predictions for the test data.
    The first model is a prediction model and the second model (optional) is a scaler.
    """
    model = models[0]
    scaler = models[1]
    input_index = df_X_test.index

    # Apply the same scaling that was fit during training (if any)
    if scaler is not None:
        scaled_values = scaler.transform(df_X_test)
        df_X_test = pd.DataFrame(data=scaled_values, index=input_index)

    # Rows with NaNs cannot be fed to the network: drop them, keeping their
    # (possibly gapped) index so predictions can be re-attached afterwards
    df_nonans = df_X_test.dropna()

    # Resets all (global) state generated by Keras.
    # Important if prediction is executed in a loop to avoid memory leak
    tf.keras.backend.clear_session()

    raw_predictions = model.predict_on_batch(df_nonans.values)  # NN returns matrix with one column as prediction
    sr_nonans = pd.Series(data=raw_predictions[:, 0], index=df_nonans.index)

    # Re-attach predictions to the full original index via an index join:
    # positions whose input contained NaN come back as NaN
    df_out = pd.DataFrame(index=input_index)
    df_out["y_hat"] = sr_nonans
    return df_out["y_hat"]