# intelligent-trading-bot/common/classifier_nn.py
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from keras.optimizers import *
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.regularizers import *
from keras.callbacks import *
def train_predict_nn(df_X, df_y, df_X_test, model_config: dict):
    """
    Convenience wrapper: train a NN on (df_X, df_y) with the given
    hyper-parameters and return its predictions for df_X_test.
    """
    model_pair = train_nn(df_X, df_y, model_config)
    return predict_nn(model_pair, df_X_test, model_config)
def train_nn(df_X, df_y, model_config: dict):
    """
    Train a feed-forward NN with the specified hyper-parameters.

    :param df_X: training features (DataFrame)
    :param df_y: training labels/targets (DataFrame or Series)
    :param model_config: dict with optional "params" (is_scale, is_regression, layers)
        and "train" (learning_rate, n_epochs, bs, es) sections
    :return: tuple (fitted keras model, fitted StandardScaler or None)
    """
    params = model_config.get("params", {})
    is_scale = params.get("is_scale", True)
    is_regression = params.get("is_regression", False)

    #
    # Scale
    #
    if is_scale:
        scaler = StandardScaler()
        scaler.fit(df_X)
        X_train = scaler.transform(df_X)
    else:
        scaler = None
        X_train = df_X.values

    y_train = df_y.values

    #
    # Create model
    #
    n_features = X_train.shape[1]
    layers = params.get("layers")  # List of ints: hidden layer sizes
    if not layers:
        layers = [n_features // 4]  # Default: one hidden layer
    if not isinstance(layers, list):
        layers = [layers]

    train_conf = model_config.get("train", {})
    # Fall back to sensible defaults so a sparse config does not crash
    # Adam(learning_rate=None) or fit(epochs=None)
    learning_rate = train_conf.get("learning_rate") or 0.001
    n_epochs = train_conf.get("n_epochs") or 100
    batch_size = train_conf.get("bs") or 64

    # Topology: sigmoid hidden layers.
    # Possible activations: sigmoid, relu, tanh, selu, elu, exponential
    # Optional regularization: kernel_regularizer=l2(0.001)
    model = Sequential()
    for i, out_features in enumerate(layers):
        if i == 0:
            # Only the first layer declares the input dimensionality;
            # Keras infers the input of each subsequent layer
            model.add(Dense(out_features, activation='sigmoid', input_dim=n_features))
        else:
            model.add(Dense(out_features, activation='sigmoid'))
        #model.add(Dropout(rate=0.5))

    # Output layer and loss depend on the problem type
    if is_regression:
        model.add(Dense(units=1))
        model.compile(
            loss='mean_squared_error',
            optimizer=Adam(learning_rate=learning_rate),
            metrics=[
                tf.keras.metrics.MeanAbsoluteError(name="mean_absolute_error"),
                tf.keras.metrics.MeanAbsolutePercentageError(name="mean_absolute_percentage_error"),
                tf.keras.metrics.R2Score(name="r2_score"),
            ],
        )
    else:
        model.add(Dense(units=1, activation='sigmoid'))
        model.compile(
            loss='binary_crossentropy',
            optimizer=Adam(learning_rate=learning_rate),
            metrics=[
                tf.keras.metrics.AUC(name="auc"),
                tf.keras.metrics.Precision(name="precision"),
                tf.keras.metrics.Recall(name="recall"),
            ],
        )
    #model.summary()

    # Default arguments for early stopping
    es_args = dict(
        monitor="loss",  # val_loss or loss
        min_delta=0.00001,  # Minimum change qualified as improvement
        patience=5,  # Number of epochs with no improvement before stopping
        verbose=0,
        mode='auto',
    )
    es_args.update(train_conf.get("es", {}))  # Overwrite defaults with values explicitly specified in config
    es = EarlyStopping(**es_args)

    #
    # Train
    #
    model.fit(
        X_train,
        y_train,
        batch_size=batch_size,
        epochs=n_epochs,
        #validation_split=0.05,
        #validation_data=(X_validate, y_validate),
        #class_weight={0: 1, 1: 20},
        callbacks=[es],
        verbose=1,
    )

    return (model, scaler)
def predict_nn(models: tuple, df_X_test, model_config: dict):
    """
    Use the model(s) to make predictions for the test data.

    The first element of `models` is the prediction model and the second
    (optional, may be None) is a fitted scaler.

    :param models: tuple (keras model, scaler or None) as returned by train_nn
    :param df_X_test: test features (DataFrame); rows containing NaN get NaN predictions
    :param model_config: kept for interface symmetry with train_nn (unused here)
    :return: Series aligned with df_X_test.index; NaN where the input row had NaNs
    """
    #
    # Scale
    #
    scaler = models[1]
    input_index = df_X_test.index
    if scaler is not None:
        # Preserve the original column names, which scaler.transform discards
        X_scaled = scaler.transform(df_X_test)
        df_X_test = pd.DataFrame(data=X_scaled, index=input_index, columns=df_X_test.columns)

    df_X_test_nonans = df_X_test.dropna()  # Drop nans, possibly creating gaps in the index
    nonans_index = df_X_test_nonans.index

    # Resets all (global) state generated by Keras.
    # Important if prediction is executed in a loop to avoid a memory leak.
    tf.keras.backend.clear_session()

    y_test_hat_nonans = models[0].predict_on_batch(df_X_test_nonans.values)  # NN returns a matrix with one column
    y_test_hat_nonans = y_test_hat_nonans[:, 0]  # Or y_test_hat.flatten()
    y_test_hat_nonans = pd.Series(data=y_test_hat_nonans, index=nonans_index)  # Attach index with gaps

    df_ret = pd.DataFrame(index=input_index)  # Empty dataframe with the original index
    df_ret["y_hat"] = y_test_hat_nonans  # Join on index; rows that were dropped stay NaN
    sr_ret = df_ret["y_hat"]  # Series with all original input indexes, NaN where input had NaN
    return sr_ret