intelligent-trading-bot/scripts/generate_signals.py

Name: MQL5 Algo Forge
Brand: MQL5
import os
import sys
from pathlib import Path
from datetime import datetime, timezone, timedelta
from typing import Union
import json
import pickle

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn import metrics

from common.utils import *

"""
OBSOLETE: Use train_signal_models.py instead for grid search and finding best signal model by loading pre-computed rolling predictions.
  Previously, it was used from the driver start-grid.py.

This script uses fixed signal parameters (thresholds etc.) to make one pass through the predictions by generating buy/sell signals for each row.
Yet, it does not store signals as new signals but rather uses these signals for trading and evaluating overall performance.
The result of performance evaluation is appended as one line to the output file.
In this sense, it is a trade simulation script for evaluating performance.
"""

#
# Parameters
#
class P:
    in_path_name = r"_TEMP_FEATURES"
    in_file_name = r"_BTCUSDT-1m-data-predictions.csv"
    in_nrows = 100_000_000

    out_path_name = r"_TEMP_FEATURES"
    out_file_name = r"_BTCUSDT-1m-data-features_1-signals"

    simulation_start = 100  # 100 172_440 (old data)
    simulation_end = -100  # -100 464_760 (old data)

    threshold_buy_10 = None  # Buy if higher
    threshold_buy_20 = None  # Buy if higher
    knn_threshold_buy_10 = None  # Buy if higher
    knn_threshold_buy_20 = None  # Buy if higher

    threshold_sell = None  # Sell if price increases this factor from the buy price
    forced_sell_length = None

    trade_amount = 1_000.0

    #
    # Parameters which will be changed in the loop and hence need to be reset after each run
    #
    base_amount = 0.0  # Coins
    quote_amount = trade_amount  # Money

    fill_time_nonforced = 0
    fill_time_forced = 0

    total_buy_signal_count = 0  # How many rows satisfy buy signal criteria (in both buy and sell modes)
    buy_signal_count = 0  # How many buy signals in buy mode
    sell_signal_count = 0  # How many time sold for the desired price (excluding forced sales because of time out)

    buy_count = 0  # Really bought (in this approach, equal to buy signals)
    sell_count = 0  # Really sold (in this approach, equal to buy count)

    loss_sales_count = 0
    loss_sales_amount = 0.0


def main(args=None):
    pp = P()  # Here we are guaranteed to have *initial* values which can be then changed in the loop

    in_df = None

    start_dt = datetime.now()

    #
    # Load data with predictions
    #
    print(f"Loading data with predictions from input file...")

    in_path = Path(P.in_path_name).joinpath(P.in_file_name)
    if not in_path.exists():
        print(f"ERROR: Input file does not exist: {in_path}")
        return

    if P.in_file_name.endswith(".csv"):
        in_df = pd.read_csv(in_path, parse_dates=['timestamp'], nrows=P.in_nrows)
    elif P.in_file_name.endswith(".parquet"):
        in_df = pd.read_parquet(in_path)
    else:
        print(f"ERROR: Unknown input file extension. Only csv and parquet are supported.")

    #
    # Initialize parameters of the loop
    #
    if not P.simulation_start:
        P.simulation_start = 0
    if not P.simulation_end:
        P.simulation_end = len(in_df)
    elif P.simulation_end < 0:
        P.simulation_end = len(in_df) + P.simulation_end

    #
    # Hyper-parameters with defaults
    #
    P.threshold_buy_10 = float(os.getenv("threshold_buy_10", 0.5))
    P.threshold_buy_20 = float(os.getenv("threshold_buy_20", 0.0))
    P.knn_threshold_buy_10 = float(os.getenv("knn_threshold_buy_10", 0.0))
    P.knn_threshold_buy_20 = float(os.getenv("knn_threshold_buy_20", 0.0))

    P.threshold_sell = float(os.getenv("threshold_sell", 1.01))
    P.forced_sell_length = int(os.getenv("forced_sell_length", 60))

    #
    # Main loop over trade sessions
    #
    print(f"Starting simulation loop...")
    buy_price = None
    buy_time = None
    sell_price = None
    for i in range(P.simulation_start, P.simulation_end):

        if i % 10_000 == 0:
            print(f"Processed {i} of {P.simulation_end - P.simulation_start} records.")

        row = in_df.iloc[i]
        close_price = row["close"]
        high_price = row["high"]

        # Check buy criteria for any row (for both buy and sell modes)
        is_buy_signal = False
        if row["high_60_10_gb"] >= P.threshold_buy_10 and row["high_60_20_gb"] >= P.threshold_buy_20 \
                and row["high_60_10_knn"] >= P.knn_threshold_buy_10 and row["high_60_20_knn"] >= P.knn_threshold_buy_20:
            is_buy_signal = True
            pp.total_buy_signal_count += 1

        if pp.base_amount == 0:  # Buy mode: no coins - trying to buy
            # Generate buy signal
            if is_buy_signal:
                pp.buy_signal_count += 1

                # Execute buy signal by doing trade
                pp.base_amount += P.trade_amount / close_price  # Increase coins
                pp.quote_amount -= P.trade_amount  # Decrease money
                pp.buy_count += 1

                buy_price = close_price
                buy_time = i
                sell_price = buy_price * P.threshold_sell

        elif pp.base_amount > 0:  # Sell mode: there are coins - trying to sell
            # Determine if it was sold for our desired price
            if high_price >= sell_price:
                pp.sell_signal_count += 1

                # Execute buy signal by doing trade
                pp.quote_amount += pp.base_amount * sell_price  # Increase money by selling all coins
                pp.base_amount = 0.0
                pp.sell_count += 1
                pp.fill_time_nonforced += (i - buy_time)
            elif (i - buy_time) > P.forced_sell_length:  # Sell time out. Force sell
                # Execute buy signal by doing trade
                pp.quote_amount += pp.base_amount * close_price  # Increase money by selling all coins
                pp.base_amount = 0.0
                pp.sell_count += 1
                pp.fill_time_forced += P.forced_sell_length

                if close_price < buy_price:  # Losses
                    pp.loss_sales_count += 1
                    pp.loss_sales_amount += (buy_price - close_price)

        else:
            print(f"Inconsistent state: both base and quote assets are zeros.")
            return

    #
    # Close. Sell rest base asset if available for the last price
    #
    i = P.simulation_end
    if pp.base_amount > 0.0:  # Check base asset like BTC (we will spend it)
        # Execute buy signal by doing trade
        pp.quote_amount += pp.base_amount * close_price  # Increase money by selling all coins
        pp.base_amount = 0.0
        pp.sell_count += 1
        pp.fill_time_forced += (i - buy_time)

    #
    # Store simulation parameters and performance
    #
    out_path = Path(P.out_path_name)
    out_path.mkdir(parents=True, exist_ok=True)  # Ensure that folder exists
    out_path = out_path.joinpath(P.out_file_name)

    precision = pp.sell_signal_count / pp.sell_count if pp.sell_count != 0 else 0.0
    mean_fill_time = pp.fill_time_nonforced / pp.sell_signal_count if pp.sell_signal_count != 0 else P.forced_sell_length
    total_performance = 100.0 * (pp.quote_amount - P.trade_amount) / P.trade_amount
    performance_per_trade = total_performance / pp.buy_count if pp.buy_count != 0 else 0.0
    mean_loss_sale = pp.loss_sales_amount / pp.loss_sales_count if pp.loss_sales_count > 0.0 else 0.0

    out_str = ""
    out_str += f"{P.threshold_buy_10}, {P.threshold_buy_20}, {P.knn_threshold_buy_10}, {P.knn_threshold_buy_20}, "
    out_str += f"{P.threshold_sell}, {P.forced_sell_length}, "
    out_str += f"{pp.total_buy_signal_count}, {pp.buy_signal_count}, {pp.sell_signal_count}, "
    out_str += f"{pp.loss_sales_count}, {mean_loss_sale:.2f}, "
    out_str += f"{precision:.2f}, {mean_fill_time:.2f}, "
    out_str += f"{performance_per_trade:.2f}, {total_performance:.2f}"

    # TODO: Performance per forced vs non-forced trade to see how bad forced trades are
    # TODO: Check what happens if we lose money. We cannot use fixed 1000 for trade
    #    In general, we should be able to get negative performance somehow. Check that it is possible.
    #    Check that always using fixed trade amount is correct. For example, we buy for 1000 and return less or more than 1000 in cash. Then again we use 1000 (decreasing the cash), and return more or less.

    header_str = f"threshold_buy_10,threshold_buy_20,knn_threshold_buy_10,knn_threshold_buy_20," \
                 f"threshold_sell,forced_sell_length," \
                 f"total_buy_signal_count,buy_signal_count,sell_signal_count," \
                 f"loss_sales_count,mean_loss_sale," \
                 f"precision,mean_fill_time," \
                 f"performance_per_trade,total_performance"
    if out_path.with_suffix('.txt').is_file():
        add_header = False
    else:
        add_header = True

    with open(out_path.with_suffix('.txt'), "a+") as f:
        if add_header:
            f.write(header_str + "\n")
        f.write(out_str + "\n")

    print(f"")
    print(f"Simulation finished:")


if __name__ == '__main__':
    main(sys.argv[1:])