intelligent-trading-bot/scripts/generate_signals.py
2020-11-29 21:15:13 +01:00

236 lines
9 KiB
Python

import os
import sys
from pathlib import Path
from datetime import datetime, timezone, timedelta
from typing import Union
import json
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import metrics
from common.utils import *
"""
OBSOLETE: Use train_signal_models.py instead for grid search and finding best signal model by loading pre-computed rolling predictions.
Previously, it was used from the driver start-grid.py.
This script uses fixed signal parameters (thresholds etc.) to make one pass through the predictions by generating buy/sell signals for each row.
Yet, it does not store signals as new signals but rather uses these signals for trading and evaluating overall performance.
The result of performance evaluation is appended as one line to the output file.
In this sense, it is a trade simulation script for evaluating performance.
"""
#
# Parameters
#
class P:
in_path_name = r"_TEMP_FEATURES"
in_file_name = r"_BTCUSDT-1m-data-predictions.csv"
in_nrows = 100_000_000
out_path_name = r"_TEMP_FEATURES"
out_file_name = r"_BTCUSDT-1m-data-features_1-signals"
simulation_start = 100 # 100 172_440 (old data)
simulation_end = -100 # -100 464_760 (old data)
threshold_buy_10 = None # Buy if higher
threshold_buy_20 = None # Buy if higher
knn_threshold_buy_10 = None # Buy if higher
knn_threshold_buy_20 = None # Buy if higher
threshold_sell = None # Sell if price increases this factor from the buy price
forced_sell_length = None
trade_amount = 1_000.0
#
# Parameters which will be changed in the loop and hence need to be reset after each run
#
base_amount = 0.0 # Coins
quote_amount = trade_amount # Money
fill_time_nonforced = 0
fill_time_forced = 0
total_buy_signal_count = 0 # How many rows satisfy buy signal criteria (in both buy and sell modes)
buy_signal_count = 0 # How many buy signals in buy mode
sell_signal_count = 0 # How many time sold for the desired price (excluding forced sales because of time out)
buy_count = 0 # Really bought (in this approach, equal to buy signals)
sell_count = 0 # Really sold (in this approach, equal to buy count)
loss_sales_count = 0
loss_sales_amount = 0.0
def main(args=None):
pp = P() # Here we are guaranteed to have *initial* values which can be then changed in the loop
in_df = None
start_dt = datetime.now()
#
# Load data with predictions
#
print(f"Loading data with predictions from input file...")
in_path = Path(P.in_path_name).joinpath(P.in_file_name)
if not in_path.exists():
print(f"ERROR: Input file does not exist: {in_path}")
return
if P.in_file_name.endswith(".csv"):
in_df = pd.read_csv(in_path, parse_dates=['timestamp'], nrows=P.in_nrows)
elif P.in_file_name.endswith(".parquet"):
in_df = pd.read_parquet(in_path)
else:
print(f"ERROR: Unknown input file extension. Only csv and parquet are supported.")
#
# Initialize parameters of the loop
#
if not P.simulation_start:
P.simulation_start = 0
if not P.simulation_end:
P.simulation_end = len(in_df)
elif P.simulation_end < 0:
P.simulation_end = len(in_df) + P.simulation_end
#
# Hyper-parameters with defaults
#
P.threshold_buy_10 = float(os.getenv("threshold_buy_10", 0.5))
P.threshold_buy_20 = float(os.getenv("threshold_buy_20", 0.0))
P.knn_threshold_buy_10 = float(os.getenv("knn_threshold_buy_10", 0.0))
P.knn_threshold_buy_20 = float(os.getenv("knn_threshold_buy_20", 0.0))
P.threshold_sell = float(os.getenv("threshold_sell", 1.01))
P.forced_sell_length = int(os.getenv("forced_sell_length", 60))
#
# Main loop over trade sessions
#
print(f"Starting simulation loop...")
buy_price = None
buy_time = None
sell_price = None
for i in range(P.simulation_start, P.simulation_end):
if i % 10_000 == 0:
print(f"Processed {i} of {P.simulation_end - P.simulation_start} records.")
row = in_df.iloc[i]
close_price = row["close"]
high_price = row["high"]
# Check buy criteria for any row (for both buy and sell modes)
is_buy_signal = False
if row["high_60_10_gb"] >= P.threshold_buy_10 and row["high_60_20_gb"] >= P.threshold_buy_20 \
and row["high_60_10_knn"] >= P.knn_threshold_buy_10 and row["high_60_20_knn"] >= P.knn_threshold_buy_20:
is_buy_signal = True
pp.total_buy_signal_count += 1
if pp.base_amount == 0: # Buy mode: no coins - trying to buy
# Generate buy signal
if is_buy_signal:
pp.buy_signal_count += 1
# Execute buy signal by doing trade
pp.base_amount += P.trade_amount / close_price # Increase coins
pp.quote_amount -= P.trade_amount # Decrease money
pp.buy_count += 1
buy_price = close_price
buy_time = i
sell_price = buy_price * P.threshold_sell
elif pp.base_amount > 0: # Sell mode: there are coins - trying to sell
# Determine if it was sold for our desired price
if high_price >= sell_price:
pp.sell_signal_count += 1
# Execute buy signal by doing trade
pp.quote_amount += pp.base_amount * sell_price # Increase money by selling all coins
pp.base_amount = 0.0
pp.sell_count += 1
pp.fill_time_nonforced += (i - buy_time)
elif (i - buy_time) > P.forced_sell_length: # Sell time out. Force sell
# Execute buy signal by doing trade
pp.quote_amount += pp.base_amount * close_price # Increase money by selling all coins
pp.base_amount = 0.0
pp.sell_count += 1
pp.fill_time_forced += P.forced_sell_length
if close_price < buy_price: # Losses
pp.loss_sales_count += 1
pp.loss_sales_amount += (buy_price - close_price)
else:
print(f"Inconsistent state: both base and quote assets are zeros.")
return
#
# Close. Sell rest base asset if available for the last price
#
i = P.simulation_end
if pp.base_amount > 0.0: # Check base asset like BTC (we will spend it)
# Execute buy signal by doing trade
pp.quote_amount += pp.base_amount * close_price # Increase money by selling all coins
pp.base_amount = 0.0
pp.sell_count += 1
pp.fill_time_forced += (i - buy_time)
#
# Store simulation parameters and performance
#
out_path = Path(P.out_path_name)
out_path.mkdir(parents=True, exist_ok=True) # Ensure that folder exists
out_path = out_path.joinpath(P.out_file_name)
precision = pp.sell_signal_count / pp.sell_count if pp.sell_count != 0 else 0.0
mean_fill_time = pp.fill_time_nonforced / pp.sell_signal_count if pp.sell_signal_count != 0 else P.forced_sell_length
total_performance = 100.0 * (pp.quote_amount - P.trade_amount) / P.trade_amount
performance_per_trade = total_performance / pp.buy_count if pp.buy_count != 0 else 0.0
mean_loss_sale = pp.loss_sales_amount / pp.loss_sales_count if pp.loss_sales_count > 0.0 else 0.0
out_str = ""
out_str += f"{P.threshold_buy_10}, {P.threshold_buy_20}, {P.knn_threshold_buy_10}, {P.knn_threshold_buy_20}, "
out_str += f"{P.threshold_sell}, {P.forced_sell_length}, "
out_str += f"{pp.total_buy_signal_count}, {pp.buy_signal_count}, {pp.sell_signal_count}, "
out_str += f"{pp.loss_sales_count}, {mean_loss_sale:.2f}, "
out_str += f"{precision:.2f}, {mean_fill_time:.2f}, "
out_str += f"{performance_per_trade:.2f}, {total_performance:.2f}"
# TODO: Performance per forced vs non-forced trade to see how bad forced trades are
# TODO: Check what happens if we lose money. We cannot use fixed 1000 for trade
# In general, we should be able to get negative performance somehow. Check that it is possible.
# Check that always using fixed trade amount is correct. For example, we buy for 1000 and return less or more than 1000 in cash. Then again we use 1000 (decreasing the cash), and return more or less.
header_str = f"threshold_buy_10,threshold_buy_20,knn_threshold_buy_10,knn_threshold_buy_20," \
f"threshold_sell,forced_sell_length," \
f"total_buy_signal_count,buy_signal_count,sell_signal_count," \
f"loss_sales_count,mean_loss_sale," \
f"precision,mean_fill_time," \
f"performance_per_trade,total_performance"
if out_path.with_suffix('.txt').is_file():
add_header = False
else:
add_header = True
with open(out_path.with_suffix('.txt'), "a+") as f:
if add_header:
f.write(header_str + "\n")
f.write(out_str + "\n")
print(f"")
print(f"Simulation finished:")
if __name__ == '__main__':
main(sys.argv[1:])