2022-03-20 10:09:33 +01:00
|
|
|
from datetime import timedelta, datetime
|
|
|
|
|
from dateutil import parser
|
|
|
|
|
|
2024-12-14 19:26:15 +01:00
|
|
|
#import math
|
|
|
|
|
#import os.path
|
|
|
|
|
#import json
|
|
|
|
|
#import time
|
|
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
|
|
|
|
|
|
import click
|
2022-03-20 10:09:33 +01:00
|
|
|
|
2024-12-14 19:26:15 +01:00
|
|
|
from binance import Client
|
2022-03-20 10:09:33 +01:00
|
|
|
|
2024-05-11 15:43:40 +02:00
|
|
|
from common.utils import klines_to_df, binance_freq_from_pandas
|
2022-03-20 10:09:33 +01:00
|
|
|
from service.App import *
|
|
|
|
|
|
|
|
|
|
"""
|
2024-12-14 19:26:15 +01:00
|
|
|
Download from binance
|
2022-03-20 10:09:33 +01:00
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
@click.command()
@click.option('--config_file', '-c', type=click.Path(), default='', help='Configuration file name')
def main(config_file):
    """Download historic klines from the Binance server and store them as CSV files.

    For each entry of the ``data_sources`` config list, the symbol (taken from the
    entry's ``folder`` field) is downloaded via ``Client.get_historical_klines``.
    If a CSV file for the symbol already exists, only the missing tail is downloaded
    (with a small overlap so the last stored klines are refreshed); otherwise all
    available history since 2017-01-01 is downloaded into a new file.

    Returns the DataFrame of the last processed symbol (empty if no sources).
    """
    load_config(config_file)

    time_column = App.config["time_column"]
    data_path = Path(App.config["data_folder"])
    # 0 (or missing) means: do not limit the number of stored rows
    download_max_rows = App.config.get("download_max_rows", 0)

    now = datetime.now()

    freq = App.config["freq"]  # Pandas frequency
    print(f"Pandas frequency: {freq}")
    freq = binance_freq_from_pandas(freq)  # Convert to Binance interval notation, e.g. '1min' -> '1m'
    print(f"Binance frequency: {freq}")

    save = True
    App.client = Client(api_key=App.config["api_key"], api_secret=App.config["api_secret"])

    futures = False
    if futures:
        # Point the client at the futures endpoints instead of spot
        App.client.API_URL = "https://fapi.binance.com/fapi"
        App.client.PRIVATE_API_VERSION = "v1"
        App.client.PUBLIC_API_VERSION = "v1"

    data_sources = App.config["data_sources"]
    df = pd.DataFrame()  # Defined return value even if data_sources is empty (was a NameError)
    for ds in data_sources:
        # Assumption: folder name is equal to the symbol name we want to download
        quote = ds.get("folder")
        if not quote:
            print("ERROR. Folder is not specified.")
            continue

        print(f"Start downloading '{quote}' ...")

        file_path = data_path / quote
        file_path.mkdir(parents=True, exist_ok=True)  # Ensure that folder exists

        file_name = (file_path / ("futures" if futures else "klines")).with_suffix(".csv")

        # Get a few latest klines to determine the latest available timestamp
        latest_klines = App.client.get_klines(symbol=quote, interval=freq, limit=5)
        latest_ts = pd.to_datetime(latest_klines[-1][0], unit='ms')

        if file_name.is_file():
            # Load the existing data in order to append newly downloaded data
            df = pd.read_csv(file_name)
            df[time_column] = pd.to_datetime(df[time_column], format='ISO8601')

            # Start a few rows back so freshly downloaded data overwrites the last
            # (possibly incomplete) stored klines. Fix: use the configured time
            # column (was hard-coded "timestamp") and do not crash on short files.
            overlap = min(5, len(df))
            if overlap:
                oldest_point = df[time_column].iloc[-overlap]
            else:
                oldest_point = datetime(2017, 1, 1)  # File exists but is empty

            print(f"File found. Downloaded data for {quote} and {freq} since {str(latest_ts)} will be appended to the existing file {file_name}")
        else:
            # No existing data so we will download all available data and store as a new file
            df = pd.DataFrame()
            oldest_point = datetime(2017, 1, 1)
            print(f"File not found. All data will be downloaded and stored in newly created file for {quote} and {freq}.")

        # === Download from the remote server using binance client
        klines = App.client.get_historical_klines(
            symbol=quote,
            interval=freq,
            start_str=oldest_point.isoformat(),
            #end_str=latest_ts.isoformat()  # fetch everything up to now
        )

        df = klines_to_df(klines, df)

        # Remove last row because it represents a non-complete kline (the interval not finished yet)
        df = df.iloc[:-1]

        # Limit the saved size by only the latest rows
        if download_max_rows:
            df = df.tail(download_max_rows)

        if save:
            df.to_csv(file_name)

        print(f"Finished downloading '{quote}'. Stored {len(df)} rows in '{file_name}'")

    elapsed = datetime.now() - now
    print(f"Finished downloading data in {str(elapsed).split('.')[0]}")

    return df
|
2022-03-20 10:09:33 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: click parses the CLI options and invokes main().
    main()
|