# intelligent-trading-bot/scripts/download_yahoo.py

from datetime import datetime, date, timedelta

import click

import yfinance as yf

from service.App import *

"""
Download quotes from Yahoo Finance (via yfinance) and store them in CSV files, one per data source.
"""

def _normalize_quotes(raw_df, time_column):
    """
    Bring a data frame returned by `yf.download` to the layout stored in the CSV files.

    The index is turned into a regular column, all column names are lower-cased,
    the date column is renamed to `time_column` and its values are converted to
    plain `datetime.date` objects.

    :param raw_df: non-empty data frame as returned by `yf.download`
        (assumes the index is named 'Date', as the code has always expected)
    :param time_column: configured name of the time column
    :return: new data frame in the stored layout
    """
    quotes_df = raw_df.reset_index()
    # Lower-case the column names BEFORE renaming the date column. In the opposite
    # order a configured `time_column` containing upper-case letters would be
    # lower-cased as well and could not be found under its configured name later.
    quotes_df.columns = quotes_df.columns.str.lower()
    quotes_df = quotes_df.rename(columns={"date": time_column})
    quotes_df[time_column] = pd.to_datetime(quotes_df[time_column], format="ISO8601").dt.date
    return quotes_df


@click.command()
@click.option('--config_file', '-c', type=click.Path(), default='', help='Configuration file name')
def main(config_file):
    """
    Download quotes from Yahoo for all configured data sources and store them in CSV files.

    For each entry of "data_sources" the "folder" value is used as the symbol to download.
    If the CSV file of the symbol already exists, only the last 5 days are downloaded
    and merged into it (newer rows overwrite older rows with the same date). Otherwise
    the complete available history is fetched. If "download_max_rows" is set, only this
    number of latest rows is stored.

    Returns the data frame stored for the last processed data source, or None if nothing
    was stored.
    """
    load_config(config_file)

    time_column = App.config["time_column"]
    data_path = Path(App.config["data_folder"])
    download_max_rows = App.config.get("download_max_rows", 0)

    now = datetime.now()

    # Remains None if no data source is processed, so that the final `return`
    # never refers to an unbound name
    df = None

    data_sources = App.config["data_sources"]
    for ds in data_sources:
        # Assumption: folder name is equal to the symbol name we want to download
        quote = ds.get("folder")
        if not quote:
            print("ERROR. Folder is not specified.")
            continue

        # If file name is not specified (or empty) then use symbol name as file name
        file = ds.get("file") or quote

        print(f"Start downloading '{quote}' ...")

        file_path = data_path / quote
        file_path.mkdir(parents=True, exist_ok=True)  # Ensure that folder exists

        file_name = (file_path / file).with_suffix(".csv")

        if file_name.is_file():
            quotes_df = pd.read_csv(file_name, parse_dates=[time_column], date_format="ISO8601")
            quotes_df[time_column] = quotes_df[time_column].dt.date

            # === Download from the remote server
            # Download more data than we need and then overwrite the older data
            new_df = yf.download(quote, period="5d", auto_adjust=True, multi_level_index=False)

            if new_df is None or new_df.empty:
                # Nothing was received (e.g., failed request): keep the stored data as is
                print(f"WARNING. No new data received for '{quote}'. Existing data is kept.")
            else:
                new_df = _normalize_quotes(new_df, time_column)
                quotes_df = pd.concat([quotes_df, new_df])
                quotes_df = quotes_df.drop_duplicates(subset=[time_column], keep="last")

        else:
            print("File not found. Full fetch...")

            # === Download from the remote server
            quotes_df = yf.download(quote, period="max", auto_adjust=True, multi_level_index=False)

            if quotes_df is None or quotes_df.empty:
                # Do not store an empty file: it could not be read and updated by the next run
                print(f"ERROR. No data received for '{quote}'. Nothing is stored.")
                continue

            quotes_df = _normalize_quotes(quotes_df, time_column)

            print("Full fetch finished.")

        quotes_df = quotes_df.sort_values(by=time_column)

        # Limit the saved size by only the latest rows
        if download_max_rows:
            quotes_df = quotes_df.tail(download_max_rows)

        quotes_df.to_csv(file_name, index=False)
        print(f"Finished downloading '{quote}'. Stored {len(quotes_df)} rows in '{file_name}'")

        df = quotes_df

    elapsed = datetime.now() - now
    print(f"Finished downloading data in {str(elapsed).split('.')[0]}")

    return df


# Start the click command only when this file is executed as a script
if "__main__" == __name__:
    main()
add yahoo downloader 2022-07-10 11:14:47 +02:00			`from datetime import datetime, date, timedelta`

			`import click`

minor adjustments in yahoo downloader 2022-07-10 12:17:16 +02:00			`import yfinance as yf`
add yahoo downloader 2022-07-10 11:14:47 +02:00
			`from service.App import *`

			`"""`
			`Download quotes from Yahoo`
			`"""`

			`@click.command()`
			`@click.option('--config_file', '-c', type=click.Path(), default='', help='Configuration file name')`
			`def main(config_file):`
			`"""`
			`"""`
			`load_config(config_file)`

merge data daily raster and improvements 2022-07-16 10:09:56 +02:00			`time_column = App.config["time_column"]`
add yahoo downloader 2022-07-10 11:14:47 +02:00			`data_path = Path(App.config["data_folder"])`
limit download size in downloaders 2025-03-07 18:07:43 +01:00			`download_max_rows = App.config.get("download_max_rows", 0)`
add yahoo downloader 2022-07-10 11:14:47 +02:00
			`now = datetime.now()`

			`data_sources = App.config["data_sources"]`
merge data daily raster and improvements 2022-07-16 10:09:56 +02:00			`for ds in data_sources:`
add yahoo downloader 2022-07-10 11:14:47 +02:00			`# Assumption: folder name is equal to the symbol name we want to download`
merge data daily raster and improvements 2022-07-16 10:09:56 +02:00			`quote = ds.get("folder")`
add yahoo downloader 2022-07-10 11:14:47 +02:00			`if not quote:`
			`print(f"ERROR. Folder is not specified.")`
			`continue`

			`# If file name is not specified then use symbol name as file name`
merge data daily raster and improvements 2022-07-16 10:09:56 +02:00			`file = ds.get("file", quote)`
add yahoo downloader 2022-07-10 11:14:47 +02:00			`if not file:`
			`file = quote`

			`print(f"Start downloading '{quote}' ...")`

small improvements in downloaders for file and path handling 2023-08-19 15:43:04 +02:00			`file_path = data_path / quote`
			`file_path.mkdir(parents=True, exist_ok=True) # Ensure that folder exists`
add yahoo downloader 2022-07-10 11:14:47 +02:00
small improvements in downloaders for file and path handling 2023-08-19 15:43:04 +02:00			`file_name = (file_path / file).with_suffix(".csv")`

			`if file_name.is_file():`
add ISO date format to all date parsers and readers 2023-08-24 20:24:11 +02:00			`df = pd.read_csv(file_name, parse_dates=[time_column], date_format="ISO8601")`
			`#df['Date'] = pd.to_datetime(df['Date'], format="ISO8601") # "2022-06-07" iso format`
fix errors in yahoo downloader 2022-07-31 11:55:54 +02:00			`df[time_column] = df[time_column].dt.date`
			`last_date = df.iloc[-1][time_column]`
add yahoo downloader 2022-07-10 11:14:47 +02:00
			`# === Download from the remote server`
improvements in yahoo downloader 2024-05-11 15:45:51 +02:00			`# Download more data than we need and then overwrite the older data`
fix problem with a new version of yfinance library 2024-12-14 10:21:56 +01:00			`new_df = yf.download(quote, period="5d", auto_adjust=True, multi_level_index=False)`
add yahoo downloader 2022-07-10 11:14:47 +02:00
			`new_df = new_df.reset_index()`
add ISO date format to all date parsers and readers 2023-08-24 20:24:11 +02:00			`new_df['Date'] = pd.to_datetime(new_df['Date'], format="ISO8601").dt.date`
improvements in yahoo downloader 2024-05-11 15:45:51 +02:00			`#del new_df['Close']`
			`#new_df.rename({'Adj Close': 'Close'}, axis=1, inplace=True)`
			`new_df.rename({'Date': time_column}, axis=1, inplace=True)`
minor adjustments in yahoo downloader 2022-07-10 12:17:16 +02:00			`new_df.columns = new_df.columns.str.lower()`

add yahoo downloader 2022-07-10 11:14:47 +02:00			`df = pd.concat([df, new_df])`
merge data daily raster and improvements 2022-07-16 10:09:56 +02:00			`df = df.drop_duplicates(subset=[time_column], keep="last")`
add yahoo downloader 2022-07-10 11:14:47 +02:00
			`else:`
			`print(f"File not found. Full fetch...")`
minor adjustments in yahoo downloader 2022-07-10 12:17:16 +02:00
			`# === Download from the remote server`
fix problem with a new version of yfinance library 2024-12-14 10:21:56 +01:00			`#df = yf.download(quote, date(1990, 1, 1), auto_adjust=True, multi_level_index=False)`
			`df = yf.download(quote, period="max", auto_adjust=True, multi_level_index=False)`
minor adjustments in yahoo downloader 2022-07-10 12:17:16 +02:00
add yahoo downloader 2022-07-10 11:14:47 +02:00			`df = df.reset_index()`
add ISO date format to all date parsers and readers 2023-08-24 20:24:11 +02:00			`df['Date'] = pd.to_datetime(df['Date'], format="ISO8601").dt.date`
improvements in yahoo downloader 2024-05-11 15:45:51 +02:00			`#del df['Close']`
			`#df.rename({'Adj Close': 'Close'}, axis=1, inplace=True)`
			`df.rename({'Date': time_column}, axis=1, inplace=True)`
minor adjustments in yahoo downloader 2022-07-10 12:17:16 +02:00			`df.columns = df.columns.str.lower()`

add yahoo downloader 2022-07-10 11:14:47 +02:00			`print(f"Full fetch finished.")`

merge data daily raster and improvements 2022-07-16 10:09:56 +02:00			`df = df.sort_values(by=time_column)`
add yahoo downloader 2022-07-10 11:14:47 +02:00
limit download size in downloaders 2025-03-07 18:07:43 +01:00			`# Limit the saved size by only the latest rows`
			`if download_max_rows:`
			`df = df.tail(download_max_rows)`

small improvements in downloaders for file and path handling 2023-08-19 15:43:04 +02:00			`df.to_csv(file_name, index=False)`
limit download size in downloaders 2025-03-07 18:07:43 +01:00			`print(f"Finished downloading '{quote}'. Stored {len(df)} rows in '{file_name}'")`
add yahoo downloader 2022-07-10 11:14:47 +02:00
			`elapsed = datetime.now() - now`
			`print(f"Finished downloading data in {str(elapsed).split('.')[0]}")`

			`return df`


			`if __name__ == '__main__':`
			`main()`