"""
Download quotes from Yahoo
"""
from datetime import datetime, date, timedelta
from pathlib import Path

import pandas as pd
import click

import yfinance as yf
from curl_cffi import requests  # Without its Session object, yahoo will reject requests with YFRateLimitError


def _load_stored_quotes(file_name, time_column):
    """Read previously stored quotes from ``file_name``.

    :param file_name: path of the CSV file to read.
    :param time_column: name of the column holding the quote date.
    :return: data frame whose ``time_column`` holds ``datetime.date`` values,
        or ``None`` if the file contains no rows (zero bytes or header only).
    """
    try:
        stored = pd.read_csv(file_name, parse_dates=[time_column], date_format="ISO8601")
    except pd.errors.EmptyDataError:
        # Zero-byte file: nothing to parse, not even a header
        return None

    if stored.empty:
        # Header-only file: there is no last row to continue from
        return None

    stored[time_column] = stored[time_column].dt.date
    return stored


def _normalize_quotes(raw_df, time_column):
    """Bring a frame returned by ``yf.download`` to the stored CSV layout.

    The index is turned into a regular column, all column names are
    lower-cased, and the date column is converted to ``datetime.date`` values
    and renamed to ``time_column``.

    :param raw_df: frame returned by ``yf.download`` (flat column index).
    :param time_column: name the date column must have in the result.
    :return: new data frame; ``raw_df`` itself is not modified.
    """
    quotes = raw_df.reset_index()

    # Lower-case BEFORE renaming. Otherwise a configured time column name with
    # uppercase letters would be lower-cased too and later lookups by
    # ``time_column`` would fail with KeyError.
    quotes.columns = quotes.columns.str.lower()
    quotes["date"] = pd.to_datetime(quotes["date"], format="ISO8601", utc=True).dt.date
    return quotes.rename(columns={"date": time_column})


def _fetch_quotes(quote, period, time_column, session):
    """Download quotes for one symbol and normalize them.

    :param quote: symbol passed to ``yf.download``.
    :param period: period string passed to ``yf.download`` (e.g. ``"5d"``, ``"max"``).
    :param time_column: name the date column must have in the result.
    :param session: session object handed over to ``yf.download``.
    :return: normalized data frame, or ``None`` if nothing was returned.
    """
    raw_df = yf.download(quote, period=period, auto_adjust=True, multi_level_index=False, session=session)
    if raw_df is None or raw_df.empty:
        return None
    return _normalize_quotes(raw_df, time_column)


def download_klines(config, data_sources):
    """Download daily quotes from Yahoo and store them in CSV files.

    For every data source the file ``<data_folder>/<folder>/<file>.csv`` is
    updated. If the file already has rows, only the days since its last row
    (plus a small overlap) are requested and merged in, newer rows replacing
    stored rows with the same date. Otherwise the whole available history is
    fetched. A data source for which nothing could be downloaded is reported
    and skipped; its stored file (if any) is left unchanged.

    :param config: mapping with the keys ``time_column`` (name of the date
        column in the files), ``data_folder`` (root folder of the files) and
        optionally ``download_max_rows`` (if non-zero, only this many latest
        rows are stored).
    :param data_sources: iterable of mappings with the key ``folder`` (used
        both as sub-folder name and as the symbol to download) and optionally
        ``file`` (file name without extension; defaults to ``folder``).
    :return: None
    """
    time_column = config["time_column"]

    data_path = Path(config["data_folder"])

    download_max_rows = config.get("download_max_rows", 0)

    now = datetime.now()

    # This session will be used in all requests to avoid YFRateLimitError
    session = requests.Session(impersonate="chrome")

    for ds in data_sources:
        # Assumption: folder name is equal to the symbol name we want to download
        quote = ds.get("folder")
        if not quote:
            print("ERROR. Folder is not specified.")
            continue

        # If file name is not specified (or empty) then use symbol name as file name
        file = ds.get("file") or quote

        print(f"Start downloading '{quote}' ...")

        file_path = data_path / quote
        file_path.mkdir(parents=True, exist_ok=True)  # Ensure that folder exists

        # NOTE: with_suffix() replaces an existing suffix, so a name with a dot
        # (e.g. "ABC.X") is stored as "ABC.csv"
        file_name = (file_path / file).with_suffix(".csv")

        file_exists = file_name.is_file()
        df = _load_stored_quotes(file_name, time_column) if file_exists else None

        if df is not None:
            # Incremental update. The stored file is written sorted by time,
            # so its last row holds the latest stored date.
            last_date = df.iloc[-1][time_column]

            # The overlap can be longer because the difference in days includes
            # also weekends which are not trade days
            overlap = 2
            elapsed_days = (pd.Timestamp(now) - pd.Timestamp(last_date)).days
            # Never request a zero or negative period if the last stored date is in the future
            days = max(elapsed_days, 0) + overlap

            # === Download from the remote server
            # Download more data than we need and then overwrite the older data
            new_df = _fetch_quotes(quote, f"{days}d", time_column, session)
            if new_df is None:
                print(f"ERROR. No data received for '{quote}'. Stored file is left unchanged.")
                continue

            df = pd.concat([df, new_df])
            df = df.drop_duplicates(subset=[time_column], keep="last")

        else:
            if file_exists:
                print("File is empty. Full fetch...")
            else:
                print("File not found. Full fetch...")

            # === Download from the remote server
            df = _fetch_quotes(quote, "max", time_column, session)
            if df is None:
                # Do not write a header-only file: it could not be continued on the next run
                print(f"ERROR. No data received for '{quote}'. Nothing is stored.")
                continue

            print("Full fetch finished.")

        df = df.sort_values(by=time_column)

        # Limit the saved size by only the latest rows
        if download_max_rows:
            df = df.tail(download_max_rows)

        df.to_csv(file_name, index=False)

        print(f"Finished downloading '{quote}'. Stored {len(df)} rows in '{file_name}'")