import os import pandas as pd from typing import Tuple # Default MT5 Common Files path on Windows DEFAULT_COMMON = os.path.join(os.environ.get("APPDATA", ""), "MetaQuotes", "Terminal", "Common", "Files", "DualEA") FEATURES_NAME = "features.csv" def resolve_features_path(common_dir: str = None) -> str: base = common_dir or DEFAULT_COMMON return os.path.join(base, FEATURES_NAME) def _read_csv_robust(path: str) -> pd.DataFrame: encodings = [ "utf-8", "utf-8-sig", "utf-16", "utf-16-le", "utf-16-be", "cp1252", "latin1", ] last_exc = None for enc in encodings: try: return pd.read_csv(path, encoding=enc) except UnicodeDecodeError as e: last_exc = e continue except Exception as e: # If it's not a decode error, bubble up immediately raise # Fallback with python engine ignoring bad lines if all decodes failed return pd.read_csv(path, encoding="latin1", engine="python") def load_features(common_dir: str = None) -> pd.DataFrame: path = resolve_features_path(common_dir) if not os.path.exists(path): raise FileNotFoundError(f"features.csv not found at {path}") # Comma-delimited, result of EA writes (encoding may vary depending on MT5 settings) df = _read_csv_robust(path) # Normalize columns to lower snake-case (handles header casing/spacing) df.columns = [str(c).strip().lower().replace(" ", "_").replace("-", "_") for c in df.columns] # Basic cleaning df = df.dropna(how="all") # Ensure timestamp is parsed if present for col in ("time", "timestamp", "entry_time", "close_time"): if col in df.columns: try: df[col] = pd.to_datetime(df[col]) except Exception: pass # If long format (feature/value), pivot to wide if {"feature", "value"}.issubset(set(df.columns)): # Coerce value to numeric df["value"] = pd.to_numeric(df["value"], errors="coerce") # Candidate keys to keep identity and slices stable key_priority = [ ["position_id"], ["deal_id"], ["order_id"], ["position_id", "deal_id", "order_id"], ["strategy", "symbol", "timeframe", "entry_time", "close_time"], ["strategy", "symbol", "entry_time", "close_time"], ["symbol", "entry_time", "close_time"], ] keys = [] for cand in key_priority: kk = [c for c in cand if c in df.columns] if len(kk) == len(cand): keys = kk break # Always try to include context columns for downstream use for c in ("strategy", "symbol", "timeframe", "entry_time", "close_time"): if c in df.columns and c not in keys: keys.append(c) if not keys: # Fallback: use all non-feature columns except 'value' as keys to avoid data loss keys = [c for c in df.columns if c not in ("feature", "value")] # Pivot wide = df.pivot_table(index=keys, columns="feature", values="value", aggfunc="last") wide = wide.reset_index() # Flatten columns after pivot wide.columns = [str(c).strip().lower().replace(" ", "_") for c in wide.columns] return wide return df def make_label_from_r_multiple(df: pd.DataFrame, threshold: float = 0.0) -> Tuple[pd.DataFrame, pd.Series]: # Accept common variants candidates = [ "r_multiple", "r", "rmultiple", ] col = None for c in candidates: if c in df.columns: col = c break if col is None: # Try fuzzy search for any col containing both 'r' and 'multiple' for c in df.columns: s = str(c) if "r" in s and "multiple" in s: col = c break if col is None: # Fallback 1: profit column present in the same frame if "profit" in df.columns: vals = pd.to_numeric(df["profit"], errors="coerce").fillna(0.0) y = (vals > 0.0).astype(int) return df, y # Fallback 2: try to read knowledge_base.csv and merge kb_path = os.path.join(os.path.dirname(resolve_features_path()), "knowledge_base.csv") if os.path.exists(kb_path): try: kb = pd.read_csv(kb_path) kb.columns = [str(c).strip().lower().replace(" ", "_").replace("-", "_") for c in kb.columns] # Candidate join keys join_keys_priority = [ ["order_id"], ["deal_id"], ["position_id"], ["symbol", "close_time"], ] for keys in join_keys_priority: if all(k in df.columns for k in keys) and all(k in kb.columns for k in keys): merged = pd.merge(df, kb[[*keys, "profit"]], on=keys, how="left") vals = pd.to_numeric(merged["profit"], errors="coerce").fillna(0.0) y = (vals > 0.0).astype(int) return merged, y except Exception: pass raise ValueError("features.csv must contain r_multiple (or 'r'); profit-based fallback also unavailable") vals = pd.to_numeric(df[col], errors="coerce") y = (vals > threshold).astype(int) return df, y