#!/usr/bin/env python3 """ Lightweight repository sanity checks suitable for GitHub Actions. This is intentionally NOT a compiler for MQL5 (MetaEditor isn't available on CI). """ from __future__ import annotations import os import re import sys from pathlib import Path REPO_ROOT = Path(__file__).resolve().parents[1] MQL5_PREFIX = str(REPO_ROOT / "mt5" / "MQL5") + os.sep def fail(msg: str) -> None: print(f"ERROR: {msg}", file=sys.stderr) raise SystemExit(1) def run_validation() -> list[str]: """ Single-pass repository validation using os.walk for performance. Validates MQL5 source files and scans for potential secrets. """ # Known credential formats (targeted) patterns = [ ("telegram_bot_token", r"\b\d{8,}:[A-Za-z0-9_-]{20,}\b"), ("github_pat", r"\bgithub_pat_[A-Za-z0-9_]{20,}\b"), ("github_classic_pat", r"\bghp_[A-Za-z0-9]{30,}\b"), ("github_actions_token", r"\bghs_[A-Za-z0-9]{30,}\b"), ("aws_access_key_id", r"\bAKIA[0-9A-Z]{16}\b"), ("gcp_api_key", r"\bAIza[0-9A-Za-z\-_]{30,}\b"), ] # ⚡ Optimization: Combine patterns into a single regex with named groups combined_pattern = re.compile("|".join(f"(?P<{name}>{pat})" for name, pat in patterns)) pattern_names = [p[0] for p in patterns] scan_suffixes = { ".md", ".txt", ".json", ".yml", ".yaml", ".toml", ".ini", ".cfg", ".py", ".ps1", ".sh", ".bat", ".mq5", ".mqh", ".html", ".js", ".css", } scan_filenames = {"Dockerfile", "docker-compose.yml", "docker-compose.dev.yml"} excluded_dirnames = { ".git", "dist", "logs", "data", "__pycache__", "venv", "env", ".venv", "node_modules", } found_source_files: list[str] = [] secret_findings: list[tuple[str, str, int]] = [] # ⚡ Optimization: Single pass traversal using os.walk (faster than Path.rglob) for root, dirs, files in os.walk(REPO_ROOT): # Skip excluded directories in-place to prevent os.walk from entering them dirs[:] = [d for d in dirs if d not in excluded_dirnames] for file in files: full_path = os.path.join(root, file) rel_path = os.path.relpath(full_path, REPO_ROOT) # 1. Source File Validation (MQL5 files) # Use string prefix check for performance is_mql5 = full_path.startswith(MQL5_PREFIX) ext = os.path.splitext(file)[1].lower() if is_mql5 and ext in {".mq5", ".mqh"}: try: sz = os.path.getsize(full_path) if sz > 5_000_000: fail(f"Unexpectedly large source file (>5MB): {rel_path} ({sz} bytes)") # ⚡ Optimization: Chunked binary read for NUL byte detection with open(full_path, "rb") as f: has_nul = False while True: chunk = f.read(64 * 1024) if not chunk: break if b"\x00" in chunk: has_nul = True break if has_nul: fail(f"NUL byte found in {rel_path}") found_source_files.append(rel_path) except OSError as e: fail(f"Failed to access source file {rel_path}: {e}") # 2. Secret Scanning if file in scan_filenames or ext in scan_suffixes: try: # Skip large files for secret scanning if os.path.getsize(full_path) > 2_000_000: continue # ⚡ Optimization: Efficient line-by-line scan with combined regex with open(full_path, 'r', encoding='utf-8', errors='ignore') as f: for i, line in enumerate(f, start=1): match = combined_pattern.search(line) if match: # Identify which group matched for name in pattern_names: if match.group(name): secret_findings.append((name, rel_path, i)) break except OSError: continue # Report findings if secret_findings: msg_lines = ["Potential secret(s) detected in tracked files:"] for name, path, line_no in secret_findings[:25]: msg_lines.append(f"- {name}: {path}:{line_no}") if len(secret_findings) > 25: msg_lines.append(f"... and {len(secret_findings) - 25} more") msg_lines.append("Remove the credential from the repository and rotate/revoke it.") fail("\n".join(msg_lines)) if not found_source_files: # Check if directory exists at least if not os.path.exists(MQL5_PREFIX): fail(f"Missing directory: {MQL5_PREFIX}") fail(f"No .mq5/.mqh files found under {MQL5_PREFIX}") return sorted(found_source_files) def main() -> int: source_files = run_validation() print("OK: found source files:") for f in source_files: print(f"- {f}") return 0 if __name__ == "__main__": sys.exit(main())