#!/usr/bin/env python3
"""
Lightweight repository sanity checks suitable for GitHub Actions.

This is intentionally NOT a compiler for MQL5 (MetaEditor isn't available on CI).
"""

from __future__ import annotations

import re
import sys
from pathlib import Path
from typing import NoReturn

REPO_ROOT = Path(__file__).resolve().parents[1]
MQL5_DIR = REPO_ROOT / "mt5" / "MQL5"

# MQL5 source files larger than this are treated as accidentally committed
# build artifacts.
_MAX_SOURCE_FILE_BYTES = 5_000_000
# Files larger than this are skipped by the secret scan to keep it fast.
_MAX_SCANNED_FILE_BYTES = 2_000_000
# Cap on the number of findings listed in the failure message.
_MAX_REPORTED_FINDINGS = 25

_SOURCE_SUFFIXES = frozenset({".mq5", ".mqh"})

# Known credential formats (targeted to avoid false positives).
_SECRET_PATTERNS: tuple[tuple[str, re.Pattern[str]], ...] = (
    ("telegram_bot_token", re.compile(r"\b\d{8,}:[A-Za-z0-9_-]{20,}\b")),
    ("github_pat", re.compile(r"\bgithub_pat_[A-Za-z0-9_]{20,}\b")),
    ("github_classic_pat", re.compile(r"\bghp_[A-Za-z0-9]{30,}\b")),
    ("github_actions_token", re.compile(r"\bghs_[A-Za-z0-9]{30,}\b")),
    ("aws_access_key_id", re.compile(r"\bAKIA[0-9A-Z]{16}\b")),
    # Very rough GCP API key format; still specific enough to avoid most noise.
    ("gcp_api_key", re.compile(r"\bAIza[0-9A-Za-z\-_]{30,}\b")),
)

# Only text-like files are scanned; binary/large files are skipped.
_SCAN_SUFFIXES = frozenset(
    {
        ".md",
        ".txt",
        ".json",
        ".yml",
        ".yaml",
        ".toml",
        ".ini",
        ".cfg",
        ".py",
        ".ps1",
        ".sh",
        ".bat",
        ".mq5",
        ".mqh",
        ".html",
        ".js",
        ".css",
    }
)
_SCAN_FILENAMES = frozenset(
    {"Dockerfile", "docker-compose.yml", "docker-compose.dev.yml"}
)
_EXCLUDED_DIRNAMES = frozenset(
    {
        ".git",
        "dist",
        "logs",
        "data",
        "__pycache__",
        "venv",
        "env",
        ".venv",
        "node_modules",
    }
)


def fail(msg: str) -> NoReturn:
    """Print ``ERROR: <msg>`` to stderr and exit with status 1.

    Raises:
        SystemExit: always, with exit code 1.
    """
    print(f"ERROR: {msg}", file=sys.stderr)
    raise SystemExit(1)


def _display_path(path: Path, root: Path) -> str:
    """Return *path* relative to *root*, or unchanged if it is not under it."""
    try:
        return str(path.relative_to(root))
    except ValueError:
        return str(path)


def _validate_source_file(file_path: Path, repo_root: Path) -> None:
    """Fail if *file_path* is unreadable, oversized, or contains a NUL byte."""
    shown = _display_path(file_path, repo_root)

    # Check the size BEFORE reading the content, so a huge build artifact is
    # rejected without being loaded into memory.
    try:
        file_size = file_path.stat().st_size
    except OSError as error:
        fail(f"Failed to read file {shown}: {error}")
    if file_size > _MAX_SOURCE_FILE_BYTES:
        fail(f"Unexpectedly large source file (>5MB): {shown} ({file_size} bytes)")

    try:
        data = file_path.read_bytes()
    except OSError as error:
        fail(f"Failed to read file {shown}: {error}")
    if b"\x00" in data:
        fail(f"NUL byte found in {shown}")


def validate_and_collect_files(
    mql5_dir: Path | None = None,
    repo_root: Path | None = None,
) -> list[Path]:
    """Validate every MQL5 source file and return them sorted.

    Walks *mql5_dir* recursively, keeps files whose suffix is ``.mq5`` or
    ``.mqh`` (case-insensitive), and checks each one for size and NUL bytes.

    Args:
        mql5_dir: Directory to walk. Defaults to ``MQL5_DIR``.
        repo_root: Root used to shorten paths in error messages. Defaults to
            ``REPO_ROOT``.

    Returns:
        The sorted list of validated source file paths.

    Raises:
        SystemExit: via ``fail`` when the directory is missing, a file is
            unreadable, larger than 5 MB, contains a NUL byte, or no source
            files are found at all.
    """
    source_dir = MQL5_DIR if mql5_dir is None else mql5_dir
    root = REPO_ROOT if repo_root is None else repo_root

    if not source_dir.exists():
        fail(f"Missing directory: {source_dir}")

    files: list[Path] = []
    # Single pass: find and validate in the same walk.
    for file_path in source_dir.rglob("*"):
        if not file_path.is_file():
            continue
        if file_path.suffix.lower() not in _SOURCE_SUFFIXES:
            continue
        _validate_source_file(file_path, root)
        files.append(file_path)

    if not files:
        fail(f"No .mq5/.mqh files found under {source_dir}")

    return sorted(files)


def _is_scan_candidate(file_path: Path, repo_root: Path) -> bool:
    """Return True if *file_path* should be included in the secret scan."""
    # Only directory components BELOW the repo root are compared against the
    # exclusion list. Testing the absolute path would silently disable the
    # scan whenever the checkout lives under a directory that happens to be
    # called e.g. "data" or "env".
    try:
        directory_parts = file_path.relative_to(repo_root).parent.parts
    except ValueError:
        directory_parts = file_path.parent.parts
    if any(part in _EXCLUDED_DIRNAMES for part in directory_parts):
        return False
    return (
        file_path.name in _SCAN_FILENAMES
        or file_path.suffix.lower() in _SCAN_SUFFIXES
    )


def _first_matching_pattern(line: str) -> str | None:
    """Return the name of the first secret pattern found in *line*, if any."""
    for pattern_name, pattern_regex in _SECRET_PATTERNS:
        if pattern_regex.search(line):
            return pattern_name
    return None


def scan_for_secrets(repo_root: Path | None = None) -> None:
    """Best-effort check to prevent accidentally committing credentials.

    Patterns are kept targeted to avoid false positives, and the matched
    secret is never printed to CI logs: only the pattern name, path and line
    number are reported.

    Args:
        repo_root: Directory tree to scan. Defaults to ``REPO_ROOT``.

    Raises:
        SystemExit: via ``fail`` when at least one line matches a known
            credential format.
    """
    root = REPO_ROOT if repo_root is None else repo_root

    findings: list[tuple[str, Path, int]] = []
    for file_path in root.rglob("*"):
        if not file_path.is_file():
            continue
        if not _is_scan_candidate(file_path, root):
            continue

        try:
            if file_path.stat().st_size > _MAX_SCANNED_FILE_BYTES:
                continue
            text = file_path.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            # Not critical; skip unreadable files rather than failing.
            continue

        # Line-by-line scan lets us report line numbers without leaking the
        # secret. At most one finding is recorded per line.
        for line_number, line in enumerate(text.splitlines(), start=1):
            pattern_name = _first_matching_pattern(line)
            if pattern_name is not None:
                findings.append((pattern_name, file_path, line_number))

    if not findings:
        return

    msg_lines = ["Potential secret(s) detected in tracked files:"]
    msg_lines.extend(
        f"- {pattern_name}: {_display_path(file_path, root)}:{line_number}"
        for pattern_name, file_path, line_number in findings[:_MAX_REPORTED_FINDINGS]
    )
    if len(findings) > _MAX_REPORTED_FINDINGS:
        msg_lines.append(f"... and {len(findings) - _MAX_REPORTED_FINDINGS} more")
    msg_lines.append("Remove the credential from the repository and rotate/revoke it.")
    fail("\n".join(msg_lines))


def main() -> int:
    """Run all checks and print the validated source files.

    Returns:
        0 on success. Any failed check exits the process through ``fail``.
    """
    files = validate_and_collect_files()
    scan_for_secrets()

    print("OK: found source files:")
    for file_path in files:
        print(f"- {file_path.relative_to(REPO_ROOT)}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())