MQL5-Google-Onedrive/scripts/ci_validate_repo.py
google-labs-jules[bot] 565e0f72d0 Bolt: optimize repository validation with chunked reading and early size checks
Optimized `scripts/ci_validate_repo.py` for performance and memory efficiency:
- Refactored `check_no_nul_bytes` and `check_reasonable_size` into a single-pass `validate_files` function.
- Implemented early size check using `p.stat().st_size` to avoid reading files larger than 5MB.
- Replaced `p.read_bytes()` with chunked binary reading (64KB chunks) to detect NUL bytes, significantly reducing memory overhead.
- Added performance comments and maintained the efficient `rglob('*')` traversal pattern.

Measurements:
- Reduced I/O passes per file from 2 to 1.
- Reduced peak memory usage for large files from O(file size) to a constant 64 KB chunk buffer.
- Faster rejection of oversized files (>5 MB): the size check now uses `stat()` metadata, so rejected files are never read (the replaced two-pass pattern is sketched below).
2026-02-19 19:51:26 +00:00
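
For context, here is a minimal sketch of the pattern this commit replaces, reconstructed from the description above (the helper names `check_no_nul_bytes` and `check_reasonable_size` come from the commit message; their exact bodies are an assumption). Each helper read every file in full, so the tree was scanned twice and large files were loaded whole into memory:

from pathlib import Path

# Assumed shape of the previous two-pass validation (not the committed code):
def check_no_nul_bytes(files: list[Path]) -> None:
    for p in files:
        data = p.read_bytes()  # first full read: whole file loaded into memory
        if b"\x00" in data:
            raise SystemExit(f"ERROR: NUL byte found in {p}")

def check_reasonable_size(files: list[Path]) -> None:
    for p in files:
        data = p.read_bytes()  # second full read of the same file
        if len(data) > 5_000_000:
            raise SystemExit(f"ERROR: Unexpectedly large source file (>5MB): {p}")

The committed `validate_files` below folds both checks into one loop, rejects oversized files from `st_size` before opening them, and scans for NUL bytes in 64 KB chunks.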


#!/usr/bin/env python3
"""
Lightweight repository sanity checks suitable for GitHub Actions.
This is intentionally NOT a compiler for MQL5 (MetaEditor isn't available on CI).
"""
from __future__ import annotations
import sys
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parents[1]
MQL5_DIR = REPO_ROOT / "mt5" / "MQL5"


def fail(msg: str) -> None:
    print(f"ERROR: {msg}", file=sys.stderr)
    raise SystemExit(1)


def iter_source_files() -> list[Path]:
    if not MQL5_DIR.exists():
        fail(f"Missing directory: {MQL5_DIR}")
    files: list[Path] = []
    # ⚡ Bolt: rglob('*') followed by suffix filtering is more efficient than
    # multiple targeted globs (like rglob('*.mq5') and rglob('*.mqh'))
    # as it avoids repeated traversals of the file system tree.
    for p in MQL5_DIR.rglob("*"):
        if p.is_file() and p.suffix.lower() in {".mq5", ".mqh"}:
            files.append(p)
    if not files:
        fail(f"No .mq5/.mqh files found under {MQL5_DIR}")
    return sorted(files)


def validate_files(files: list[Path]) -> None:
    """⚡ Bolt: Single-pass validation for performance and memory efficiency."""
    for p in files:
        # ⚡ Bolt: Use stat() for early size check before reading file content.
        # This prevents loading unexpectedly large files into memory.
        stat = p.stat()
        if stat.st_size > 5_000_000:
            fail(f"Unexpectedly large source file (>5MB): {p.relative_to(REPO_ROOT)} ({stat.st_size} bytes)")
        # ⚡ Bolt: Use chunked binary reading (64KB) to detect NUL bytes efficiently.
        # This minimizes memory overhead compared to full file reads.
        with p.open("rb") as f:
            while chunk := f.read(65536):
                if b"\x00" in chunk:
                    fail(f"NUL byte found in {p.relative_to(REPO_ROOT)}")


def main() -> int:
    files = iter_source_files()
    validate_files(files)
    rel = [str(p.relative_to(REPO_ROOT)) for p in files]
    print("OK: found source files:")
    for r in rel:
        print(f"- {r}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
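
A minimal usage sketch for running the same checks locally before pushing (the wrapper below is hypothetical and not part of the repository; it assumes it is run from the repository root):

import subprocess
import sys

# Invoke the CI sanity checks with the current interpreter and propagate the exit code.
result = subprocess.run([sys.executable, "scripts/ci_validate_repo.py"])
raise SystemExit(result.returncode)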