MQL5-Google-Onedrive/scripts/review_pull_requests.py

Name: MQL5 Algo Forge
Brand: MQL5
#!/usr/bin/env python3
"""
Pull Request Review Script
Reviews all pull requests and creates a comprehensive summary
"""

import subprocess
import sys
import json
from pathlib import Path
from datetime import datetime
from collections import defaultdict

REPO_ROOT = Path(__file__).resolve().parents[1]

# ⚡ Bolt: Global cache for branch metadata to avoid redundant git subprocess calls.
BRANCH_METADATA_CACHE = {}


def get_git_version():
    """Returns the git version as a tuple of integers."""
    res = run_command(["git", "--version"])
    if res and res.returncode == 0:
        try:
            # Format: "git version 2.52.0"
            parts = res.stdout.strip().split(" ")[2].split(".")
            return tuple(int(p) for p in parts if p.isdigit())
        except (IndexError, ValueError):
            return (0, 0, 0)
    return (0, 0, 0)


def run_command(cmd, capture_output=True):
    """Run a command and return the result."""
    try:
        result = subprocess.run(
            cmd,
            cwd=REPO_ROOT,
            capture_output=capture_output,
            text=True,
            timeout=30,
            encoding='utf-8',
            errors='replace'
        )
        return result
    except Exception as e:
        print(f"Error running command: {e}", file=sys.stderr)
        return None


def get_prs_via_gh_cli():
    """Get PRs using GitHub CLI."""
    result = run_command(["gh", "pr", "list", "--state", "all", "--json", "number,title,state,author,createdAt,updatedAt,headRefName,baseRefName,isDraft,labels"])
    if result and result.returncode == 0:
        try:
            return json.loads(result.stdout)
        except json.JSONDecodeError:
            return []
    return None


def get_prs_via_git():
    """⚡ Bolt: Optimized branch metadata retrieval using git for-each-ref bulk fetch."""
    global BRANCH_METADATA_CACHE
    BRANCH_METADATA_CACHE = {}

    git_ver = get_git_version()
    # %(ahead-behind) atom was added in Git 2.41+
    use_ahead_behind = git_ver >= (2, 41)

    # ⚡ Bolt: Fetch all required metadata in a single subprocess call.
    # Format: refname | ahead-behind | date | subject
    # ahead-behind returns "ahead behind" counts relative to the base branch.
    fmt = "%(refname:short)|%(ahead-behind:main)|%(committerdate:iso8601)|%(subject)" if use_ahead_behind else "%(refname:short)|0 0|%(committerdate:iso8601)|%(subject)"

    result = run_command(["git", "for-each-ref", f"--format={fmt}", "refs/remotes"])

    open_branches = []
    merged_branches = []

    if result and result.returncode == 0:
        lines = result.stdout.strip().split("\n")
        for line in lines:
            if not line.strip():
                continue

            # ⚡ Bolt: Use maxsplit to handle delimiters in commit subjects.
            parts = line.split("|", 3)
            if len(parts) < 4:
                continue

            ref, ab, date, subject = parts

            # ⚡ Bolt: Tighten filtering to only exclude the base branch and HEAD,
            # while allowing feature branches that might contain "main" in their name.
            if ref.endswith("/main") or "/HEAD" in ref or ref == "origin":
                continue

            # Parse ahead-behind counts: "1 0" means 1 ahead, 0 behind.
            # A branch is merged if it has 0 commits ahead of main.
            try:
                # If git version is < 2.41, ahead will be -1 (fallback).
                ahead = int(ab.split(" ")[0]) if use_ahead_behind else -1
            except (ValueError, IndexError):
                ahead = -1

            # Populate cache
            BRANCH_METADATA_CACHE[ref] = {
                "branch": ref.replace("origin/", ""),
                "full_name": ref,
                # ⚡ Bolt: For Git < 2.41, set to -1 to trigger lazy fetch in get_branch_info.
                "commit_count": ahead,
                "commits": [subject],
                "last_commit_date": date
            }

            if ahead == 0:
                merged_branches.append(ref)
            else:
                # If ahead is -1 (fallback), we'll assume it's open for now
                open_branches.append(ref)

    # ⚡ Bolt: If ahead-behind was not available, fall back to slower individual checks only for merged status.
    if not use_ahead_behind:
        res_merged = run_command(["git", "branch", "-r", "--merged", "main"])
        if res_merged and res_merged.returncode == 0:
            merged_list = [b.strip() for b in res_merged.stdout.split("\n")]
            merged_branches = [b for b in open_branches if b in merged_list]
            open_branches = [b for b in open_branches if b not in merged_list]

    return {
        "open": open_branches,
        "merged": merged_branches
    }


def analyze_branch_name(branch_name):
    """Analyze branch name to extract PR information."""
    branch = branch_name.replace("origin/", "")

    info = {
        "type": "unknown",
        "category": "other",
        "description": branch
    }

    # Categorize branches
    if branch.startswith("Cursor/"):
        info["type"] = "cursor"
        info["category"] = "ai-generated"
        info["description"] = branch.replace("Cursor/A6-9V/", "")
    elif branch.startswith("copilot/"):
        info["type"] = "copilot"
        info["category"] = "ai-generated"
        info["description"] = branch.replace("copilot/", "")
    elif branch.startswith("bolt-"):
        info["type"] = "bolt"
        info["category"] = "optimization"
        info["description"] = branch.replace("bolt-", "")
    elif branch.startswith("feat/"):
        info["type"] = "feature"
        info["category"] = "feature"
        info["description"] = branch.replace("feat/", "")
    elif branch.startswith("feature/"):
        info["type"] = "feature"
        info["category"] = "feature"
        info["description"] = branch.replace("feature/", "")

    return info


def get_branch_info(branch_name):
    """⚡ Bolt: Optimized branch info retrieval using global metadata cache."""
    if branch_name in BRANCH_METADATA_CACHE:
        info = BRANCH_METADATA_CACHE[branch_name]
        # ⚡ Bolt: If commit_count is -1 (Git < 2.41) or we need more than 1 commit,
        # perform a lazy fetch to ensure data accuracy.
        if info["commit_count"] == -1 or (info["commit_count"] > 1 and len(info["commits"]) == 1):
             result = run_command(["git", "log", "--oneline", "-n", "5", f"main..{branch_name}"])
             if result and result.returncode == 0:
                 info["commits"] = [c.strip() for c in result.stdout.strip().split("\n") if c.strip()]
                 info["commit_count"] = len(info["commits"]) if info["commit_count"] == -1 else info["commit_count"]
        return info

    # Fallback to slow path if not in cache (should not happen with for-each-ref logic)
    branch = branch_name.replace("origin/", "")
    result = run_command(["git", "log", "--oneline", f"main..{branch_name}"])
    commits = [c.strip() for c in result.stdout.strip().split("\n") if c.strip()] if result and result.returncode == 0 else []

    result_date = run_command(["git", "log", "-1", "--format=%ci", branch_name])
    last_commit = result_date.stdout.strip() if result_date and result_date.returncode == 0 else None

    return {
        "branch": branch,
        "full_name": branch_name,
        "commit_count": len(commits),
        "commits": commits[:5],
        "last_commit_date": last_commit
    }


def main():
    """Main review function."""
    print("=" * 80)
    print("PULL REQUEST REVIEW")
    print(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print("=" * 80)
    print()

    # Try GitHub CLI first
    prs = get_prs_via_gh_cli()

    if prs is not None:
        print(f"Found {len(prs)} pull requests via GitHub CLI")
        print()

        # Group by state
        by_state = defaultdict(list)
        for pr in prs:
            by_state[pr.get("state", "unknown")].append(pr)

        print("Pull Requests by State:")
        for state, pr_list in sorted(by_state.items()):
            print(f"  {state.upper()}: {len(pr_list)}")
        print()

        # Show open PRs
        open_prs = by_state.get("OPEN", [])
        if open_prs:
            print("=" * 80)
            print("OPEN PULL REQUESTS")
            print("=" * 80)
            for pr in open_prs:
                print(f"\nPR #{pr.get('number', 'N/A')}: {pr.get('title', 'No title')}")
                print(f"  Author: {pr.get('author', {}).get('login', 'Unknown')}")
                print(f"  Branch: {pr.get('headRefName', 'N/A')} -> {pr.get('baseRefName', 'main')}")
                print(f"  Created: {pr.get('createdAt', 'N/A')}")
                print(f"  Updated: {pr.get('updatedAt', 'N/A')}")
                print(f"  Draft: {'Yes' if pr.get('isDraft') else 'No'}")
                labels = [l.get('name') for l in pr.get('labels', [])]
                if labels:
                    print(f"  Labels: {', '.join(labels)}")

        # Show merged PRs
        merged_prs = by_state.get("MERGED", [])
        if merged_prs:
            print("\n" + "=" * 80)
            print(f"MERGED PULL REQUESTS ({len(merged_prs)} total)")
            print("=" * 80)
            print(f"\nShowing last 10 merged PRs:")
            for pr in merged_prs[-10:]:
                print(f"  PR #{pr.get('number', 'N/A')}: {pr.get('title', 'No title')}")

    else:
        # Fallback to git branch analysis
        print("GitHub CLI not available, analyzing branches...")
        print()

        branch_info = get_prs_via_git()

        open_branches = branch_info["open"]
        merged_branches = branch_info["merged"]

        print(f"Open branches (potential PRs): {len(open_branches)}")
        print(f"Merged branches (completed PRs): {len(merged_branches)}")
        print()

        # Categorize open branches
        categories = defaultdict(list)
        for branch in open_branches:
            info = analyze_branch_name(branch)
            categories[info["category"]].append((branch, info))

        print("=" * 80)
        print("OPEN BRANCHES (Potential Pull Requests)")
        print("=" * 80)
        print()

        for category, branches in sorted(categories.items()):
            print(f"{category.upper()}: {len(branches)} branches")
            for branch, info in branches[:10]:  # Show first 10
                branch_details = get_branch_info(branch)
                print(f"  - {info['description']}")
                print(f"    Branch: {branch_details['branch']}")
                print(f"    Commits: {branch_details['commit_count']}")
                if branch_details['last_commit_date']:
                    print(f"    Last commit: {branch_details['last_commit_date']}")
            if len(branches) > 10:
                print(f"    ... and {len(branches) - 10} more")
            print()

        print("=" * 80)
        print("MERGED BRANCHES (Completed Pull Requests)")
        print("=" * 80)
        print(f"\nTotal merged: {len(merged_branches)}")
        print("\nRecent merged branches:")
        for branch in merged_branches[:20]:
            info = analyze_branch_name(branch)
            print(f"  - {info['description']}")

    print("\n" + "=" * 80)
    print("REVIEW COMPLETE")
    print("=" * 80)
    print("\nNote: GitHub doesn't support 'pinning' pull requests directly.")
    print("Consider:")
    print("1. Creating a tracking issue for important PRs")
    print("2. Using labels to categorize PRs")
    print("3. Adding PRs to project boards")
    print("4. Creating a PR summary document")


if __name__ == "__main__":
    main()