diff --git a/check_kernel_commits.py b/check_kernel_commits.py new file mode 100644 index 0000000..84317e3 --- /dev/null +++ b/check_kernel_commits.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 + +import argparse +import subprocess +import re +import sys +import textwrap + +def run_git(repo, args): + """Run a git command in the given repository and return its output as a string.""" + result = subprocess.run(['git', '-C', repo] + args, text=True, capture_output=True, check=False) + if result.returncode != 0: + raise RuntimeError(f"Git command failed: {' '.join(args)}\n{result.stderr}") + return result.stdout + +def ref_exists(repo, ref): + """Return True if the given ref exists in the repository, False otherwise.""" + try: + run_git(repo, ['rev-parse', '--verify', '--quiet', ref]) + return True + except RuntimeError: + return False + +def get_pr_commits(repo, pr_branch, base_branch): + """Get a list of commit SHAs that are in the PR branch but not in the base branch.""" + output = run_git(repo, ['rev-list', f'{base_branch}..{pr_branch}']) + return output.strip().splitlines() + +def get_commit_message(repo, sha): + """Get the commit message for a given commit SHA.""" + return run_git(repo, ['log', '-n', '1', '--format=%B', sha]) + +def get_short_hash_and_subject(repo, sha): + """Get the abbreviated commit hash and subject for a given commit SHA.""" + output = run_git(repo, ['log', '-n', '1', '--format=%h%x00%s', sha]).strip() + short_hash, subject = output.split('\x00', 1) + return short_hash, subject + +def hash_exists_in_mainline(repo, upstream_ref, hash_): + """ + Return True if hash_ is reachable from upstream_ref (i.e., is an ancestor of it). + """ + try: + run_git(repo, ['merge-base', '--is-ancestor', hash_, upstream_ref]) + return True + except RuntimeError: + return False + +def find_fixes_in_mainline(repo, upstream_ref, hash_): + """ + Return unique commits in upstream_ref that have Fixes: in their message, case-insensitive. + Start from 12 chars and work down to 6, but do not include duplicates if already found at a longer length. + """ + seen_commits = set() + results = [] + for length in range(12, 5, -1): # 12 down to 6 + short_hash = hash_[:length] + output = run_git(repo, [ + 'log', upstream_ref, '--grep', f'Fixes: {short_hash}', '-i', '--format=%H %h %s (%an)' + ]).strip() + if output: + for line in output.splitlines(): + full_hash = line.split()[0] + if full_hash not in seen_commits: + seen_commits.add(full_hash) + results.append(' '.join(line.split()[1:])) + return "\n".join(results) + +def wrap_paragraph(text, width=80, initial_indent='', subsequent_indent=''): + """Wrap a paragraph of text to the specified width and indentation.""" + wrapper = textwrap.TextWrapper(width=width, + initial_indent=initial_indent, + subsequent_indent=subsequent_indent, + break_long_words=False, + break_on_hyphens=False) + return wrapper.fill(text) + +def main(): + parser = argparse.ArgumentParser(description="Check upstream references and Fixes: tags in PR branch commits.") + parser.add_argument("repo", help="Path to the git repo") + parser.add_argument("pr_branch", help="Name of the PR branch") + parser.add_argument("base_branch", help="Name of the base branch") + parser.add_argument("--markdown", action='store_true', help="Output in Markdown, suitable for GitHub PR comments") + args = parser.parse_args() + + upstream_ref = 'origin/kernel-mainline' + + # Validate that all required refs exist before continuing + missing_refs = [] + for refname, refval in [('upstream reference', upstream_ref), + ('PR branch', args.pr_branch), + ('base branch', args.base_branch)]: + if not ref_exists(args.repo, refval): + missing_refs.append((refname, refval)) + if missing_refs: + for refname, refval in missing_refs: + print(f"ERROR: The {refname} '{refval}' does not exist in the given repo.") + print("Please fetch or create the required references before running this script.") + sys.exit(1) + + pr_commits = get_pr_commits(args.repo, args.pr_branch, args.base_branch) + if not pr_commits: + if args.markdown: + print("> ℹ️ **No commits found in PR branch that are not in base branch.**") + else: + print("No commits found in PR branch that are not in base branch.") + sys.exit(0) + + any_findings = False + out_lines = [] + + for sha in reversed(pr_commits): # oldest first + short_hash, subject = get_short_hash_and_subject(args.repo, sha) + pr_commit_desc = f"{short_hash} ({subject})" + msg = get_commit_message(args.repo, sha) + upstream_hashes = re.findall(r'^commit\s+([0-9a-fA-F]{12,40})', msg, re.MULTILINE) + for uhash in upstream_hashes: + short_uhash = uhash[:12] + # Ensure the referenced commit in the PR actually exists in the upstream ref. + exists = hash_exists_in_mainline(args.repo, upstream_ref, uhash) + if not exists: + any_findings = True + if args.markdown: + out_lines.append( + f"- ❗ PR commit `{pr_commit_desc}` references upstream commit \n" + f" `{short_uhash}` which does **not** exist in the upstream Linux kernel.\n" + ) + else: + header = (f"[NOTFOUND] PR commit {pr_commit_desc} references upstream commit " + f"{short_uhash}, which does not exist in kernel-mainline.") + out_lines.append( + wrap_paragraph(header, width=80, initial_indent='', + subsequent_indent=' ') # 11 spaces for '[NOTFOUND] ' + ) + out_lines.append("") # blank line + continue + fixes = find_fixes_in_mainline(args.repo, upstream_ref, uhash) + if fixes: + any_findings = True + if args.markdown: + fixes_block = " " + fixes.replace("\n", "\n ") + out_lines.append( + f"- ⚠️ PR commit `{pr_commit_desc}` references upstream commit \n" + f" `{short_uhash}` which has been referenced by a `Fixes:` tag in the upstream \n" + f" Linux kernel:\n\n" + f"```text\n{fixes_block}\n```\n" + ) + else: + header = (f"[FIXES] PR commit {pr_commit_desc} references upstream commit " + f"{short_uhash}, which has Fixes tags:") + out_lines.append( + wrap_paragraph(header, width=80, initial_indent='', + subsequent_indent=' ') # 8 spaces for '[FIXES] ' + ) + out_lines.append("") # blank line after 'Fixes tags:' + for line in fixes.splitlines(): + out_lines.append(' ' + line) + out_lines.append("") # blank line + + if any_findings: + if args.markdown: + print("## :mag: Upstream Linux Kernel Commit Check\n") + print('\n'.join(out_lines)) + print("*This is an automated message from the kernel commit checker workflow.*") + else: + print('\n'.join(out_lines)) + else: + if args.markdown: + print("> ✅ **All referenced commits exist upstream and have no Fixes: tags.**") + else: + print("All referenced commits exist upstream and have no Fixes: tags.") + +if __name__ == "__main__": + main() \ No newline at end of file