-
Notifications
You must be signed in to change notification settings - Fork 1
Add script to validate upstream references in PR branch commits #22
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: mainline
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import subprocess | ||
import re | ||
import sys | ||
import textwrap | ||
|
||
def run_git(repo, args): | ||
"""Run a git command in the given repository and return its output as a string.""" | ||
result = subprocess.run(['git', '-C', repo] + args, text=True, capture_output=True, check=False) | ||
if result.returncode != 0: | ||
raise RuntimeError(f"Git command failed: {' '.join(args)}\n{result.stderr}") | ||
return result.stdout | ||
|
||
def ref_exists(repo, ref): | ||
"""Return True if the given ref exists in the repository, False otherwise.""" | ||
try: | ||
run_git(repo, ['rev-parse', '--verify', '--quiet', ref]) | ||
return True | ||
except RuntimeError: | ||
return False | ||
|
||
def get_pr_commits(repo, pr_branch, base_branch): | ||
"""Get a list of commit SHAs that are in the PR branch but not in the base branch.""" | ||
output = run_git(repo, ['rev-list', f'{base_branch}..{pr_branch}']) | ||
return output.strip().splitlines() | ||
|
||
def get_commit_message(repo, sha): | ||
"""Get the commit message for a given commit SHA.""" | ||
return run_git(repo, ['log', '-n', '1', '--format=%B', sha]) | ||
|
||
def get_short_hash_and_subject(repo, sha): | ||
"""Get the abbreviated commit hash and subject for a given commit SHA.""" | ||
output = run_git(repo, ['log', '-n', '1', '--format=%h%x00%s', sha]).strip() | ||
short_hash, subject = output.split('\x00', 1) | ||
return short_hash, subject | ||
|
||
def hash_exists_in_mainline(repo, upstream_ref, hash_): | ||
""" | ||
Return True if hash_ is reachable from upstream_ref (i.e., is an ancestor of it). | ||
""" | ||
try: | ||
run_git(repo, ['merge-base', '--is-ancestor', hash_, upstream_ref]) | ||
return True | ||
except RuntimeError: | ||
return False | ||
|
||
def find_fixes_in_mainline(repo, upstream_ref, hash_): | ||
""" | ||
Return unique commits in upstream_ref that have Fixes: <N chars of hash_> in their message, case-insensitive. | ||
Start from 12 chars and work down to 6, but do not include duplicates if already found at a longer length. | ||
""" | ||
seen_commits = set() | ||
results = [] | ||
for length in range(12, 5, -1): # 12 down to 6 | ||
short_hash = hash_[:length] | ||
output = run_git(repo, [ | ||
'log', upstream_ref, '--grep', f'Fixes: {short_hash}', '-i', '--format=%H %h %s (%an)' | ||
]).strip() | ||
if output: | ||
for line in output.splitlines(): | ||
full_hash = line.split()[0] | ||
if full_hash not in seen_commits: | ||
seen_commits.add(full_hash) | ||
results.append(' '.join(line.split()[1:])) | ||
return "\n".join(results) | ||
|
||
def wrap_paragraph(text, width=80, initial_indent='', subsequent_indent=''): | ||
"""Wrap a paragraph of text to the specified width and indentation.""" | ||
wrapper = textwrap.TextWrapper(width=width, | ||
initial_indent=initial_indent, | ||
subsequent_indent=subsequent_indent, | ||
break_long_words=False, | ||
break_on_hyphens=False) | ||
return wrapper.fill(text) | ||
|
||
def main(): | ||
parser = argparse.ArgumentParser(description="Check upstream references and Fixes: tags in PR branch commits.") | ||
parser.add_argument("repo", help="Path to the git repo") | ||
parser.add_argument("pr_branch", help="Name of the PR branch") | ||
parser.add_argument("base_branch", help="Name of the base branch") | ||
parser.add_argument("--markdown", action='store_true', help="Output in Markdown, suitable for GitHub PR comments") | ||
args = parser.parse_args() | ||
|
||
upstream_ref = 'origin/kernel-mainline' | ||
|
||
# Validate that all required refs exist before continuing | ||
missing_refs = [] | ||
for refname, refval in [('upstream reference', upstream_ref), | ||
('PR branch', args.pr_branch), | ||
('base branch', args.base_branch)]: | ||
if not ref_exists(args.repo, refval): | ||
missing_refs.append((refname, refval)) | ||
if missing_refs: | ||
for refname, refval in missing_refs: | ||
print(f"ERROR: The {refname} '{refval}' does not exist in the given repo.") | ||
print("Please fetch or create the required references before running this script.") | ||
sys.exit(1) | ||
|
||
pr_commits = get_pr_commits(args.repo, args.pr_branch, args.base_branch) | ||
if not pr_commits: | ||
if args.markdown: | ||
print("> ℹ️ **No commits found in PR branch that are not in base branch.**") | ||
else: | ||
print("No commits found in PR branch that are not in base branch.") | ||
sys.exit(0) | ||
|
||
any_findings = False | ||
out_lines = [] | ||
|
||
for sha in reversed(pr_commits): # oldest first | ||
short_hash, subject = get_short_hash_and_subject(args.repo, sha) | ||
pr_commit_desc = f"{short_hash} ({subject})" | ||
msg = get_commit_message(args.repo, sha) | ||
upstream_hashes = re.findall(r'^commit\s+([0-9a-fA-F]{12,40})', msg, re.MULTILINE) | ||
for uhash in upstream_hashes: | ||
short_uhash = uhash[:12] | ||
# Ensure the referenced commit in the PR actually exists in the upstream ref. | ||
exists = hash_exists_in_mainline(args.repo, upstream_ref, uhash) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe just a simple comment that for this we're checking that the PR There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will add |
||
if not exists: | ||
any_findings = True | ||
if args.markdown: | ||
out_lines.append( | ||
f"- ❗ PR commit `{pr_commit_desc}` references upstream commit \n" | ||
f" `{short_uhash}` which does **not** exist in the upstream Linux kernel.\n" | ||
) | ||
else: | ||
header = (f"[NOTFOUND] PR commit {pr_commit_desc} references upstream commit " | ||
f"{short_uhash}, which does not exist in kernel-mainline.") | ||
out_lines.append( | ||
wrap_paragraph(header, width=80, initial_indent='', | ||
subsequent_indent=' ') # 11 spaces for '[NOTFOUND] ' | ||
) | ||
out_lines.append("") # blank line | ||
continue | ||
fixes = find_fixes_in_mainline(args.repo, upstream_ref, uhash) | ||
if fixes: | ||
any_findings = True | ||
if args.markdown: | ||
fixes_block = " " + fixes.replace("\n", "\n ") | ||
out_lines.append( | ||
f"- ⚠️ PR commit `{pr_commit_desc}` references upstream commit \n" | ||
f" `{short_uhash}` which has been referenced by a `Fixes:` tag in the upstream \n" | ||
f" Linux kernel:\n\n" | ||
f"```text\n{fixes_block}\n```\n" | ||
) | ||
else: | ||
header = (f"[FIXES] PR commit {pr_commit_desc} references upstream commit " | ||
f"{short_uhash}, which has Fixes tags:") | ||
out_lines.append( | ||
wrap_paragraph(header, width=80, initial_indent='', | ||
subsequent_indent=' ') # 8 spaces for '[FIXES] ' | ||
) | ||
out_lines.append("") # blank line after 'Fixes tags:' | ||
for line in fixes.splitlines(): | ||
out_lines.append(' ' + line) | ||
out_lines.append("") # blank line | ||
|
||
if any_findings: | ||
if args.markdown: | ||
print("## :mag: Upstream Linux Kernel Commit Check\n") | ||
print('\n'.join(out_lines)) | ||
print("*This is an automated message from the kernel commit checker workflow.*") | ||
else: | ||
print('\n'.join(out_lines)) | ||
else: | ||
if args.markdown: | ||
print("> ✅ **All referenced commits exist upstream and have no Fixes: tags.**") | ||
else: | ||
print("All referenced commits exist upstream and have no Fixes: tags.") | ||
|
||
if __name__ == "__main__": | ||
main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not in favor of this.
parser.add_argument
includes arequired=True
functional argument to fail out early. This seems like an easy way to mess something up in the future.See Reference: https://github.com/ctrliq/kernel-src-tree-tools/blob/mainline/rolling-release-update.py#L85
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The 'repo', 'pr_branch', and 'base_branch' arguments are defined as positional arguments (they aren't optional with - or --). So if you don't have three arguments you'll get an error right away. Once we have those arguments this code is checking to make sure that those references actually exist in the git repo we are operating on (along with the implied reference origin/kernel-mainline).