Skip to content

Add script to validate upstream references in PR branch commits #22

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: mainline
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 174 additions & 0 deletions check_kernel_commits.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
#!/usr/bin/env python3

import argparse
import subprocess
import re
import sys
import textwrap

def run_git(repo, args):
"""Run a git command in the given repository and return its output as a string."""
result = subprocess.run(['git', '-C', repo] + args, text=True, capture_output=True, check=False)
if result.returncode != 0:
raise RuntimeError(f"Git command failed: {' '.join(args)}\n{result.stderr}")
return result.stdout

def ref_exists(repo, ref):
"""Return True if the given ref exists in the repository, False otherwise."""
try:
run_git(repo, ['rev-parse', '--verify', '--quiet', ref])
return True
except RuntimeError:
return False

def get_pr_commits(repo, pr_branch, base_branch):
"""Get a list of commit SHAs that are in the PR branch but not in the base branch."""
output = run_git(repo, ['rev-list', f'{base_branch}..{pr_branch}'])
return output.strip().splitlines()

def get_commit_message(repo, sha):
"""Get the commit message for a given commit SHA."""
return run_git(repo, ['log', '-n', '1', '--format=%B', sha])

def get_short_hash_and_subject(repo, sha):
"""Get the abbreviated commit hash and subject for a given commit SHA."""
output = run_git(repo, ['log', '-n', '1', '--format=%h%x00%s', sha]).strip()
short_hash, subject = output.split('\x00', 1)
return short_hash, subject

def hash_exists_in_mainline(repo, upstream_ref, hash_):
"""
Return True if hash_ is reachable from upstream_ref (i.e., is an ancestor of it).
"""
try:
run_git(repo, ['merge-base', '--is-ancestor', hash_, upstream_ref])
return True
except RuntimeError:
return False

def find_fixes_in_mainline(repo, upstream_ref, hash_):
"""
Return unique commits in upstream_ref that have Fixes: <N chars of hash_> in their message, case-insensitive.
Start from 12 chars and work down to 6, but do not include duplicates if already found at a longer length.
"""
seen_commits = set()
results = []
for length in range(12, 5, -1): # 12 down to 6
short_hash = hash_[:length]
output = run_git(repo, [
'log', upstream_ref, '--grep', f'Fixes: {short_hash}', '-i', '--format=%H %h %s (%an)'
]).strip()
if output:
for line in output.splitlines():
full_hash = line.split()[0]
if full_hash not in seen_commits:
seen_commits.add(full_hash)
results.append(' '.join(line.split()[1:]))
return "\n".join(results)

def wrap_paragraph(text, width=80, initial_indent='', subsequent_indent=''):
"""Wrap a paragraph of text to the specified width and indentation."""
wrapper = textwrap.TextWrapper(width=width,
initial_indent=initial_indent,
subsequent_indent=subsequent_indent,
break_long_words=False,
break_on_hyphens=False)
return wrapper.fill(text)

def main():
parser = argparse.ArgumentParser(description="Check upstream references and Fixes: tags in PR branch commits.")
parser.add_argument("repo", help="Path to the git repo")
parser.add_argument("pr_branch", help="Name of the PR branch")
parser.add_argument("base_branch", help="Name of the base branch")
parser.add_argument("--markdown", action='store_true', help="Output in Markdown, suitable for GitHub PR comments")
args = parser.parse_args()

upstream_ref = 'origin/kernel-mainline'

# Validate that all required refs exist before continuing
missing_refs = []
for refname, refval in [('upstream reference', upstream_ref),
('PR branch', args.pr_branch),
('base branch', args.base_branch)]:
if not ref_exists(args.repo, refval):
missing_refs.append((refname, refval))
if missing_refs:
for refname, refval in missing_refs:
print(f"ERROR: The {refname} '{refval}' does not exist in the given repo.")
print("Please fetch or create the required references before running this script.")
sys.exit(1)
Comment on lines +90 to +99
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not in favor of this.
parser.add_argument includes a required=True functional argument to fail out early. This seems like an easy way to mess something up in the future.

See Reference: https://github.com/ctrliq/kernel-src-tree-tools/blob/mainline/rolling-release-update.py#L85

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The 'repo', 'pr_branch', and 'base_branch' arguments are defined as positional arguments (they aren't optional with - or --). So if you don't have three arguments you'll get an error right away. Once we have those arguments this code is checking to make sure that those references actually exist in the git repo we are operating on (along with the implied reference origin/kernel-mainline).


pr_commits = get_pr_commits(args.repo, args.pr_branch, args.base_branch)
if not pr_commits:
if args.markdown:
print("> ℹ️ **No commits found in PR branch that are not in base branch.**")
else:
print("No commits found in PR branch that are not in base branch.")
sys.exit(0)

any_findings = False
out_lines = []

for sha in reversed(pr_commits): # oldest first
short_hash, subject = get_short_hash_and_subject(args.repo, sha)
pr_commit_desc = f"{short_hash} ({subject})"
msg = get_commit_message(args.repo, sha)
upstream_hashes = re.findall(r'^commit\s+([0-9a-fA-F]{12,40})', msg, re.MULTILINE)
for uhash in upstream_hashes:
short_uhash = uhash[:12]
# Ensure the referenced commit in the PR actually exists in the upstream ref.
exists = hash_exists_in_mainline(args.repo, upstream_ref, uhash)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe just a simple comment that for this we're checking that the PR commit exists upstream.
It took me a quick minute for me to realize what was happening.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will add

if not exists:
any_findings = True
if args.markdown:
out_lines.append(
f"- ❗ PR commit `{pr_commit_desc}` references upstream commit \n"
f" `{short_uhash}` which does **not** exist in the upstream Linux kernel.\n"
)
else:
header = (f"[NOTFOUND] PR commit {pr_commit_desc} references upstream commit "
f"{short_uhash}, which does not exist in kernel-mainline.")
out_lines.append(
wrap_paragraph(header, width=80, initial_indent='',
subsequent_indent=' ') # 11 spaces for '[NOTFOUND] '
)
out_lines.append("") # blank line
continue
fixes = find_fixes_in_mainline(args.repo, upstream_ref, uhash)
if fixes:
any_findings = True
if args.markdown:
fixes_block = " " + fixes.replace("\n", "\n ")
out_lines.append(
f"- ⚠️ PR commit `{pr_commit_desc}` references upstream commit \n"
f" `{short_uhash}` which has been referenced by a `Fixes:` tag in the upstream \n"
f" Linux kernel:\n\n"
f"```text\n{fixes_block}\n```\n"
)
else:
header = (f"[FIXES] PR commit {pr_commit_desc} references upstream commit "
f"{short_uhash}, which has Fixes tags:")
out_lines.append(
wrap_paragraph(header, width=80, initial_indent='',
subsequent_indent=' ') # 8 spaces for '[FIXES] '
)
out_lines.append("") # blank line after 'Fixes tags:'
for line in fixes.splitlines():
out_lines.append(' ' + line)
out_lines.append("") # blank line

if any_findings:
if args.markdown:
print("## :mag: Upstream Linux Kernel Commit Check\n")
print('\n'.join(out_lines))
print("*This is an automated message from the kernel commit checker workflow.*")
else:
print('\n'.join(out_lines))
else:
if args.markdown:
print("> ✅ **All referenced commits exist upstream and have no Fixes: tags.**")
else:
print("All referenced commits exist upstream and have no Fixes: tags.")

if __name__ == "__main__":
main()