Skip to content

Commit 71e3581

Browse files
miss-islingtonCAM-GerlachhugovkAA-Turner
authored
[3.12] gh-101100: Only show GitHub check annotations on changed doc paragraphs (GH-108065) (#108127)
gh-101100: Only show GitHub check annotations on changed doc paragraphs (GH-108065) * Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- (cherry picked from commit eb953d6) Co-authored-by: C.A.M. Gerlach <[email protected]> Co-authored-by: Hugo van Kemenade <[email protected]> Co-authored-by: Adam Turner <[email protected]>
1 parent a1808b8 commit 71e3581

File tree

2 files changed

+208
-31
lines changed

2 files changed

+208
-31
lines changed

.github/workflows/reusable-docs.yml

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,30 @@ jobs:
1616
name: 'Docs'
1717
runs-on: ubuntu-latest
1818
timeout-minutes: 60
19+
env:
20+
branch_base: 'origin/${{ github.event.pull_request.base.ref }}'
21+
branch_pr: 'origin/${{ github.event.pull_request.head.ref }}'
22+
refspec_base: '+${{ github.event.pull_request.base.sha }}:remotes/origin/${{ github.event.pull_request.base.ref }}'
23+
refspec_pr: '+${{ github.event.pull_request.head.sha }}:remotes/origin/${{ github.event.pull_request.head.ref }}'
1924
steps:
20-
- uses: actions/checkout@v3
25+
- name: 'Check out latest PR branch commit'
26+
uses: actions/checkout@v3
27+
with:
28+
ref: ${{ github.event.pull_request.head.sha }}
29+
# Adapted from https://github.com/actions/checkout/issues/520#issuecomment-1167205721
30+
- name: 'Fetch commits to get branch diff'
31+
run: |
32+
# Fetch enough history to find a common ancestor commit (aka merge-base):
33+
git fetch origin ${{ env.refspec_pr }} --depth=$(( ${{ github.event.pull_request.commits }} + 1 )) \
34+
--no-tags --prune --no-recurse-submodules
35+
36+
# This should get the oldest commit in the local fetched history (which may not be the commit the PR branched from):
37+
COMMON_ANCESTOR=$( git rev-list --first-parent --max-parents=0 --max-count=1 ${{ env.branch_pr }} )
38+
DATE=$( git log --date=iso8601 --format=%cd "${COMMON_ANCESTOR}" )
39+
40+
# Get all commits since that commit date from the base branch (eg: master or main):
41+
git fetch origin ${{ env.refspec_base }} --shallow-since="${DATE}" \
42+
--no-tags --prune --no-recurse-submodules
2143
- name: 'Set up Python'
2244
uses: actions/setup-python@v4
2345
with:
@@ -28,13 +50,6 @@ jobs:
2850
run: make -C Doc/ venv
2951

3052
# To annotate PRs with Sphinx nitpicks (missing references)
31-
- name: 'Get list of changed files'
32-
if: github.event_name == 'pull_request'
33-
id: changed_files
34-
uses: Ana06/[email protected]
35-
with:
36-
filter: "Doc/**"
37-
format: csv # works for paths with spaces
3853
- name: 'Build HTML documentation'
3954
continue-on-error: true
4055
run: |
@@ -45,7 +60,7 @@ jobs:
4560
if: github.event_name == 'pull_request'
4661
run: |
4762
python Doc/tools/check-warnings.py \
48-
--check-and-annotate '${{ steps.changed_files.outputs.added_modified }}' \
63+
--annotate-diff '${{ env.branch_base }}' '${{ env.branch_pr }}' \
4964
--fail-if-regression \
5065
--fail-if-improved
5166

Doc/tools/check-warnings.py

Lines changed: 184 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,16 @@
22
"""
33
Check the output of running Sphinx in nit-picky mode (missing references).
44
"""
5+
from __future__ import annotations
6+
57
import argparse
6-
import csv
8+
import itertools
79
import os
810
import re
11+
import subprocess
912
import sys
1013
from pathlib import Path
14+
from typing import TextIO
1115

1216
# Exclude these whether they're dirty or clean,
1317
# because they trigger a rebuild of dirty files.
@@ -24,28 +28,178 @@
2428
"venv",
2529
}
2630

27-
PATTERN = re.compile(r"(?P<file>[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)")
31+
# Regex pattern to match the parts of a Sphinx warning
32+
WARNING_PATTERN = re.compile(
33+
r"(?P<file>([A-Za-z]:[\\/])?[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)"
34+
)
35+
36+
# Regex pattern to match the line numbers in a Git unified diff
37+
DIFF_PATTERN = re.compile(
38+
r"^@@ -(?P<linea>\d+)(?:,(?P<removed>\d+))? \+(?P<lineb>\d+)(?:,(?P<added>\d+))? @@",
39+
flags=re.MULTILINE,
40+
)
41+
42+
43+
def get_diff_files(ref_a: str, ref_b: str, filter_mode: str = "") -> set[Path]:
44+
"""List the files changed between two Git refs, filtered by change type."""
45+
added_files_result = subprocess.run(
46+
[
47+
"git",
48+
"diff",
49+
f"--diff-filter={filter_mode}",
50+
"--name-only",
51+
f"{ref_a}...{ref_b}",
52+
"--",
53+
],
54+
stdout=subprocess.PIPE,
55+
check=True,
56+
text=True,
57+
encoding="UTF-8",
58+
)
59+
60+
added_files = added_files_result.stdout.strip().split("\n")
61+
return {Path(file.strip()) for file in added_files if file.strip()}
62+
63+
64+
def get_diff_lines(ref_a: str, ref_b: str, file: Path) -> list[int]:
65+
"""List the lines changed between two Git refs for a specific file."""
66+
diff_output = subprocess.run(
67+
[
68+
"git",
69+
"diff",
70+
"--unified=0",
71+
f"{ref_a}...{ref_b}",
72+
"--",
73+
str(file),
74+
],
75+
stdout=subprocess.PIPE,
76+
check=True,
77+
text=True,
78+
encoding="UTF-8",
79+
)
80+
81+
# Scrape line offsets + lengths from diff and convert to line numbers
82+
line_matches = DIFF_PATTERN.finditer(diff_output.stdout)
83+
# Removed and added line counts are 1 if not printed
84+
line_match_values = [
85+
line_match.groupdict(default=1) for line_match in line_matches
86+
]
87+
line_ints = [
88+
(int(match_value["lineb"]), int(match_value["added"]))
89+
for match_value in line_match_values
90+
]
91+
line_ranges = [
92+
range(line_b, line_b + added) for line_b, added in line_ints
93+
]
94+
line_numbers = list(itertools.chain(*line_ranges))
95+
96+
return line_numbers
97+
98+
99+
def get_para_line_numbers(file_obj: TextIO) -> list[list[int]]:
100+
"""Get the line numbers of text in a file object, grouped by paragraph."""
101+
paragraphs = []
102+
prev_line = None
103+
for lineno, line in enumerate(file_obj):
104+
lineno = lineno + 1
105+
if prev_line is None or (line.strip() and not prev_line.strip()):
106+
paragraph = [lineno - 1]
107+
paragraphs.append(paragraph)
108+
paragraph.append(lineno)
109+
prev_line = line
110+
return paragraphs
111+
112+
113+
def filter_and_parse_warnings(
114+
warnings: list[str], files: set[Path]
115+
) -> list[re.Match[str]]:
116+
"""Get the warnings matching passed files and parse them with regex."""
117+
filtered_warnings = [
118+
warning
119+
for warning in warnings
120+
if any(str(file) in warning for file in files)
121+
]
122+
warning_matches = [
123+
WARNING_PATTERN.fullmatch(warning.strip())
124+
for warning in filtered_warnings
125+
]
126+
non_null_matches = [warning for warning in warning_matches if warning]
127+
return non_null_matches
128+
129+
130+
def filter_warnings_by_diff(
131+
warnings: list[re.Match[str]], ref_a: str, ref_b: str, file: Path
132+
) -> list[re.Match[str]]:
133+
"""Filter the passed per-file warnings to just those on changed lines."""
134+
diff_lines = get_diff_lines(ref_a, ref_b, file)
135+
with file.open(encoding="UTF-8") as file_obj:
136+
paragraphs = get_para_line_numbers(file_obj)
137+
touched_paras = [
138+
para_lines
139+
for para_lines in paragraphs
140+
if set(diff_lines) & set(para_lines)
141+
]
142+
touched_para_lines = set(itertools.chain(*touched_paras))
143+
warnings_infile = [
144+
warning for warning in warnings if str(file) in warning["file"]
145+
]
146+
warnings_touched = [
147+
warning
148+
for warning in warnings_infile
149+
if int(warning["line"]) in touched_para_lines
150+
]
151+
return warnings_touched
152+
28153

154+
def process_touched_warnings(
155+
warnings: list[str], ref_a: str, ref_b: str
156+
) -> list[re.Match[str]]:
157+
"""Filter a list of Sphinx warnings to those affecting touched lines."""
158+
added_files, modified_files = tuple(
159+
get_diff_files(ref_a, ref_b, filter_mode=mode) for mode in ("A", "M")
160+
)
161+
162+
warnings_added = filter_and_parse_warnings(warnings, added_files)
163+
warnings_modified = filter_and_parse_warnings(warnings, modified_files)
164+
165+
modified_files_warned = {
166+
file
167+
for file in modified_files
168+
if any(str(file) in warning["file"] for warning in warnings_modified)
169+
}
29170

30-
def check_and_annotate(warnings: list[str], files_to_check: str) -> None:
171+
warnings_modified_touched = [
172+
filter_warnings_by_diff(warnings_modified, ref_a, ref_b, file)
173+
for file in modified_files_warned
174+
]
175+
warnings_touched = warnings_added + list(
176+
itertools.chain(*warnings_modified_touched)
177+
)
178+
179+
return warnings_touched
180+
181+
182+
def annotate_diff(
183+
warnings: list[str], ref_a: str = "main", ref_b: str = "HEAD"
184+
) -> None:
31185
"""
32-
Convert Sphinx warning messages to GitHub Actions.
186+
Convert Sphinx warning messages to GitHub Actions for changed paragraphs.
33187
34188
Converts lines like:
35189
.../Doc/library/cgi.rst:98: WARNING: reference target not found
36190
to:
37191
::warning file=.../Doc/library/cgi.rst,line=98::reference target not found
38192
39-
Non-matching lines are echoed unchanged.
40-
41-
see:
193+
See:
42194
https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message
43195
"""
44-
files_to_check = next(csv.reader([files_to_check]))
45-
for warning in warnings:
46-
if any(filename in warning for filename in files_to_check):
47-
if match := PATTERN.fullmatch(warning):
48-
print("::warning file={file},line={line}::{msg}".format_map(match))
196+
warnings_touched = process_touched_warnings(warnings, ref_a, ref_b)
197+
print("Emitting doc warnings matching modified lines:")
198+
for warning in warnings_touched:
199+
print("::warning file={file},line={line}::{msg}".format_map(warning))
200+
print(warning[0])
201+
if not warnings_touched:
202+
print("None")
49203

50204

51205
def fail_if_regression(
@@ -68,7 +222,7 @@ def fail_if_regression(
68222
print(filename)
69223
for warning in warnings:
70224
if filename in warning:
71-
if match := PATTERN.fullmatch(warning):
225+
if match := WARNING_PATTERN.fullmatch(warning):
72226
print(" {line}: {msg}".format_map(match))
73227
return -1
74228
return 0
@@ -91,12 +245,14 @@ def fail_if_improved(
91245
return 0
92246

93247

94-
def main() -> int:
248+
def main(argv: list[str] | None = None) -> int:
95249
parser = argparse.ArgumentParser()
96250
parser.add_argument(
97-
"--check-and-annotate",
98-
help="Comma-separated list of files to check, "
99-
"and annotate those with warnings on GitHub Actions",
251+
"--annotate-diff",
252+
nargs="*",
253+
metavar=("BASE_REF", "HEAD_REF"),
254+
help="Add GitHub Actions annotations on the diff for warnings on "
255+
"lines changed between the given refs (main and HEAD, by default)",
100256
)
101257
parser.add_argument(
102258
"--fail-if-regression",
@@ -108,13 +264,19 @@ def main() -> int:
108264
action="store_true",
109265
help="Fail if new files with no nits are found",
110266
)
111-
args = parser.parse_args()
267+
268+
args = parser.parse_args(argv)
269+
if args.annotate_diff is not None and len(args.annotate_diff) > 2:
270+
parser.error(
271+
"--annotate-diff takes between 0 and 2 ref args, not "
272+
f"{len(args.annotate_diff)} {tuple(args.annotate_diff)}"
273+
)
112274
exit_code = 0
113275

114276
wrong_directory_msg = "Must run this script from the repo root"
115277
assert Path("Doc").exists() and Path("Doc").is_dir(), wrong_directory_msg
116278

117-
with Path("Doc/sphinx-warnings.txt").open() as f:
279+
with Path("Doc/sphinx-warnings.txt").open(encoding="UTF-8") as f:
118280
warnings = f.read().splitlines()
119281

120282
cwd = str(Path.cwd()) + os.path.sep
@@ -124,15 +286,15 @@ def main() -> int:
124286
if "Doc/" in warning
125287
}
126288

127-
with Path("Doc/tools/.nitignore").open() as clean_files:
289+
with Path("Doc/tools/.nitignore").open(encoding="UTF-8") as clean_files:
128290
files_with_expected_nits = {
129291
filename.strip()
130292
for filename in clean_files
131293
if filename.strip() and not filename.startswith("#")
132294
}
133295

134-
if args.check_and_annotate:
135-
check_and_annotate(warnings, args.check_and_annotate)
296+
if args.annotate_diff is not None:
297+
annotate_diff(warnings, *args.annotate_diff)
136298

137299
if args.fail_if_regression:
138300
exit_code += fail_if_regression(

0 commit comments

Comments
 (0)