2
2
"""
3
3
Check the output of running Sphinx in nit-picky mode (missing references).
4
4
"""
5
+ from __future__ import annotations
6
+
5
7
import argparse
6
- import csv
8
+ import itertools
7
9
import os
8
10
import re
11
+ import subprocess
9
12
import sys
10
13
from pathlib import Path
14
+ from typing import TextIO
11
15
12
16
# Exclude these whether they're dirty or clean,
13
17
# because they trigger a rebuild of dirty files.
24
28
"venv" ,
25
29
}
26
30
27
- PATTERN = re .compile (r"(?P<file>[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)" )
31
# Parses one line of Sphinx warning output into its file, line and msg parts.
WARNING_PATTERN = re.compile(
    # Path of the offending file; an optional drive prefix such as "C:\" or
    # "C:/" is allowed so that the drive colon is not taken as the separator.
    r"(?P<file>([A-Za-z]:[\\/])?[^:]+)"
    # Line number the warning was reported on.
    r":(?P<line>\d+)"
    # Fixed severity marker followed by the free-form message text.
    r": WARNING: (?P<msg>.+)"
)
35
+
36
# Parses the "@@ -a,r +b,n @@" hunk headers of a Git unified diff.
# The ",r" and ",n" counts are optional in the header, so the "removed" and
# "added" groups may be unmatched.
DIFF_PATTERN = re.compile(
    r"^@@ "
    # Start line and optional line count on the old side of the hunk.
    r"-(?P<linea>\d+)(?:,(?P<removed>\d+))? "
    # Start line and optional line count on the new side of the hunk.
    r"\+(?P<lineb>\d+)(?:,(?P<added>\d+))? "
    r"@@",
    # MULTILINE lets "^" anchor at every line of a whole diff text.
    flags=re.MULTILINE,
)
41
+
42
+
43
def get_diff_files(ref_a: str, ref_b: str, filter_mode: str = "") -> set[Path]:
    """List the files changed between two Git refs, filtered by change type.

    Runs ``git diff --name-only`` over the ``ref_a...ref_b`` range.

    Args:
        ref_a: Base Git ref of the comparison.
        ref_b: Head Git ref of the comparison.
        filter_mode: Value passed verbatim to ``--diff-filter=`` (for example
            ``"A"`` or ``"M"``).

    Returns:
        The set of paths printed by Git, one per non-blank output line.

    Raises:
        subprocess.CalledProcessError: If ``git diff`` exits with a non-zero
            status (``check=True``).
    """
    diff_result = subprocess.run(
        [
            "git",
            "diff",
            # No padding inside the arguments: Git receives them verbatim.
            f"--diff-filter={filter_mode}",
            "--name-only",
            f"{ref_a}...{ref_b}",
            "--",
        ],
        stdout=subprocess.PIPE,
        check=True,
        text=True,
        encoding="UTF-8",
    )

    # One path per output line; blank lines (e.g. an empty diff) are dropped.
    stripped_names = (name.strip() for name in diff_result.stdout.splitlines())
    return {Path(name) for name in stripped_names if name}
62
+
63
+
64
def get_diff_lines(ref_a: str, ref_b: str, file: Path) -> list[int]:
    """List the lines changed between two Git refs for a specific file.

    Runs ``git diff --unified=0`` over the ``ref_a...ref_b`` range for *file*
    and expands every hunk header into the new-side line numbers it covers.

    Args:
        ref_a: Base Git ref of the comparison.
        ref_b: Head Git ref of the comparison.
        file: Path of the file to diff, passed to Git after ``--``.

    Returns:
        The new-side line numbers of all hunks, in the order the hunks appear
        in the diff output. A hunk that adds zero lines contributes nothing.

    Raises:
        subprocess.CalledProcessError: If ``git diff`` exits with a non-zero
            status (``check=True``).
    """
    diff_output = subprocess.run(
        [
            "git",
            "diff",
            # Zero context lines, so hunks cover only the changed lines.
            "--unified=0",
            # No padding inside the range: Git receives it verbatim.
            f"{ref_a}...{ref_b}",
            "--",
            str(file),
        ],
        stdout=subprocess.PIPE,
        check=True,
        text=True,
        encoding="UTF-8",
    )

    # Scrape line offsets + lengths from the diff and convert to line numbers.
    line_numbers: list[int] = []
    for hunk in DIFF_PATTERN.finditer(diff_output.stdout):
        first_line = int(hunk["lineb"])
        added = hunk["added"]
        # The added line count is 1 if not printed in the hunk header.
        line_count = 1 if added is None else int(added)
        line_numbers.extend(range(first_line, first_line + line_count))

    return line_numbers
97
+
98
+
99
def get_para_line_numbers(file_obj: TextIO) -> list[list[int]]:
    """Group the 1-based line numbers of a text stream by paragraph.

    A new paragraph starts at the very first line, and afterwards at every
    non-blank line that directly follows a blank one. Blank lines are kept
    with the paragraph that precedes them.

    Each group starts with the number of the line just before the paragraph
    (``0`` for the first group), followed by the paragraph's own line numbers.

    Returns:
        One list of line numbers per paragraph, in file order; an empty list
        when the stream yields no lines.
    """
    groups: list[list[int]] = []
    last_seen = None
    for number, text in enumerate(file_obj, start=1):
        opens_group = last_seen is None or (
            bool(text.strip()) and not last_seen.strip()
        )
        if opens_group:
            # Seed the group with the line preceding the paragraph.
            groups.append([number - 1])
        groups[-1].append(number)
        last_seen = text
    return groups
111
+
112
+
113
def filter_and_parse_warnings(
    warnings: list[str], files: set[Path]
) -> list[re.Match[str]]:
    """Parse the warnings that mention any of the passed files.

    A warning is kept when the string form of at least one path in *files*
    occurs in it and, after stripping surrounding whitespace, the whole line
    matches ``WARNING_PATTERN``.

    Returns:
        The match objects of the kept warnings, in input order.
    """
    parsed = []
    for text in warnings:
        if not any(str(path) in text for path in files):
            continue
        match = WARNING_PATTERN.fullmatch(text.strip())
        if match:
            parsed.append(match)
    return parsed
128
+
129
+
130
def filter_warnings_by_diff(
    warnings: list[re.Match[str]], ref_a: str, ref_b: str, file: Path
) -> list[re.Match[str]]:
    """Filter the passed per-file warnings to just those on changed lines.

    A warning is kept when it belongs to *file* and its line number falls in
    a paragraph of *file* that shares at least one line with the diff between
    the two refs.

    Args:
        warnings: Parsed warnings exposing ``"file"`` and ``"line"`` groups.
        ref_a: Base Git ref of the comparison.
        ref_b: Head Git ref of the comparison.
        file: File to inspect; it is opened and read as UTF-8.

    Returns:
        The matching warnings, in input order.
    """
    # Build the set of changed lines once, not once per paragraph.
    changed_lines = set(get_diff_lines(ref_a, ref_b, file))
    with file.open(encoding="UTF-8") as file_obj:
        paragraphs = get_para_line_numbers(file_obj)

    # Collect every line of every paragraph that the diff touches.
    touched_para_lines: set[int] = set()
    for para_lines in paragraphs:
        if not changed_lines.isdisjoint(para_lines):
            touched_para_lines.update(para_lines)

    file_name = str(file)
    return [
        warning
        for warning in warnings
        if file_name in warning["file"]
        and int(warning["line"]) in touched_para_lines
    ]
152
+
28
153
154
def process_touched_warnings(
    warnings: list[str], ref_a: str, ref_b: str
) -> list[re.Match[str]]:
    """Filter a list of Sphinx warnings to those affecting touched lines.

    Warnings in files added between the two refs are all kept. Warnings in
    modified files are kept only when ``filter_warnings_by_diff`` reports
    them for that file.

    Returns:
        Parsed warnings for added files, followed by the retained warnings
        for modified files.
    """
    added_files = get_diff_files(ref_a, ref_b, filter_mode="A")
    modified_files = get_diff_files(ref_a, ref_b, filter_mode="M")

    warnings_added = filter_and_parse_warnings(warnings, added_files)
    warnings_modified = filter_and_parse_warnings(warnings, modified_files)

    # Only diff the modified files that actually have a warning.
    modified_files_warned = {
        path
        for path in modified_files
        if any(str(path) in match["file"] for match in warnings_modified)
    }

    touched = list(warnings_added)
    for path in modified_files_warned:
        touched.extend(
            filter_warnings_by_diff(warnings_modified, ref_a, ref_b, path)
        )
    return touched
180
+
181
+
182
def annotate_diff(
    warnings: list[str], ref_a: str = "main", ref_b: str = "HEAD"
) -> None:
    """
    Emit GitHub Actions annotations for Sphinx warnings in changed paragraphs.

    Each retained warning of the form:
        .../Doc/library/cgi.rst:98: WARNING: reference target not found
    is printed as:
        ::warning file=.../Doc/library/cgi.rst,line=98::reference target not found
    followed by the original warning text. "None" is printed when no warning
    is retained.

    See:
    https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message
    """
    touched = process_touched_warnings(warnings, ref_a, ref_b)
    print("Emitting doc warnings matching modified lines:")
    if not touched:
        print("None")
        return
    for match in touched:
        # The match object supplies the named groups used by the template.
        print("::warning file={file},line={line}::{msg}".format_map(match))
        # Echo the full matched warning text after the annotation.
        print(match[0])
49
203
50
204
51
205
def fail_if_regression (
@@ -68,7 +222,7 @@ def fail_if_regression(
68
222
print (filename )
69
223
for warning in warnings :
70
224
if filename in warning :
71
- if match := PATTERN .fullmatch (warning ):
225
+ if match := WARNING_PATTERN .fullmatch (warning ):
72
226
print (" {line}: {msg}" .format_map (match ))
73
227
return - 1
74
228
return 0
@@ -91,12 +245,14 @@ def fail_if_improved(
91
245
return 0
92
246
93
247
94
- def main () -> int :
248
+ def main (argv : list [ str ] | None = None ) -> int :
95
249
parser = argparse .ArgumentParser ()
96
250
parser .add_argument (
97
- "--check-and-annotate" ,
98
- help = "Comma-separated list of files to check, "
99
- "and annotate those with warnings on GitHub Actions" ,
251
+ "--annotate-diff" ,
252
+ nargs = "*" ,
253
+ metavar = ("BASE_REF" , "HEAD_REF" ),
254
+ help = "Add GitHub Actions annotations on the diff for warnings on "
255
+ "lines changed between the given refs (main and HEAD, by default)" ,
100
256
)
101
257
parser .add_argument (
102
258
"--fail-if-regression" ,
@@ -108,13 +264,19 @@ def main() -> int:
108
264
action = "store_true" ,
109
265
help = "Fail if new files with no nits are found" ,
110
266
)
111
- args = parser .parse_args ()
267
+
268
+ args = parser .parse_args (argv )
269
+ if args .annotate_diff is not None and len (args .annotate_diff ) > 2 :
270
+ parser .error (
271
+ "--annotate-diff takes between 0 and 2 ref args, not "
272
+ f"{ len (args .annotate_diff )} { tuple (args .annotate_diff )} "
273
+ )
112
274
exit_code = 0
113
275
114
276
wrong_directory_msg = "Must run this script from the repo root"
115
277
assert Path ("Doc" ).exists () and Path ("Doc" ).is_dir (), wrong_directory_msg
116
278
117
- with Path ("Doc/sphinx-warnings.txt" ).open () as f :
279
+ with Path ("Doc/sphinx-warnings.txt" ).open (encoding = "UTF-8" ) as f :
118
280
warnings = f .read ().splitlines ()
119
281
120
282
cwd = str (Path .cwd ()) + os .path .sep
@@ -124,15 +286,15 @@ def main() -> int:
124
286
if "Doc/" in warning
125
287
}
126
288
127
- with Path ("Doc/tools/.nitignore" ).open () as clean_files :
289
+ with Path ("Doc/tools/.nitignore" ).open (encoding = "UTF-8" ) as clean_files :
128
290
files_with_expected_nits = {
129
291
filename .strip ()
130
292
for filename in clean_files
131
293
if filename .strip () and not filename .startswith ("#" )
132
294
}
133
295
134
- if args .check_and_annotate :
135
- check_and_annotate (warnings , args .check_and_annotate )
296
+ if args .annotate_diff is not None :
297
+ annotate_diff (warnings , * args .annotate_diff )
136
298
137
299
if args .fail_if_regression :
138
300
exit_code += fail_if_regression (
0 commit comments