Skip to content

Commit 960b48b

Browse files
fix: Valid URLs failing validation - query and fragment parts (#297)
- fix: query string and fragment validations
- feat: add tests for query and fragment parts of URL
- fix: formatting & default value

Co-authored-by: Jovial Joe Jayarson <[email protected]>
1 parent 71b40bd commit 960b48b

File tree

2 files changed

+19
-12
lines changed

2 files changed

+19
-12
lines changed

src/validators/url.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# standard
44
from functools import lru_cache
55
import re
6-
from urllib.parse import unquote, urlsplit
6+
from urllib.parse import parse_qs, unquote, urlsplit
77

88
# local
99
from .hostname import hostname
@@ -34,11 +34,6 @@ def _path_regex():
3434
)
3535

3636

37-
@lru_cache
38-
def _query_regex():
39-
return re.compile(r"&?(\w+=?[^\s&]*)", re.IGNORECASE)
40-
41-
4237
def _validate_scheme(value: str):
4338
"""Validate scheme."""
4439
# More schemes will be considered later.
@@ -108,16 +103,16 @@ def _validate_netloc(
108103
) and _validate_auth_segment(basic_auth)
109104

110105

111-
def _validate_optionals(path: str, query: str, fragment: str):
106+
def _validate_optionals(path: str, query: str, fragment: str, strict_query: bool):
112107
"""Validate path query and fragments."""
113108
optional_segments = True
114109
if path:
115110
optional_segments &= bool(_path_regex().match(path))
116-
if query:
117-
optional_segments &= bool(_query_regex().match(query))
111+
if query and parse_qs(query, strict_parsing=strict_query):
112+
optional_segments &= True
118113
if fragment:
119114
fragment = fragment.lstrip("/") if fragment.startswith("/") else fragment
120-
optional_segments &= all(char_to_avoid not in fragment for char_to_avoid in ("/", "?"))
115+
optional_segments &= all(char_to_avoid not in fragment for char_to_avoid in ("?",))
121116
return optional_segments
122117

123118

@@ -130,6 +125,7 @@ def url(
130125
skip_ipv4_addr: bool = False,
131126
may_have_port: bool = True,
132127
simple_host: bool = False,
128+
strict_query: bool = True,
133129
rfc_1034: bool = False,
134130
rfc_2782: bool = False,
135131
):
@@ -167,6 +163,8 @@ def url(
167163
URL string may contain port number.
168164
simple_host:
169165
URL string may be only hyphens and alpha-numerals.
166+
strict_query:
167+
Fail validation on query string parsing error.
170168
rfc_1034:
171169
Allow trailing dot in domain/host name.
172170
Ref: [RFC 1034](https://www.rfc-editor.org/rfc/rfc1034).
@@ -214,5 +212,5 @@ def url(
214212
rfc_1034,
215213
rfc_2782,
216214
)
217-
and _validate_optionals(path, query, fragment)
215+
and _validate_optionals(path, query, fragment, strict_query)
218216
)

tests/test_url.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
"http://foo.com/blah_blah_(wikipedia)",
2020
"http://foo.com/blah_blah_(wikipedia)_(again)",
2121
"http://www.example.com/wpstyle/?p=364",
22-
"https://www.example.com/foo/?bar=baz&inga=42&quux",
2322
"https://www.example.com?bar=baz",
2423
"http://✪df.ws/123",
2524
"http://userid:[email protected]:8080",
@@ -85,12 +84,18 @@
8584
"http://:::::::::::::@exmp.com",
8685
"http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com",
8786
"https://exchange.jetswap.finance/#/swap",
87+
"https://www.foo.com/bar#/baz/test",
8888
# when simple_host=True
8989
# "http://localhost",
9090
# "http://localhost:8000",
9191
# "http://pc:8081/",
9292
# "http://3628126748",
9393
# "http://foobar",
94+
# when strict_query=False
95+
# "https://www.example.com/foo/?bar=baz&inga=42&quux",
96+
# "https://foo.bar.net/baz.php?-/inga/test-lenient-query/",
97+
# "https://foo.com/img/bar/baz.jpg?-62169987208",
98+
# "https://example.com/foo/?bar#!baz/inga/8SA-M3as7A8",
9499
],
95100
)
96101
def test_returns_true_on_valid_url(value: str):
@@ -144,6 +149,10 @@ def test_returns_true_on_valid_url(value: str):
144149
"http://[2010:836B:4179::836B:4179",
145150
"http://2010:836B:4179::836B:4179",
146151
"http://2010:836B:4179::836B:4179:80/index.html",
152+
"https://www.example.com/foo/?bar=baz&inga=42&quux",
153+
"https://foo.com/img/bar/baz.jpg?-62169987208",
154+
"https://foo.bar.net/baz.php?-/inga/test-lenient-query/",
155+
"https://example.com/foo/?bar#!baz/inga/8SA-M3as7A8",
147156
"http://0.00.00.00.00.00.00.00.00.00.00.00.00.00.00."
148157
+ "00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00."
149158
+ "00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00."

0 commit comments

Comments
 (0)