diff --git a/pyproject.toml b/pyproject.toml index 4471913e..83b6d633 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ include = ["CHANGES.md", "docs/*", "docs/validators.1", "validators/py.typed"] [tool.poetry.dependencies] python = "^3.8" eth-hash = {extras = ["pycryptodome"], version = "^0.5.2"} +requests = "^2.31.0" [tool.poetry.group.docs] optional = true diff --git a/tests/test_domain.py b/tests/test_domain.py index bfea791d..765e7c20 100644 --- a/tests/test_domain.py +++ b/tests/test_domain.py @@ -49,6 +49,8 @@ def test_returns_true_on_valid_domain(value: str, rfc_1034: bool, rfc_2782: bool ("123.123", False, False), ("123.123.123.", True, False), ("123.123.123.123", False, False), + ("sanpellegrino-corporate.itOLDWEBSITE", False, False), + ("sanpellegrino-corporate.itOLDWEBSITE.", True, False), ], ) def test_returns_failed_validation_on_invalid_domain(value: str, rfc_1034: bool, rfc_2782: bool): diff --git a/validators/domain.py b/validators/domain.py index 3866ab4f..733c271c 100644 --- a/validators/domain.py +++ b/validators/domain.py @@ -3,9 +3,23 @@ # standard import re +import requests # local -from .utils import validator +from validators.utils import validator + + +# Function to download the TLD list and create a list of valid TLDs +def get_valid_tlds(): + """Return a list of regularly updated valid TLDs from iana.org.""" + response = requests.get("https://data.iana.org/TLD/tlds-alpha-by-domain.txt", timeout=30) + if response.status_code != 200: + return None + tlds = response.text.strip().split("\n")[1:] + return tlds + + +VALID_TLDS = get_valid_tlds() @validator @@ -42,11 +56,25 @@ def domain(value: str, /, *, rfc_1034: bool = False, rfc_2782: bool = False): - *In version 0.10.0*: - Added support for internationalized domain name (IDN) validation. - > *New in version 0.9.0*. + - *In version 0.21.0*: + - Added active TLD validation. + + > *New in version 0.21.0*. 
""" if not value: return False try: + # Check if the TLD is active + if rfc_1034 and value.endswith("."): + tld = value.rstrip(".") + _, tld = tld.rsplit(".", 1) + else: + _, tld = value.rsplit(".", 1) + + if VALID_TLDS: + if tld.upper() not in VALID_TLDS: + return False + return not re.search(r"\s", value) and re.match( # First character of the domain rf"^(?:[a-zA-Z0-9{'_'if rfc_2782 else ''}]" diff --git a/validators/email.py b/validators/email.py index 4ad23137..f1dd8b67 100644 --- a/validators/email.py +++ b/validators/email.py @@ -5,8 +5,8 @@ import re # local -from .hostname import hostname -from .utils import validator +from validators.hostname import hostname +from validators.utils import validator @validator