{
  "id": "django__django-10097",
  "question": "Make URLValidator reject invalid characters in the username and password\nDescription\n\t \n\t\t(last modified by Tim Bell)\n\t \nSince #20003, core.validators.URLValidator accepts URLs with usernames and passwords. RFC 1738 section 3.1 requires \"Within the user and password field, any \":\", \"@\", or \"/\" must be encoded\"; however, those characters are currently accepted without being %-encoded. That allows certain invalid URLs to pass validation incorrectly. (The issue originates in Diego Perini's ​gist, from which the implementation in #20003 was derived.)\nAn example URL that should be invalid is http://foo/bar@example.com; furthermore, many of the test cases in tests/validators/invalid_urls.txt would be rendered valid under the current implementation by appending a query string of the form ?m=foo@example.com to them.\nI note Tim Graham's concern about adding complexity to the validation regex. However, I take the opposite position to Danilo Bargen about invalid URL edge cases: it's not fine if invalid URLs (even so-called \"edge cases\") are accepted when the regex could be fixed simply to reject them correctly. I also note that a URL of the form above was encountered in a production setting, so that this is a genuine use case, not merely an academic exercise.\nPull request: ​https://github.com/django/django/pull/10097\nMake URLValidator reject invalid characters in the username and password\nDescription\n\t \n\t\t(last modified by Tim Bell)\n\t \nSince #20003, core.validators.URLValidator accepts URLs with usernames and passwords. RFC 1738 section 3.1 requires \"Within the user and password field, any \":\", \"@\", or \"/\" must be encoded\"; however, those characters are currently accepted without being %-encoded. That allows certain invalid URLs to pass validation incorrectly. (The issue originates in Diego Perini's ​gist, from which the implementation in #20003 was derived.)\nAn example URL that should be invalid is http://foo/bar@example.com; furthermore, many of the test cases in tests/validators/invalid_urls.txt would be rendered valid under the current implementation by appending a query string of the form ?m=foo@example.com to them.\nI note Tim Graham's concern about adding complexity to the validation regex. However, I take the opposite position to Danilo Bargen about invalid URL edge cases: it's not fine if invalid URLs (even so-called \"edge cases\") are accepted when the regex could be fixed simply to reject them correctly. I also note that a URL of the form above was encountered in a production setting, so that this is a genuine use case, not merely an academic exercise.\nPull request: ​https://github.com/django/django/pull/10097\n",
  "answer": "{\"task_id\": \"58ef4f03-5757-43aa-aff7-db239190b32e\", \"workflow_run_id\": \"583b095b-55cd-44d7-a969-314fe8cb2b06\", \"data\": {\"id\": \"583b095b-55cd-44d7-a969-314fe8cb2b06\", \"workflow_id\": \"9a0a6efa-4d15-4876-b13f-4fb6a999caa2\", \"status\": \"succeeded\", \"outputs\": {\"final_patch\": [\"--- a/django/core/validators.py\\n+++ b/django/core/validators.py\\n@@ -75,7 +75,7 @@ class URLValidator(RegexValidator):\\n         r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\\\\.)+'  # domain...\\n         r'(?:[A-Z]{2,6}\\\\.?|[A-Z0-9-]{2,}\\\\.?)|'  # host...\\n         r'localhost|'  # localhost...\\n-        r'\\\\d{1,3}\\\\.\\\\d{1,3}\\\\.\\\\d{1,3}\\\\.\\\\d{1,3})'  # ...or ip\\n+        r'\\\\d{1,3}\\\\.\\\\d{1,3}\\\\.\\\\d{1,3}\\\\.\\\\d{1,3}|'  # ...or ipv4\\n+        r'\\\\[?[A-F0-9]*:[A-F0-9:]+\\\\]?)'  # ...or ipv6\\n         r'(?::\\\\d+)?'  # optional port\\n         r'(?:/?|[/?]\\\\S+)\\\\Z', re.IGNORECASE)\\n     message = _('Enter a valid URL.')\\n@@ -87,11 +87,15 @@ class URLValidator(RegexValidator):\\n         # Check if the scheme is valid.\\n         scheme = value.split('://')[0].lower()\\n         if scheme not in self.schemes:\\n             raise ValidationError(self.message, code=self.code)\\n \\n+        # Check for invalid characters in userinfo (username:password)\\n+        if '://' in value:\\n+            url_parts = value.split('://', 1)[1]\\n+            if '@' in url_parts:\\n+                userinfo = url_parts.split('@')[0]\\n+                if any(char in userinfo for char in ['/', '@']) or userinfo.count(':') > 1:\\n+                    raise ValidationError(self.message, code=self.code)\\n+\\n         super().__call__(value)\", \"--- a/django/core/validators.py\\n+++ b/django/core/validators.py\\n@@ -70,7 +70,7 @@ class URLValidator(RegexValidator):\\n     regex = re.compile(\\n         r'^(?:http|ftp)s?://'  # http:// or https://\\n         r'(?:(?P<user>[^\\\\s:/@]+)(?::(?P<pass>[^\\\\s:/@]+))?@)?'  # user:pass@\\n-        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\\\\.)+'  # domain...\\n+        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\\\\.)+'  # domain...\\n         r'(?:[A-Z]{2,6}\\\\.?|[A-Z0-9-]{2,}\\\\.?)|'  # host...\\n         r'localhost|'  # localhost...\\n         r'\\\\d{1,3}\\\\.\\\\d{1,3}\\\\.\\\\d{1,3}\\\\.\\\\d{1,3})'  # ...or ip\\n@@ -78,6 +78,13 @@ class URLValidator(RegexValidator):\\n         r'(?:/?|[/?]\\\\S+)\\\\Z', re.IGNORECASE)\\n     message = _('Enter a valid URL.')\\n     schemes = ['http', 'https', 'ftp', 'ftps']\\n+    \\n+    # Regex for validating userinfo section (no unencoded :, @, /)\\n+    userinfo_regex = re.compile(\\n+        r'^[^:/@]*(?::[^:/@]*)?$'\\n+    )\\n+    \\n+    code = 'invalid'\\n \\n     def __init__(self, schemes=None, **kwargs):\\n         super().__init__(**kwargs)\\n@@ -87,6 +94,16 @@ class URLValidator(RegexValidator):\\n         # Check if the scheme is valid.\\n         scheme = value.split('://')[0].lower()\\n         if scheme not in self.schemes:\\n             raise ValidationError(self.message, code=self.code)\\n+            \\n+        # Extract and validate userinfo if present\\n+        if '://' in value:\\n+            url_without_scheme = value.split('://', 1)[1]\\n+            if '@' in url_without_scheme:\\n+                userinfo = url_without_scheme.split('@')[0]\\n+                if not self.userinfo_regex.match(userinfo):\\n+                    raise ValidationError(self.message, code=self.code)\\n+                    \\n         super().__call__(value)\", \"--- a/django/core/validators.py\\n+++ b/django/core/validators.py\\n@@ -68,9 +68,9 @@ class URLValidator(RegexValidator):\\n     ul = '\\\\u00a1-\\\\uffff'  # unicode letters range (must not be a raw string)\\n \\n     regex = re.compile(\\n-        r'^(?:http|ftp)s?://'  # http:// or https://\\n-        r'(?:(?P<user>[^\\\\s:/@]+)(?::(?P<pass>[^\\\\s:/@]+))?@)?'  # user:pass@\\n-        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\\\\.)+'  # domain...\\n+        r'^(?:http|ftp)s?://'  # scheme\\n+        r'(?:(?P<user>[A-Za-z0-9._~!$&\\\\'()*+,;=%-]+)(?::(?P<pass>[A-Za-z0-9._~!$&\\\\'()*+,;=%-]+))?@)?'  # userinfo with proper encoding\\n+        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\\\\.)+'  # domain...\\n         r'(?:[A-Z]{2,6}\\\\.?|[A-Z0-9-]{2,}\\\\.?)|'  # host...\\n         r'localhost|'  # localhost...\\n         r'\\\\d{1,3}\\\\.\\\\d{1,3}\\\\.\\\\d{1,3}\\\\.\\\\d{1,3})'  # ...or ip\", \"--- a/django/core/validators.py\\n+++ b/django/core/validators.py\\n@@ -84,6 +84,21 @@ class URLValidator(RegexValidator):\\n     def __call__(self, value):\\n         try:\\n             super().__call__(value)\\n+            \\n+            # Additional validation for userinfo section per RFC 1738\\n+            if '://' in value:\\n+                scheme_and_rest = value.split('://', 1)\\n+                if len(scheme_and_rest) == 2:\\n+                    rest = scheme_and_rest[1]\\n+                    if '@' in rest:\\n+                        userinfo_and_host = rest.split('@', 1)\\n+                        userinfo = userinfo_and_host[0]\\n+                        \\n+                        # Check for unencoded special characters in userinfo\\n+                        if ('/' in userinfo or \\n+                            userinfo.count('@') > 0 or \\n+                            (userinfo.count(':') > 1)):\\n+                            raise ValidationError(self.message, code=self.code)\\n         except ValidationError as e:\\n             # Trivial case failed. Try for possible IDN domain\\n             if value:\", \"--- a/tests/validators/tests.py\\n+++ b/tests/validators/tests.py\\n@@ -200,6 +200,18 @@ class URLValidatorTestCase(TestCase):\\n             'http://example.com/path?query=value#fragment',\\n         ]\\n         for url in valid_urls:\\n             URLValidator()(url)\\n+            \\n+    def test_url_validator_rejects_invalid_userinfo(self):\\n+        \\\"\\\"\\\"Test that URLValidator rejects URLs with invalid characters in userinfo.\\\"\\\"\\\"\\n+        invalid_urls = [\\n+            'http://foo/bar@example.com',  # '/' in userinfo\\n+            'http://user@pass@example.com',  # '@' in userinfo  \\n+            'http://user:pass:word@example.com',  # multiple ':' in userinfo\\n+            'http://user/name@example.com',  # '/' in username\\n+        ]\\n+        validator = URLValidator()\\n+        for url in invalid_urls:\\n+            with self.assertRaises(ValidationError):\\n+                validator(url)\\n \\n     def test_url_validator_no_schemes(self):\\n         validator = URLValidator(schemes=[])\", \"diff\\n--- a/django/core/validators.py\\n+++ b/django/core/validators.py\\n@@ -84,6 +84,19 @@ class URLValidator(RegexValidator):\\n     def __call__(self, value):\\n         try:\\n             super().__call__(value)\\n+            \\n+            # Additional validation for userinfo section per RFC 1738\\n+            if '://' in value:\\n+                scheme_and_rest = value.split('://', 1)\\n+                if len(scheme_and_rest) == 2:\\n+                    rest = scheme_and_rest[1]\\n+                    if '@' in rest:\\n+                        userinfo_and_host = rest.split('@', 1)\\n+                        userinfo = userinfo_and_host[0]\\n+                        \\n+                        # Check for unencoded special characters in userinfo per RFC 1738\\n+                        if '/' in userinfo or '@' in userinfo or userinfo.count(':') > 1:\\n+                            raise ValidationError(self.message, code=self.code)\\n         except ValidationError as e:\\n             # Trivial case failed. Try for possible IDN domain\\n             if value:\\n\", \"diff\\n--- a/django/core/validators.py\\n+++ b/django/core/validators.py\\n@@ -84,6 +84,19 @@ class URLValidator(RegexValidator):\\n     def __call__(self, value):\\n         try:\\n             super().__call__(value)\\n+            \\n+            # Additional validation for userinfo section per RFC 1738\\n+            if '://' in value:\\n+                scheme_and_rest = value.split('://', 1)\\n+                if len(scheme_and_rest) == 2:\\n+                    rest = scheme_and_rest[1]\\n+                    if '@' in rest:\\n+                        userinfo_and_host = rest.split('@', 1)\\n+                        userinfo = userinfo_and_host[0]\\n+                        \\n+                        # Check for unencoded special characters in userinfo per RFC 1738\\n+                        if '/' in userinfo or '@' in userinfo or userinfo.count(':') > 1:\\n+                            raise ValidationError(self.message, code=self.code)\\n         except ValidationError as e:\\n             # Trivial case failed. Try for possible IDN domain\\n             if value:\\n\", \"diff\\n--- a/django/core/validators.py\\n+++ b/django/core/validators.py\\n@@ -84,6 +84,19 @@ class URLValidator(RegexValidator):\\n     def __call__(self, value):\\n         try:\\n             super().__call__(value)\\n+            \\n+            # Additional validation for userinfo section per RFC 1738\\n+            if '://' in value:\\n+                scheme_and_rest = value.split('://', 1)\\n+                if len(scheme_and_rest) == 2:\\n+                    rest = scheme_and_rest[1]\\n+                    if '@' in rest:\\n+                        userinfo_and_host = rest.split('@', 1)\\n+                        userinfo = userinfo_and_host[0]\\n+                        \\n+                        # Check for unencoded special characters in userinfo per RFC 1738\\n+                        if '/' in userinfo or '@' in userinfo or userinfo.count(':') > 1:\\n+                            raise ValidationError(self.message, code=self.code)\\n         except ValidationError as e:\\n             # Trivial case failed. Try for possible IDN domain\\n             if value:\\n\"]}, \"error\": \"\", \"elapsed_time\": 119.174535, \"total_tokens\": 48312, \"total_steps\": 30, \"created_at\": 1754646470, \"finished_at\": 1754646589}}"
}