{
  "Selected_candidate": {
    "pr_number": 4114,
    "pr_title": "Fixed #24319 -- Added validation for UUID model field",
    "pr_body": "",
    "issue_id": 24319,
    "issue_title": "UUIDField do not properly clean (validate) value in get_db_prep_value",
    "issue_body": "Use case\n: Using user's input to retrieve a model from database.\nIssue\n: The UUIDField doesn't properly *clean* the input value, meaning the ORM will query the database even the query values aren't cleaned.\nSystem\n: Ubuntu 14.04 LTS + PostgresSQL 9.3\nGood\n: User.objects.get(pk='ssss') -> ValueError\nBad\n: Media.objects.get(pk='ssss') -> DataError\nclass Media(models.Model):\n    pk = models.UUIDField()\n>>> User.objects.get(pk='ssss')\nTraceback (most recent call last):\n  File \"<input>\", line 1, in <module>\n  File \"venv/src/django/django/db/models/manager.py\", line 127, in manager_method\n    return getattr(self.get_queryset(), name)(*args, **kwargs)\n  File \"venv/src/django/django/db/models/query.py\", line 320, in get\n    clone = self.filter(*args, **kwargs)\n  File \"venv/src/django/django/db/models/query.py\", line 671, in filter\n    return self._filter_or_exclude(False, *args, **kwargs)\n  File \"venv/src/django/django/db/models/query.py\", line 689, in _filter_or_exclude\n    clone.query.add_q(Q(*args, **kwargs))\n  File \"venv/src/django/django/db/models/sql/query.py\", line 1284, in add_q\n    clause, require_inner = self._add_q(where_part, self.used_aliases)\n  File \"venv/src/django/django/db/models/sql/query.py\", line 1311, in _add_q\n    current_negated=current_negated, connector=connector, allow_joins=allow_joins)\n  File \"venv/src/django/django/db/models/sql/query.py\", line 1183, in build_filter\n    condition = self.build_lookup(lookups, col, value)\n  File \"venv/src/django/django/db/models/sql/query.py\", line 1079, in build_lookup\n    return final_lookup(lhs, rhs)\n  File \"venv/src/django/django/db/models/lookups.py\", line 96, in __init__\n    self.rhs = self.get_prep_lookup()\n  File \"venv/src/django/django/db/models/lookups.py\", line 134, in get_prep_lookup\n    return self.lhs.output_field.get_prep_lookup(self.lookup_name, self.rhs)\n  File \"venv/src/django/django/db/models/fields/__init__.py\", line 716, in get_prep_lookup\n    return self.get_prep_value(value)\n  File \"venv/src/django/django/db/models/fields/__init__.py\", line 974, in get_prep_value\n    return int(value)\nValueError: invalid literal for int() with base 10: 'ssss'\n>>> Media.objects.get(pk='ssss')\nTraceback (most recent call last):\n  File \"<input>\", line 1, in <module>\n  File \"venv/src/django/django/db/models/manager.py\", line 127, in manager_method\n    return getattr(self.get_queryset(), name)(*args, **kwargs)\n  File \"venv/src/django/django/db/models/query.py\", line 326, in get\n    num = len(clone)\n  File \"venv/src/django/django/db/models/query.py\", line 145, in __len__\n    self._fetch_all()\n  File \"venv/src/django/django/db/models/query.py\", line 955, in _fetch_all\n    self._result_cache = list(self.iterator())\n  File \"venv/src/django/django/db/models/query.py\", line 239, in iterator\n    results = compiler.execute_sql()\n  File \"venv/src/django/django/db/models/sql/compiler.py\", line 826, in execute_sql\n    cursor.execute(sql, params)\n  File \"venv/src/django/django/db/backends/utils.py\", line 80, in execute\n    return super(CursorDebugWrapper, self).execute(sql, params)\n  File \"venv/src/django/django/db/backends/utils.py\", line 65, in execute\n    return self.cursor.execute(sql, params)\n  File \"venv/src/django/django/db/utils.py\", line 95, in __exit__\n    six.reraise(dj_exc_type, dj_exc_value, traceback)\n  File \"venv/src/django/django/utils/six.py\", line 658, in reraise\n    raise value.with_traceback(tb)\n  File \"venv/src/django/django/db/backends/utils.py\", line 65, in execute\n    return self.cursor.execute(sql, params)\ndjango.db.utils.DataError: invalid input syntax for uuid: \"ssss\"\nLINE 1: ...oudncode_media\" WHERE \"cloudncode_media\".\"uuid\" = 'ssss' LIM...",
    "issue_closed_at": "2015-02-12T16:58:19",
    "base_commit": "d64baaef3b95abe9ae5d07317c9bf4df02cb8592",
    "changes": [
      {
        "file": "django/db/models/fields/__init__.py",
        "type": "function",
        "name": "get_internal_type",
        "class_name": "UUIDField",
        "code": "def get_internal_type(self):\n        return \"UUIDField\""
      }
    ]
  },
  "Justification": "Candidate E is the most relevant bug report because it directly relates to input validation within Django's model fields, which is similar to the input handling issue faced with URLField. Both reports concern the cleaning of user input and the adequacy of error reporting when validation fails — in this case, the CURRENT bug report details a ValueError occurring due to inadequate validation of a URL input. Candidate E's fix addresses adding necessary validation for UUID fields, which is closely aligned with fixing the URLField's clean method to handle inputs correctly, making it a pertinent resource for diagnosing and resolving the CURRENT bug.",
  "instance_id": "django__django-15202",
  "repo": "django/django",
  "created_at": "2021-12-15T15:04:13Z",
  "problem_statement": "URLField throws ValueError instead of ValidationError on clean\nDescription\n\t\nforms.URLField( ).clean('////]@N.AN')\nresults in:\n\tValueError: Invalid IPv6 URL\n\tTraceback (most recent call last):\n\t File \"basic_fuzzer.py\", line 22, in TestOneInput\n\t File \"fuzzers.py\", line 350, in test_forms_URLField\n\t File \"django/forms/fields.py\", line 151, in clean\n\t File \"django/forms/fields.py\", line 136, in run_validators\n\t File \"django/core/validators.py\", line 130, in __call__\n\t File \"urllib/parse.py\", line 440, in urlsplit\n",
  "patch": "diff --git a/django/core/validators.py b/django/core/validators.py\n--- a/django/core/validators.py\n+++ b/django/core/validators.py\n@@ -108,15 +108,16 @@ def __call__(self, value):\n             raise ValidationError(self.message, code=self.code, params={'value': value})\n \n         # Then check full URL\n+        try:\n+            splitted_url = urlsplit(value)\n+        except ValueError:\n+            raise ValidationError(self.message, code=self.code, params={'value': value})\n         try:\n             super().__call__(value)\n         except ValidationError as e:\n             # Trivial case failed. Try for possible IDN domain\n             if value:\n-                try:\n-                    scheme, netloc, path, query, fragment = urlsplit(value)\n-                except ValueError:  # for example, \"Invalid IPv6 URL\"\n-                    raise ValidationError(self.message, code=self.code, params={'value': value})\n+                scheme, netloc, path, query, fragment = splitted_url\n                 try:\n                     netloc = punycode(netloc)  # IDN -> ACE\n                 except UnicodeError:  # invalid domain part\n@@ -127,7 +128,7 @@ def __call__(self, value):\n                 raise\n         else:\n             # Now verify IPv6 in the netloc part\n-            host_match = re.search(r'^\\[(.+)\\](?::\\d{1,5})?$', urlsplit(value).netloc)\n+            host_match = re.search(r'^\\[(.+)\\](?::\\d{1,5})?$', splitted_url.netloc)\n             if host_match:\n                 potential_ip = host_match[1]\n                 try:\n@@ -139,7 +140,7 @@ def __call__(self, value):\n         # section 3.1. It's defined to be 255 bytes or less, but this includes\n         # one byte for the length of the name and one byte for the trailing dot\n         # that's used to indicate absolute names in DNS.\n-        if len(urlsplit(value).hostname) > 253:\n+        if splitted_url.hostname is None or len(splitted_url.hostname) > 253:\n             raise ValidationError(self.message, code=self.code, params={'value': value})\n \n \n"
}