{
  "Selected_candidate": {
    "pr_number": 10715,
    "pr_title": "Fixed #29932 -- Fixed combining compound queries with sub compound queries on SQLite and Oracle.",
    "pr_body": "Ticket [29932](https://code.djangoproject.com/ticket/29932).",
    "issue_id": 29932,
    "issue_title": "QuerySet.difference() after intersection() returns incorrect results on SQLite and Oracle",
    "issue_body": "Considering a simple model\nfrom\ndjango.db\nimport\nmodels\nclass\nFoo\n(\nmodels\n.\nModel\n):\npass\nHere is the minimal way to encounter this issue\nFoo\n.\nobjects\n.\ncreate\n(\npk\n=\n1\n)\nFoo\n.\nobjects\n.\ncreate\n(\npk\n=\n2\n)\na\n=\nFoo\n.\nobjects\n.\nall\n()\nb\n=\nFoo\n.\nobjects\n.\nintersection\n(\nFoo\n.\nobjects\n.\nfilter\n(\npk\n=\n1\n))\nassert\na\n.\ncount\n()\n==\n2\nassert\nb\n.\ncount\n()\n==\n1\ndiff\n=\na\n.\ndifference\n(\nb\n)\nassert\ndiff\n.\nexists\n()\n# fails with SQLite!\nThis operation however works as expected on PostgreSQL.",
    "issue_closed_at": "2018-12-06T14:51:03",
    "base_commit": "ae180fa4b7f927a4aeae772975927c9888bb0cb0",
    "changes": [
      {
        "file": "django/db/backends/sqlite3/features.py",
        "type": "class",
        "name": "DatabaseFeatures",
        "code": "class DatabaseFeatures(BaseDatabaseFeatures):\n    # SQLite can read from a cursor since SQLite 3.6.5, subject to the caveat\n    # that statements within a connection aren't isolated from each other. See\n    # https://sqlite.org/isolation.html.\n    can_use_chunked_reads = True\n    test_db_allows_multiple_connections = False\n    supports_unspecified_pk = True\n    supports_timezones = False\n    max_query_params = 999\n    supports_mixed_date_datetime_comparisons = False\n    autocommits_when_autocommit_is_off = sys.version_info < (3, 6)\n    can_introspect_decimal_field = False\n    can_introspect_duration_field = False\n    can_introspect_positive_integer_field = True\n    can_introspect_small_integer_field = True\n    supports_transactions = True\n    atomic_transactions = False\n    can_rollback_ddl = True\n    supports_atomic_references_rename = False\n    supports_paramstyle_pyformat = False\n    supports_sequence_reset = False\n    can_clone_databases = True\n    supports_temporal_subtraction = True\n    ignores_table_name_case = True\n    supports_cast_with_precision = False\n    time_cast_precision = 3\n    can_release_savepoints = True\n    supports_partial_indexes = Database.version_info >= (3, 8, 0)\n    # Is \"ALTER TABLE ... RENAME COLUMN\" supported?\n    can_alter_table_rename_column = Database.sqlite_version_info >= (3, 25, 0)\n\n    @cached_property\n    def supports_stddev(self):\n        \"\"\"\n        Confirm support for STDDEV and related stats functions.\n\n        SQLite supports STDDEV as an extension package; so\n        connection.ops.check_expression_support() can't unilaterally\n        rule out support for STDDEV. Manually check whether the call works.\n        \"\"\"\n        with self.connection.cursor() as cursor:\n            cursor.execute('CREATE TABLE STDDEV_TEST (X INT)')\n            try:\n                cursor.execute('SELECT STDDEV(*) FROM STDDEV_TEST')\n                has_support = True\n            except utils.DatabaseError:\n                has_support = False\n            cursor.execute('DROP TABLE STDDEV_TEST')\n        return has_support"
      },
      {
        "file": "django/db/models/sql/compiler.py",
        "type": "function",
        "name": "get_combinator_sql",
        "class_name": "SQLCompiler",
        "code": "def get_combinator_sql(self, combinator, all):\n        features = self.connection.features\n        compilers = [\n            query.get_compiler(self.using, self.connection)\n            for query in self.query.combined_queries if not query.is_empty()\n        ]\n        if not features.supports_slicing_ordering_in_compound:\n            for query, compiler in zip(self.query.combined_queries, compilers):\n                if query.low_mark or query.high_mark:\n                    raise DatabaseError('LIMIT/OFFSET not allowed in subqueries of compound statements.')\n                if compiler.get_order_by():\n                    raise DatabaseError('ORDER BY not allowed in subqueries of compound statements.')\n        parts = ()\n        for compiler in compilers:\n            try:\n                # If the columns list is limited, then all combined queries\n                # must have the same columns list. Set the selects defined on\n                # the query on all combined queries, if not already set.\n                if not compiler.query.values_select and self.query.values_select:\n                    compiler.query.set_values((\n                        *self.query.extra_select,\n                        *self.query.values_select,\n                        *self.query.annotation_select,\n                    ))\n                parts += (compiler.as_sql(),)\n            except EmptyResultSet:\n                # Omit the empty queryset with UNION and with DIFFERENCE if the\n                # first queryset is nonempty.\n                if combinator == 'union' or (combinator == 'difference' and parts):\n                    continue\n                raise\n        if not parts:\n            raise EmptyResultSet\n        combinator_sql = self.connection.ops.set_operators[combinator]\n        if all and combinator == 'union':\n            combinator_sql += ' ALL'\n        braces = '({})' if features.supports_slicing_ordering_in_compound else '{}'\n        sql_parts, args_parts = zip(*((braces.format(sql), args) for sql, args in parts))\n        result = [' {} '.format(combinator_sql).join(sql_parts)]\n        params = []\n        for part in args_parts:\n            params.extend(part)\n        return result, params"
      }
    ]
  },
  "Justification": "Candidate A is the most helpful because it involves complex Django queries that can lead to unexpected results based on how Django interacts with the database, similar to the use case in the CURRENT bug report. Both reports deal with subqueries and potential ambiguities involving fields across related models, which aligns closely with the CURRENT bug's issue regarding ambiguous \"GROUP BY\" clauses in the SQL generated by Django. The insights gained from Candidate A's resolution, particularly regarding handling SQL operations on different database backends, could provide valuable context for tackling the CURRENT bug.",
  "instance_id": "django__django-12589",
  "repo": "django/django",
  "created_at": "2020-03-19T19:04:17Z",
  "problem_statement": "Django 3.0: \"GROUP BY\" clauses error with tricky field annotation\nDescription\n\t\nLet's pretend that we have next model structure with next model's relations:\nclass A(models.Model):\n\tbs = models.ManyToManyField('B',\n\t\t\t\t\t\t\t\trelated_name=\"a\",\n\t\t\t\t\t\t\t\tthrough=\"AB\")\nclass B(models.Model):\n\tpass\nclass AB(models.Model):\n\ta = models.ForeignKey(A, on_delete=models.CASCADE, related_name=\"ab_a\")\n\tb = models.ForeignKey(B, on_delete=models.CASCADE, related_name=\"ab_b\")\n\tstatus = models.IntegerField()\nclass C(models.Model):\n\ta = models.ForeignKey(\n\t\tA,\n\t\tnull=True,\n\t\tblank=True,\n\t\ton_delete=models.SET_NULL,\n\t\trelated_name=\"c\",\n\t\tverbose_name=_(\"a\")\n\t)\n\tstatus = models.IntegerField()\nLet's try to evaluate next query\nab_query = AB.objects.filter(a=OuterRef(\"pk\"), b=1)\nfilter_conditions = Q(pk=1) | Q(ab_a__b=1)\nquery = A.objects.\\\n\tfilter(filter_conditions).\\\n\tannotate(\n\t\tstatus=Subquery(ab_query.values(\"status\")),\n\t\tc_count=Count(\"c\"),\n)\nanswer = query.values(\"status\").annotate(total_count=Count(\"status\"))\nprint(answer.query)\nprint(answer)\nOn Django 3.0.4 we have an error\ndjango.db.utils.ProgrammingError: column reference \"status\" is ambiguous\nand query is next:\nSELECT (SELECT U0.\"status\" FROM \"test_app_ab\" U0 WHERE (U0.\"a_id\" = \"test_app_a\".\"id\" AND U0.\"b_id\" = 1)) AS \"status\", COUNT((SELECT U0.\"status\" FROM \"test_app_ab\" U0 WHERE (U0.\"a_id\" = \"test_app_a\".\"id\" AND U0.\"b_id\" = 1))) AS \"total_count\" FROM \"test_app_a\" LEFT OUTER JOIN \"test_app_ab\" ON (\"test_app_a\".\"id\" = \"test_app_ab\".\"a_id\") LEFT OUTER JOIN \"test_app_c\" ON (\"test_app_a\".\"id\" = \"test_app_c\".\"a_id\") WHERE (\"test_app_a\".\"id\" = 1 OR \"test_app_ab\".\"b_id\" = 1) GROUP BY \"status\"\nHowever, Django 2.2.11 processed this query properly with the next query:\nSELECT (SELECT U0.\"status\" FROM \"test_app_ab\" U0 WHERE (U0.\"a_id\" = (\"test_app_a\".\"id\") AND U0.\"b_id\" = 1)) AS \"status\", COUNT((SELECT U0.\"status\" FROM \"test_app_ab\" U0 WHERE (U0.\"a_id\" = (\"test_app_a\".\"id\") AND U0.\"b_id\" = 1))) AS \"total_count\" FROM \"test_app_a\" LEFT OUTER JOIN \"test_app_ab\" ON (\"test_app_a\".\"id\" = \"test_app_ab\".\"a_id\") LEFT OUTER JOIN \"test_app_c\" ON (\"test_app_a\".\"id\" = \"test_app_c\".\"a_id\") WHERE (\"test_app_a\".\"id\" = 1 OR \"test_app_ab\".\"b_id\" = 1) GROUP BY (SELECT U0.\"status\" FROM \"test_app_ab\" U0 WHERE (U0.\"a_id\" = (\"test_app_a\".\"id\") AND U0.\"b_id\" = 1))\nso, the difference in \"GROUP BY\" clauses\n(as DB provider uses \"django.db.backends.postgresql\", postgresql 11)\n",
  "patch": "diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py\n--- a/django/db/models/sql/query.py\n+++ b/django/db/models/sql/query.py\n@@ -1927,6 +1927,19 @@ def set_group_by(self, allow_aliases=True):\n         primary key, and the query would be equivalent, the optimization\n         will be made automatically.\n         \"\"\"\n+        # Column names from JOINs to check collisions with aliases.\n+        if allow_aliases:\n+            column_names = set()\n+            seen_models = set()\n+            for join in list(self.alias_map.values())[1:]:  # Skip base table.\n+                model = join.join_field.related_model\n+                if model not in seen_models:\n+                    column_names.update({\n+                        field.column\n+                        for field in model._meta.local_concrete_fields\n+                    })\n+                    seen_models.add(model)\n+\n         group_by = list(self.select)\n         if self.annotation_select:\n             for alias, annotation in self.annotation_select.items():\n@@ -1940,7 +1953,7 @@ def set_group_by(self, allow_aliases=True):\n                     warnings.warn(msg, category=RemovedInDjango40Warning)\n                     group_by_cols = annotation.get_group_by_cols()\n                 else:\n-                    if not allow_aliases:\n+                    if not allow_aliases or alias in column_names:\n                         alias = None\n                     group_by_cols = annotation.get_group_by_cols(alias=alias)\n                 group_by.extend(group_by_cols)\n"
}