{
  "instance_id": "django__django-16910",
  "repo": "django/django",
  "created_at": "2023-05-31T22:28:10Z",
  "problem_statement": "QuerySet.only() doesn't work with select_related() on a reverse OneToOneField relation.\nDescription\n\t\nOn Django 4.2 calling only() with select_related() on a query using the reverse lookup for a OneToOne relation does not generate the correct query.\nAll the fields from the related model are still included in the generated SQL.\nSample models:\nclass Main(models.Model):\n\tmain_field_1 = models.CharField(blank=True, max_length=45)\n\tmain_field_2 = models.CharField(blank=True, max_length=45)\n\tmain_field_3 = models.CharField(blank=True, max_length=45)\nclass Secondary(models.Model):\n\tmain = models.OneToOneField(Main, primary_key=True, related_name='secondary', on_delete=models.CASCADE)\n\tsecondary_field_1 = models.CharField(blank=True, max_length=45)\n\tsecondary_field_2 = models.CharField(blank=True, max_length=45)\n\tsecondary_field_3 = models.CharField(blank=True, max_length=45)\nSample code:\nMain.objects.select_related('secondary').only('main_field_1', 'secondary__secondary_field_1')\nGenerated query on Django 4.2.1:\nSELECT \"bugtest_main\".\"id\", \"bugtest_main\".\"main_field_1\", \"bugtest_secondary\".\"main_id\", \"bugtest_secondary\".\"secondary_field_1\", \"bugtest_secondary\".\"secondary_field_2\", \"bugtest_secondary\".\"secondary_field_3\" FROM \"bugtest_main\" LEFT OUTER JOIN \"bugtest_secondary\" ON (\"bugtest_main\".\"id\" = \"bugtest_secondary\".\"main_id\")\nGenerated query on Django 4.1.9:\nSELECT \"bugtest_main\".\"id\", \"bugtest_main\".\"main_field_1\", \"bugtest_secondary\".\"main_id\", \"bugtest_secondary\".\"secondary_field_1\" FROM \"bugtest_main\" LEFT OUTER JOIN \"bugtest_secondary\" ON (\"bugtest_main\".\"id\" = \"bugtest_secondary\".\"main_id\")\n",
  "patch": "diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py\n--- a/django/db/models/sql/query.py\n+++ b/django/db/models/sql/query.py\n@@ -779,7 +779,13 @@ def _get_only_select_mask(self, opts, mask, select_mask=None):\n         # Only include fields mentioned in the mask.\n         for field_name, field_mask in mask.items():\n             field = opts.get_field(field_name)\n-            field_select_mask = select_mask.setdefault(field, {})\n+            # Retrieve the actual field associated with reverse relationships\n+            # as that's what is expected in the select mask.\n+            if field in opts.related_objects:\n+                field_key = field.field\n+            else:\n+                field_key = field\n+            field_select_mask = select_mask.setdefault(field_key, {})\n             if field_mask:\n                 if not field.is_relation:\n                     raise FieldError(next(iter(field_mask)))\n",
  "similar_bug_items": [
    {
      "pr_number": 8754,
      "pr_title": "Fixed #28391 -- Fixed Cast() with CharField and max_length on MySQL.",
      "pr_body": "https://code.djangoproject.com/ticket/28391",
      "issue_id": 28391,
      "issue_title": "Cast doesn't take into account CharField's max_length on MySQL.",
      "issue_body": "",
      "issue_closed_at": "2017-07-17T14:12:39",
      "base_commit": "feeafdad02e2874e2e2f879a825d3527f6b193ad",
      "changes": [
        {
          "file": "django/db/backends/sqlite3/features.py",
          "type": "class",
          "name": "DatabaseFeatures",
          "code": "class DatabaseFeatures(BaseDatabaseFeatures):\n    # SQLite cannot handle us only partially reading from a cursor's result set\n    # and then writing the same rows to the database in another cursor. This\n    # setting ensures we always read result sets fully into memory all in one\n    # go.\n    can_use_chunked_reads = False\n    test_db_allows_multiple_connections = False\n    supports_unspecified_pk = True\n    supports_timezones = False\n    max_query_params = 999\n    supports_mixed_date_datetime_comparisons = False\n    has_bulk_insert = True\n    supports_column_check_constraints = False\n    autocommits_when_autocommit_is_off = True\n    can_introspect_decimal_field = False\n    can_introspect_positive_integer_field = True\n    can_introspect_small_integer_field = True\n    supports_transactions = True\n    atomic_transactions = False\n    can_rollback_ddl = True\n    supports_paramstyle_pyformat = False\n    supports_sequence_reset = False\n    can_clone_databases = True\n    supports_temporal_subtraction = True\n    ignores_table_name_case = True\n\n    @cached_property\n    def uses_savepoints(self):\n        return Database.sqlite_version_info >= (3, 6, 8)\n\n    @cached_property\n    def supports_index_column_ordering(self):\n        return Database.sqlite_version_info >= (3, 3, 0)\n\n    @cached_property\n    def can_release_savepoints(self):\n        return self.uses_savepoints\n\n    @cached_property\n    def can_share_in_memory_db(self):\n        return (\n            Database.__name__ == 'sqlite3.dbapi2' and\n            Database.sqlite_version_info >= (3, 7, 13)\n        )\n\n    @cached_property\n    def supports_stddev(self):\n        \"\"\"\n        Confirm support for STDDEV and related stats functions.\n\n        SQLite supports STDDEV as an extension package; so\n        connection.ops.check_expression_support() can't unilaterally\n        rule out support for STDDEV. Manually check whether the call works.\n        \"\"\"\n        with self.connection.cursor() as cursor:\n            cursor.execute('CREATE TABLE STDDEV_TEST (X INT)')\n            try:\n                cursor.execute('SELECT STDDEV(*) FROM STDDEV_TEST')\n                has_support = True\n            except utils.DatabaseError:\n                has_support = False\n            cursor.execute('DROP TABLE STDDEV_TEST')\n        return has_support"
        },
        {
          "file": "django/db/models/fields/__init__.py",
          "type": "function",
          "name": "clean",
          "class_name": "Field",
          "code": "def clean(self, value, model_instance):\n        \"\"\"\n        Convert the value's type and run validation. Validation errors\n        from to_python() and validate() are propagated. Return the correct\n        value if no error is raised.\n        \"\"\"\n        value = self.to_python(value)\n        self.validate(value, model_instance)\n        self.run_validators(value)\n        return value"
        },
        {
          "file": "django/db/models/fields/__init__.py",
          "type": "function",
          "name": "db_type",
          "class_name": "Field",
          "code": "def db_type(self, connection):\n        \"\"\"\n        Return the database column data type for this field, for the provided\n        connection.\n        \"\"\"\n        # The default implementation of this method looks at the\n        # backend-specific data_types dictionary, looking up the field by its\n        # \"internal type\".\n        #\n        # A Field class can implement the get_internal_type() method to specify\n        # which *preexisting* Django Field class it's most similar to -- i.e.,\n        # a custom field might be represented by a TEXT column type, which is\n        # the same as the TextField Django field type, which means the custom\n        # field's get_internal_type() returns 'TextField'.\n        #\n        # But the limitation of the get_internal_type() / data_types approach\n        # is that it cannot handle database column types that aren't already\n        # mapped to one of the built-in Django field types. In this case, you\n        # can implement db_type() instead of get_internal_type() to specify\n        # exactly which wacky database column type you want to use.\n        data = DictWrapper(self.__dict__, connection.ops.quote_name, \"qn_\")\n        try:\n            return connection.data_types[self.get_internal_type()] % data\n        except KeyError:\n            return None"
        },
        {
          "file": "django/db/models/functions/base.py",
          "type": "class",
          "name": "Cast",
          "code": "class Cast(Func):\n    \"\"\"Coerce an expression to a new field type.\"\"\"\n    function = 'CAST'\n    template = '%(function)s(%(expressions)s AS %(db_type)s)'\n\n    mysql_types = {\n        fields.CharField: 'char',\n        fields.IntegerField: 'signed integer',\n        fields.BigIntegerField: 'signed integer',\n        fields.SmallIntegerField: 'signed integer',\n        fields.FloatField: 'signed',\n        fields.PositiveIntegerField: 'unsigned integer',\n        fields.PositiveSmallIntegerField: 'unsigned integer',\n    }\n\n    def __init__(self, expression, output_field):\n        super().__init__(expression, output_field=output_field)\n\n    def as_sql(self, compiler, connection, **extra_context):\n        if 'db_type' not in extra_context:\n            extra_context['db_type'] = self.output_field.db_type(connection)\n        return super().as_sql(compiler, connection, **extra_context)\n\n    def as_mysql(self, compiler, connection):\n        extra_context = {}\n        output_field_class = type(self.output_field)\n        if output_field_class in self.mysql_types:\n            extra_context['db_type'] = self.mysql_types[output_field_class]\n        return self.as_sql(compiler, connection, **extra_context)\n\n    def as_postgresql(self, compiler, connection):\n        # CAST would be valid too, but the :: shortcut syntax is more readable.\n        return self.as_sql(compiler, connection, template='%(expressions)s::%(db_type)s')"
        },
        {
          "file": "django/db/models/functions/base.py",
          "type": "function",
          "name": "as_mysql",
          "class_name": "Length",
          "code": "def as_mysql(self, compiler, connection):\n        return super().as_sql(compiler, connection, function='CHAR_LENGTH')"
        }
      ]
    },
    {
      "pr_number": 7097,
      "pr_title": "Fixed #27068 -- Unified form field initial data retrieval.",
      "pr_body": "https://code.djangoproject.com/ticket/27068\n",
      "issue_id": 27068,
      "issue_title": "Acquire form's initial data more consistently",
      "issue_body": "",
      "issue_closed_at": "2016-08-18T19:51:32",
      "base_commit": "13857b45ca54a519a58361238442af84262c0d23",
      "changes": [
        {
          "file": "django/forms/boundfield.py",
          "type": "function",
          "name": "value",
          "class_name": "BoundField",
          "code": "def value(self):\n        \"\"\"\n        Returns the value for this BoundField, using the initial value if\n        the form is not bound or the data otherwise.\n        \"\"\"\n        if not self.form.is_bound:\n            data = self.initial\n        else:\n            data = self.field.bound_data(\n                self.data, self.form.initial.get(self.name, self.field.initial)\n            )\n        return self.field.prepare_value(data)"
        },
        {
          "file": "django/forms/boundfield.py",
          "type": "function",
          "name": "id_for_label",
          "class_name": "BoundField",
          "code": "def id_for_label(self):\n        \"\"\"\n        Wrapper around the field widget's `id_for_label` method.\n        Useful, for example, for focusing on this field regardless of whether\n        it has a single widget or a MultiWidget.\n        \"\"\"\n        widget = self.field.widget\n        id_ = widget.attrs.get('id') or self.auto_id\n        return widget.id_for_label(id_)"
        },
        {
          "file": "django/forms/forms.py",
          "type": "function",
          "name": "_clean_fields",
          "class_name": "BaseForm",
          "code": "def _clean_fields(self):\n        for name, field in self.fields.items():\n            # value_from_datadict() gets the data from the data dictionaries.\n            # Each widget type knows how to retrieve its own data, because some\n            # widgets split data over several HTML fields.\n            if field.disabled:\n                value = self.initial.get(name, field.initial)\n            else:\n                value = field.widget.value_from_datadict(self.data, self.files, self.add_prefix(name))\n            try:\n                if isinstance(field, FileField):\n                    initial = self.initial.get(name, field.initial)\n                    value = field.clean(value, initial)\n                else:\n                    value = field.clean(value)\n                self.cleaned_data[name] = value\n                if hasattr(self, 'clean_%s' % name):\n                    value = getattr(self, 'clean_%s' % name)()\n                    self.cleaned_data[name] = value\n            except ValidationError as e:\n                self.add_error(name, e)"
        },
        {
          "file": "django/forms/forms.py",
          "type": "function",
          "name": "changed_data",
          "class_name": "BaseForm",
          "code": "def changed_data(self):\n        data = []\n        for name, field in self.fields.items():\n            prefixed_name = self.add_prefix(name)\n            data_value = field.widget.value_from_datadict(self.data, self.files, prefixed_name)\n            if not field.show_hidden_initial:\n                initial_value = self.initial.get(name, field.initial)\n                if callable(initial_value):\n                    initial_value = initial_value()\n            else:\n                initial_prefixed_name = self.add_initial_prefix(name)\n                hidden_widget = field.hidden_widget()\n                try:\n                    initial_value = field.to_python(hidden_widget.value_from_datadict(\n                        self.data, self.files, initial_prefixed_name))\n                except ValidationError:\n                    # Always assume data has changed if validation fails.\n                    data.append(name)\n                    continue\n            if field.has_changed(initial_value, data_value):\n                data.append(name)\n        return data"
        },
        {
          "file": "django/forms/forms.py",
          "type": "function",
          "name": "visible_fields",
          "class_name": "BaseForm",
          "code": "def visible_fields(self):\n        \"\"\"\n        Returns a list of BoundField objects that aren't hidden fields.\n        The opposite of the hidden_fields() method.\n        \"\"\"\n        return [field for field in self if not field.is_hidden]"
        }
      ]
    },
    {
      "pr_number": 15563,
      "pr_title": "Fixed #33618 -- Fixed MTI updates outside of primary key chain.",
      "pr_body": "ticket-33618",
      "issue_id": 33618,
      "issue_title": "Wrong behavior on queryset update when multiple inheritance",
      "issue_body": "",
      "issue_closed_at": "2022-04-07T02:32:00",
      "base_commit": "9ffd4eae2ce7a7100c98f681e2b6ab818df384a4",
      "changes": [
        {
          "file": "django/db/models/sql/compiler.py",
          "type": "function",
          "name": "pre_sql_setup",
          "class_name": "SQLUpdateCompiler",
          "code": "def pre_sql_setup(self):\n        \"\"\"\n        If the update depends on results from other tables, munge the \"where\"\n        conditions to match the format required for (portable) SQL updates.\n\n        If multiple updates are required, pull out the id values to update at\n        this point so that they don't change as a result of the progressive\n        updates.\n        \"\"\"\n        refcounts_before = self.query.alias_refcount.copy()\n        # Ensure base table is in the query\n        self.query.get_initial_alias()\n        count = self.query.count_active_tables()\n        if not self.query.related_updates and count == 1:\n            return\n        query = self.query.chain(klass=Query)\n        query.select_related = False\n        query.clear_ordering(force=True)\n        query.extra = {}\n        query.select = []\n        query.add_fields([query.get_meta().pk.name])\n        super().pre_sql_setup()\n\n        must_pre_select = (\n            count > 1 and not self.connection.features.update_can_self_select\n        )\n\n        # Now we adjust the current query: reset the where clause and get rid\n        # of all the tables we don't need (since they're in the sub-select).\n        self.query.clear_where()\n        if self.query.related_updates or must_pre_select:\n            # Either we're using the idents in multiple update queries (so\n            # don't want them to change), or the db backend doesn't support\n            # selecting from the updating table (e.g. MySQL).\n            idents = []\n            for rows in query.get_compiler(self.using).execute_sql(MULTI):\n                idents.extend(r[0] for r in rows)\n            self.query.add_filter(\"pk__in\", idents)\n            self.query.related_ids = idents\n        else:\n            # The fast path. Filters and updates in one query.\n            self.query.add_filter(\"pk__in\", query)\n        self.query.reset_refcounts(refcounts_before)"
        },
        {
          "file": "django/db/models/sql/compiler.py",
          "type": "function",
          "name": "pre_sql_setup",
          "class_name": "SQLUpdateCompiler",
          "code": "def pre_sql_setup(self):\n        \"\"\"\n        If the update depends on results from other tables, munge the \"where\"\n        conditions to match the format required for (portable) SQL updates.\n\n        If multiple updates are required, pull out the id values to update at\n        this point so that they don't change as a result of the progressive\n        updates.\n        \"\"\"\n        refcounts_before = self.query.alias_refcount.copy()\n        # Ensure base table is in the query\n        self.query.get_initial_alias()\n        count = self.query.count_active_tables()\n        if not self.query.related_updates and count == 1:\n            return\n        query = self.query.chain(klass=Query)\n        query.select_related = False\n        query.clear_ordering(force=True)\n        query.extra = {}\n        query.select = []\n        query.add_fields([query.get_meta().pk.name])\n        super().pre_sql_setup()\n\n        must_pre_select = (\n            count > 1 and not self.connection.features.update_can_self_select\n        )\n\n        # Now we adjust the current query: reset the where clause and get rid\n        # of all the tables we don't need (since they're in the sub-select).\n        self.query.clear_where()\n        if self.query.related_updates or must_pre_select:\n            # Either we're using the idents in multiple update queries (so\n            # don't want them to change), or the db backend doesn't support\n            # selecting from the updating table (e.g. MySQL).\n            idents = []\n            for rows in query.get_compiler(self.using).execute_sql(MULTI):\n                idents.extend(r[0] for r in rows)\n            self.query.add_filter(\"pk__in\", idents)\n            self.query.related_ids = idents\n        else:\n            # The fast path. Filters and updates in one query.\n            self.query.add_filter(\"pk__in\", query)\n        self.query.reset_refcounts(refcounts_before)"
        },
        {
          "file": "django/db/models/sql/subqueries.py",
          "type": "function",
          "name": "get_related_updates",
          "class_name": "UpdateQuery",
          "code": "def get_related_updates(self):\n        \"\"\"\n        Return a list of query objects: one for each update required to an\n        ancestor model. Each query will have the same filtering conditions as\n        the current query but will only update a single table.\n        \"\"\"\n        if not self.related_updates:\n            return []\n        result = []\n        for model, values in self.related_updates.items():\n            query = UpdateQuery(model)\n            query.values = values\n            if self.related_ids is not None:\n                query.add_filter(\"pk__in\", self.related_ids)\n            result.append(query)\n        return result"
        }
      ]
    },
    {
      "pr_number": 9112,
      "pr_title": "Fixed #27846 -- clear all cached reverse relationships on refresh_from_db()",
      "pr_body": "https://code.djangoproject.com/ticket/27846",
      "issue_id": 27846,
      "issue_title": "refresh_from_db() doesn't clear reverse OneToOneFields",
      "issue_body": "",
      "issue_closed_at": "2017-10-12T16:25:22",
      "base_commit": "df0aebc893973c78d7d2cda712ba4133dbe29b6e",
      "changes": [
        {
          "file": "django/db/models/base.py",
          "type": "function",
          "name": "refresh_from_db",
          "class_name": "Model",
          "code": "def refresh_from_db(self, using=None, fields=None):\n        \"\"\"\n        Reload field values from the database.\n\n        By default, the reloading happens from the database this instance was\n        loaded from, or by the read router if this instance wasn't loaded from\n        any database. The using parameter will override the default.\n\n        Fields can be used to specify which fields to reload. The fields\n        should be an iterable of field attnames. If fields is None, then\n        all non-deferred fields are reloaded.\n\n        When accessing deferred fields of an instance, the deferred loading\n        of the field will call this method.\n        \"\"\"\n        if fields is not None:\n            if len(fields) == 0:\n                return\n            if any(LOOKUP_SEP in f for f in fields):\n                raise ValueError(\n                    'Found \"%s\" in fields argument. Relations and transforms '\n                    'are not allowed in fields.' % LOOKUP_SEP)\n\n        db = using if using is not None else self._state.db\n        db_instance_qs = self.__class__._default_manager.using(db).filter(pk=self.pk)\n\n        # Use provided fields, if not set then reload all non-deferred fields.\n        deferred_fields = self.get_deferred_fields()\n        if fields is not None:\n            fields = list(fields)\n            db_instance_qs = db_instance_qs.only(*fields)\n        elif deferred_fields:\n            fields = [f.attname for f in self._meta.concrete_fields\n                      if f.attname not in deferred_fields]\n            db_instance_qs = db_instance_qs.only(*fields)\n\n        db_instance = db_instance_qs.get()\n        non_loaded_fields = db_instance.get_deferred_fields()\n        for field in self._meta.concrete_fields:\n            if field.attname in non_loaded_fields:\n                # This field wasn't refreshed - skip ahead.\n                continue\n            setattr(self, field.attname, getattr(db_instance, field.attname))\n            # Throw away stale foreign key references.\n            if field.is_relation and field.is_cached(self):\n                rel_instance = field.get_cached_value(self)\n                local_val = getattr(db_instance, field.attname)\n                related_val = None if rel_instance is None else getattr(rel_instance, field.target_field.attname)\n                if local_val != related_val or (local_val is None and related_val is None):\n                    field.delete_cached_value(self)\n        self._state.db = db_instance._state.db"
        }
      ]
    },
    {
      "pr_number": 8905,
      "pr_title": "Fixed #28375 -- Fixed KeyError raised on reverse prefetch of a model with OneToOne primary key to non-pk field",
      "pr_body": "https://code.djangoproject.com/ticket/28375",
      "issue_id": 28375,
      "issue_title": "QuerySet.prefetch_related() crashes with KeyError if model uses to_field and string primary key",
      "issue_body": "",
      "issue_closed_at": "2017-08-21T15:47:49",
      "base_commit": "b5ad5c628a0327c2208d76e5cacb3cb6f48750b5",
      "changes": [
        {
          "file": "django/db/models/fields/related_descriptors.py",
          "type": "function",
          "name": "get_prefetch_queryset",
          "class_name": "ManyRelatedManager",
          "code": "def get_prefetch_queryset(self, instances, queryset=None):\n            if queryset is None:\n                queryset = super().get_queryset()\n\n            queryset._add_hints(instance=instances[0])\n            queryset = queryset.using(queryset._db or self._db)\n\n            query = {'%s__in' % self.query_field_name: instances}\n            queryset = queryset._next_is_sticky().filter(**query)\n\n            # M2M: need to annotate the query in order to get the primary model\n            # that the secondary model was actually related to. We know that\n            # there will already be a join on the join table, so we can just add\n            # the select.\n\n            # For non-autocreated 'through' models, can't assume we are\n            # dealing with PK values.\n            fk = self.through._meta.get_field(self.source_field_name)\n            join_table = fk.model._meta.db_table\n            connection = connections[queryset.db]\n            qn = connection.ops.quote_name\n            queryset = queryset.extra(select={\n                '_prefetch_related_val_%s' % f.attname:\n                '%s.%s' % (qn(join_table), qn(f.column)) for f in fk.local_related_fields})\n            return (\n                queryset,\n                lambda result: tuple(\n                    getattr(result, '_prefetch_related_val_%s' % f.attname)\n                    for f in fk.local_related_fields\n                ),\n                lambda inst: tuple(\n                    f.get_db_prep_value(getattr(inst, f.attname), connection)\n                    for f in fk.foreign_related_fields\n                ),\n                False,\n                self.prefetch_cache_name,\n                False,\n            )"
        }
      ]
    }
  ]
}