{
  "instance_id": "django__django-17051",
  "repo": "django/django",
  "created_at": "2023-07-07T11:01:09Z",
  "problem_statement": "Allow returning IDs in QuerySet.bulk_create() when updating conflicts.\nDescription\n\t\nCurrently, when using bulk_create with a conflict handling flag turned on (e.g. ignore_conflicts or update_conflicts), the primary keys are not set in the returned queryset, as documented in bulk_create.\nWhile I understand using ignore_conflicts can lead to PostgreSQL not returning the IDs when a row is ignored (see ​this SO thread), I don't understand why we don't return the IDs in the case of update_conflicts.\nFor instance:\nMyModel.objects.bulk_create([MyModel(...)], update_conflicts=True, update_fields=[...], unique_fields=[...])\ngenerates a query without a RETURNING my_model.id part:\nINSERT INTO \"my_model\" (...)\nVALUES (...)\n\tON CONFLICT(...) DO UPDATE ...\nIf I append the RETURNING my_model.id clause, the query is indeed valid and the ID is returned (checked with PostgreSQL).\nI investigated a bit and ​this in Django source is where the returning_fields gets removed.\nI believe we could discriminate the cases differently so as to keep those returning_fields in the case of update_conflicts.\nThis would be highly helpful when using bulk_create as a bulk upsert feature.\n",
  "patch": "diff --git a/django/db/models/query.py b/django/db/models/query.py\n--- a/django/db/models/query.py\n+++ b/django/db/models/query.py\n@@ -1837,12 +1837,17 @@ def _batched_insert(\n         inserted_rows = []\n         bulk_return = connection.features.can_return_rows_from_bulk_insert\n         for item in [objs[i : i + batch_size] for i in range(0, len(objs), batch_size)]:\n-            if bulk_return and on_conflict is None:\n+            if bulk_return and (\n+                on_conflict is None or on_conflict == OnConflict.UPDATE\n+            ):\n                 inserted_rows.extend(\n                     self._insert(\n                         item,\n                         fields=fields,\n                         using=self.db,\n+                        on_conflict=on_conflict,\n+                        update_fields=update_fields,\n+                        unique_fields=unique_fields,\n                         returning_fields=self.model._meta.db_returning_fields,\n                     )\n                 )\n",
  "similar_bug_items": [
    {
      "pr_number": 16317,
      "pr_title": "Fixed #34177 -- Fixed QuerySet.bulk_create() crash on \"pk\" in unique_fields.",
      "pr_body": "Noticed when reviewing #16315.\r\n\r\nBug in 0f6946495a8ec955b471ca1baaf408ceb53d4796.\r\n\r\nticket-34177",
      "issue_id": 34177,
      "issue_title": "QuerySet.bulk_create() crashes on \"pk\" in unique_fields.",
      "issue_body": "QuerySet.bulk_create()\ncrashes on\n\"pk\"\nin\nunique_fields\nwhich should be allowed.\nFile \"/django/django/db/backends/utils.py\", line 89, in _execute\n    return self.cursor.execute(sql, params)\ndjango.db.utils.ProgrammingError: column \"pk\" does not exist\nLINE 1: ...S (3127, 3, 3, 'c'), (3128, 4, 4, 'd') ON CONFLICT(\"pk\") DO ...\nBug in\n0f6946495a8ec955b471ca1baaf408ceb53d4796\n.",
      "issue_closed_at": "2022-11-22T07:27:06",
      "base_commit": "744a1af7f943106e30d538e6ace55c2c66ccd791",
      "changes": [
        {
          "file": "django/db/models/query.py",
          "type": "function",
          "name": "_check_bulk_create_options",
          "class_name": "QuerySet",
          "code": "def _check_bulk_create_options(\n        self, ignore_conflicts, update_conflicts, update_fields, unique_fields\n    ):\n        if ignore_conflicts and update_conflicts:\n            raise ValueError(\n                \"ignore_conflicts and update_conflicts are mutually exclusive.\"\n            )\n        db_features = connections[self.db].features\n        if ignore_conflicts:\n            if not db_features.supports_ignore_conflicts:\n                raise NotSupportedError(\n                    \"This database backend does not support ignoring conflicts.\"\n                )\n            return OnConflict.IGNORE\n        elif update_conflicts:\n            if not db_features.supports_update_conflicts:\n                raise NotSupportedError(\n                    \"This database backend does not support updating conflicts.\"\n                )\n            if not update_fields:\n                raise ValueError(\n                    \"Fields that will be updated when a row insertion fails \"\n                    \"on conflicts must be provided.\"\n                )\n            if unique_fields and not db_features.supports_update_conflicts_with_target:\n                raise NotSupportedError(\n                    \"This database backend does not support updating \"\n                    \"conflicts with specifying unique fields that can trigger \"\n                    \"the upsert.\"\n                )\n            if not unique_fields and db_features.supports_update_conflicts_with_target:\n                raise ValueError(\n                    \"Unique fields that can trigger the upsert must be provided.\"\n                )\n            # Updating primary keys and non-concrete fields is forbidden.\n            update_fields = [self.model._meta.get_field(name) for name in update_fields]\n            if any(not f.concrete or f.many_to_many for f in update_fields):\n                raise ValueError(\n                    \"bulk_create() can only be used with concrete fields in \"\n                    \"update_fields.\"\n                )\n            if any(f.primary_key for f in update_fields):\n                raise ValueError(\n                    \"bulk_create() cannot be used with primary keys in \"\n                    \"update_fields.\"\n                )\n            if unique_fields:\n                # Primary key is allowed in unique_fields.\n                unique_fields = [\n                    self.model._meta.get_field(name)\n                    for name in unique_fields\n                    if name != \"pk\"\n                ]\n                if any(not f.concrete or f.many_to_many for f in unique_fields):\n                    raise ValueError(\n                        \"bulk_create() can only be used with concrete fields \"\n                        \"in unique_fields.\"\n                    )\n            return OnConflict.UPDATE\n        return None"
        },
        {
          "file": "django/db/models/query.py",
          "type": "function",
          "name": "bulk_create",
          "class_name": "QuerySet",
          "code": "def bulk_create(\n        self,\n        objs,\n        batch_size=None,\n        ignore_conflicts=False,\n        update_conflicts=False,\n        update_fields=None,\n        unique_fields=None,\n    ):\n        \"\"\"\n        Insert each of the instances into the database. Do *not* call\n        save() on each of the instances, do not send any pre/post_save\n        signals, and do not set the primary key attribute if it is an\n        autoincrement field (except if features.can_return_rows_from_bulk_insert=True).\n        Multi-table models are not supported.\n        \"\"\"\n        # When you bulk insert you don't get the primary keys back (if it's an\n        # autoincrement, except if can_return_rows_from_bulk_insert=True), so\n        # you can't insert into the child tables which references this. There\n        # are two workarounds:\n        # 1) This could be implemented if you didn't have an autoincrement pk\n        # 2) You could do it by doing O(n) normal inserts into the parent\n        #    tables to get the primary keys back and then doing a single bulk\n        #    insert into the childmost table.\n        # We currently set the primary keys on the objects when using\n        # PostgreSQL via the RETURNING ID clause. It should be possible for\n        # Oracle as well, but the semantics for extracting the primary keys is\n        # trickier so it's not done yet.\n        if batch_size is not None and batch_size <= 0:\n            raise ValueError(\"Batch size must be a positive integer.\")\n        # Check that the parents share the same concrete model with the our\n        # model to detect the inheritance pattern ConcreteGrandParent ->\n        # MultiTableParent -> ProxyChild. Simply checking self.model._meta.proxy\n        # would not identify that case as involving multiple tables.\n        for parent in self.model._meta.get_parent_list():\n            if parent._meta.concrete_model is not self.model._meta.concrete_model:\n                raise ValueError(\"Can't bulk create a multi-table inherited model\")\n        if not objs:\n            return objs\n        on_conflict = self._check_bulk_create_options(\n            ignore_conflicts,\n            update_conflicts,\n            update_fields,\n            unique_fields,\n        )\n        self._for_write = True\n        opts = self.model._meta\n        fields = opts.concrete_fields\n        objs = list(objs)\n        self._prepare_for_bulk_create(objs)\n        with transaction.atomic(using=self.db, savepoint=False):\n            objs_with_pk, objs_without_pk = partition(lambda o: o.pk is None, objs)\n            if objs_with_pk:\n                returned_columns = self._batched_insert(\n                    objs_with_pk,\n                    fields,\n                    batch_size,\n                    on_conflict=on_conflict,\n                    update_fields=update_fields,\n                    unique_fields=unique_fields,\n                )\n                for obj_with_pk, results in zip(objs_with_pk, returned_columns):\n                    for result, field in zip(results, opts.db_returning_fields):\n                        if field != opts.pk:\n                            setattr(obj_with_pk, field.attname, result)\n                for obj_with_pk in objs_with_pk:\n                    obj_with_pk._state.adding = False\n                    obj_with_pk._state.db = self.db\n            if objs_without_pk:\n                fields = [f for f in fields if not isinstance(f, AutoField)]\n                returned_columns = self._batched_insert(\n                    objs_without_pk,\n                    fields,\n                    batch_size,\n                    on_conflict=on_conflict,\n                    update_fields=update_fields,\n                    unique_fields=unique_fields,\n                )\n                connection = connections[self.db]\n                if (\n                    connection.features.can_return_rows_from_bulk_insert\n                    and on_conflict is None\n                ):\n                    assert len(returned_columns) == len(objs_without_pk)\n                for obj_without_pk, results in zip(objs_without_pk, returned_columns):\n                    for result, field in zip(results, opts.db_returning_fields):\n                        setattr(obj_without_pk, field.attname, result)\n                    obj_without_pk._state.adding = False\n                    obj_without_pk._state.db = self.db\n\n        return objs"
        },
        {
          "file": "django/db/models/query.py",
          "type": "function",
          "name": "bulk_create",
          "class_name": "QuerySet",
          "code": "def bulk_create(\n        self,\n        objs,\n        batch_size=None,\n        ignore_conflicts=False,\n        update_conflicts=False,\n        update_fields=None,\n        unique_fields=None,\n    ):\n        \"\"\"\n        Insert each of the instances into the database. Do *not* call\n        save() on each of the instances, do not send any pre/post_save\n        signals, and do not set the primary key attribute if it is an\n        autoincrement field (except if features.can_return_rows_from_bulk_insert=True).\n        Multi-table models are not supported.\n        \"\"\"\n        # When you bulk insert you don't get the primary keys back (if it's an\n        # autoincrement, except if can_return_rows_from_bulk_insert=True), so\n        # you can't insert into the child tables which references this. There\n        # are two workarounds:\n        # 1) This could be implemented if you didn't have an autoincrement pk\n        # 2) You could do it by doing O(n) normal inserts into the parent\n        #    tables to get the primary keys back and then doing a single bulk\n        #    insert into the childmost table.\n        # We currently set the primary keys on the objects when using\n        # PostgreSQL via the RETURNING ID clause. It should be possible for\n        # Oracle as well, but the semantics for extracting the primary keys is\n        # trickier so it's not done yet.\n        if batch_size is not None and batch_size <= 0:\n            raise ValueError(\"Batch size must be a positive integer.\")\n        # Check that the parents share the same concrete model with the our\n        # model to detect the inheritance pattern ConcreteGrandParent ->\n        # MultiTableParent -> ProxyChild. Simply checking self.model._meta.proxy\n        # would not identify that case as involving multiple tables.\n        for parent in self.model._meta.get_parent_list():\n            if parent._meta.concrete_model is not self.model._meta.concrete_model:\n                raise ValueError(\"Can't bulk create a multi-table inherited model\")\n        if not objs:\n            return objs\n        on_conflict = self._check_bulk_create_options(\n            ignore_conflicts,\n            update_conflicts,\n            update_fields,\n            unique_fields,\n        )\n        self._for_write = True\n        opts = self.model._meta\n        fields = opts.concrete_fields\n        objs = list(objs)\n        self._prepare_for_bulk_create(objs)\n        with transaction.atomic(using=self.db, savepoint=False):\n            objs_with_pk, objs_without_pk = partition(lambda o: o.pk is None, objs)\n            if objs_with_pk:\n                returned_columns = self._batched_insert(\n                    objs_with_pk,\n                    fields,\n                    batch_size,\n                    on_conflict=on_conflict,\n                    update_fields=update_fields,\n                    unique_fields=unique_fields,\n                )\n                for obj_with_pk, results in zip(objs_with_pk, returned_columns):\n                    for result, field in zip(results, opts.db_returning_fields):\n                        if field != opts.pk:\n                            setattr(obj_with_pk, field.attname, result)\n                for obj_with_pk in objs_with_pk:\n                    obj_with_pk._state.adding = False\n                    obj_with_pk._state.db = self.db\n            if objs_without_pk:\n                fields = [f for f in fields if not isinstance(f, AutoField)]\n                returned_columns = self._batched_insert(\n                    objs_without_pk,\n                    fields,\n                    batch_size,\n                    on_conflict=on_conflict,\n                    update_fields=update_fields,\n                    unique_fields=unique_fields,\n                )\n                connection = connections[self.db]\n                if (\n                    connection.features.can_return_rows_from_bulk_insert\n                    and on_conflict is None\n                ):\n                    assert len(returned_columns) == len(objs_without_pk)\n                for obj_without_pk, results in zip(objs_without_pk, returned_columns):\n                    for result, field in zip(results, opts.db_returning_fields):\n                        setattr(obj_without_pk, field.attname, result)\n                    obj_without_pk._state.adding = False\n                    obj_without_pk._state.db = self.db\n\n        return objs"
        }
      ]
    },
    {
      "pr_number": 16835,
      "pr_title": "Fixed #34544 -- Avoided DBMS_LOB.SUBSTR() wrapping with IS NULL condition on Oracle.",
      "pr_body": "ticket-34544\r\n\r\nRegression in 09ffc5c1212d4ced58b708cbbf3dfbfb77b782ca.\r\n\r\nThanks Michael Smith for the report.\r\n\r\nThis also reverts commit 1e4da439556cdd69eb9f91e07f99cf77997e70d2.",
      "issue_id": 34544,
      "issue_title": "DBMS_LOB.SUBSTR() is unnecessary for \"IS NULL\" condition on Oracle.",
      "issue_body": "Model contains\nsomefield = models.TextField(blank=True, null=True)\nAt 4.1.9:\nprint(MyModel.objects.filter(somefield=None).only('id').query)\nSELECT \"TNAME_MYMODEL\".\"ID\" FROM \"TNAME_MYMODEL\" WHERE \"TNAME_MYMODEL\".\"SOMEFIELD\"  IS NULL\nworks\nAt 4.2.0:\nprint(MyModel.objects.filter(somefield=None).only('id').query)\nSELECT \"TNAME_MYMODEL\".\"ID\" FROM \"TNAME_MYMODEL\" WHERE DBMS_LOB.SUBSTR(\"TNAME_MYMODEL\".\"SOMEFIELD\") IS NULL\n\nDatabaseError: ORA-06502: PL/SQL: numeric or value error: character string buffer too small\nORA-06512: at line 1",
      "issue_closed_at": "2023-05-08T12:34:53",
      "base_commit": "aaf8c76c567e8311f4a85cf74c82fc3d70cc6f12",
      "changes": [
        {
          "file": "django/db/backends/oracle/operations.py",
          "type": "function",
          "name": "fetch_returned_insert_columns",
          "class_name": "DatabaseOperations",
          "code": "def fetch_returned_insert_columns(self, cursor, returning_params):\n        columns = []\n        for param in returning_params:\n            value = param.get_value()\n            if value == []:\n                raise DatabaseError(\n                    \"The database did not return a new row id. Probably \"\n                    '\"ORA-1403: no data found\" was raised internally but was '\n                    \"hidden by the Oracle OCI library (see \"\n                    \"https://code.djangoproject.com/ticket/28859).\"\n                )\n            columns.append(value[0])\n        return tuple(columns)"
        },
        {
          "file": "django/db/backends/oracle/operations.py",
          "type": "function",
          "name": "last_insert_id",
          "class_name": "DatabaseOperations",
          "code": "def last_insert_id(self, cursor, table_name, pk_name):\n        sq_name = self._get_sequence_name(cursor, strip_quotes(table_name), pk_name)\n        cursor.execute('\"%s\".currval' % sq_name)\n        return cursor.fetchone()[0]"
        },
        {
          "file": "django/db/backends/postgresql/features.py",
          "type": "class",
          "name": "DatabaseFeatures",
          "code": "class DatabaseFeatures(BaseDatabaseFeatures):\n    minimum_database_version = (12,)\n    allows_group_by_selected_pks = True\n    can_return_columns_from_insert = True\n    can_return_rows_from_bulk_insert = True\n    has_real_datatype = True\n    has_native_uuid_field = True\n    has_native_duration_field = True\n    has_native_json_field = True\n    can_defer_constraint_checks = True\n    has_select_for_update = True\n    has_select_for_update_nowait = True\n    has_select_for_update_of = True\n    has_select_for_update_skip_locked = True\n    has_select_for_no_key_update = True\n    can_release_savepoints = True\n    supports_comments = True\n    supports_tablespaces = True\n    supports_transactions = True\n    can_introspect_materialized_views = True\n    can_distinct_on_fields = True\n    can_rollback_ddl = True\n    schema_editor_uses_clientside_param_binding = True\n    supports_combined_alters = True\n    nulls_order_largest = True\n    closed_cursor_error_class = InterfaceError\n    greatest_least_ignores_nulls = True\n    can_clone_databases = True\n    supports_temporal_subtraction = True\n    supports_slicing_ordering_in_compound = True\n    create_test_procedure_without_params_sql = \"\"\"\n        CREATE FUNCTION test_procedure () RETURNS void AS $$\n        DECLARE\n            V_I INTEGER;\n        BEGIN\n            V_I := 1;\n        END;\n    $$ LANGUAGE plpgsql;\"\"\"\n    create_test_procedure_with_int_param_sql = \"\"\"\n        CREATE FUNCTION test_procedure (P_I INTEGER) RETURNS void AS $$\n        DECLARE\n            V_I INTEGER;\n        BEGIN\n            V_I := P_I;\n        END;\n    $$ LANGUAGE plpgsql;\"\"\"\n    create_test_table_with_composite_primary_key = \"\"\"\n        CREATE TABLE test_table_composite_pk (\n            column_1 INTEGER NOT NULL,\n            column_2 INTEGER NOT NULL,\n            PRIMARY KEY(column_1, column_2)\n        )\n    \"\"\"\n    requires_casted_case_in_updates = True\n    supports_over_clause = True\n    only_supports_unbounded_with_preceding_and_following = True\n    supports_aggregate_filter_clause = True\n    supported_explain_formats = {\"JSON\", \"TEXT\", \"XML\", \"YAML\"}\n    supports_deferrable_unique_constraints = True\n    has_json_operators = True\n    json_key_contains_list_matching_requires_list = True\n    supports_update_conflicts = True\n    supports_update_conflicts_with_target = True\n    supports_covering_indexes = True\n    can_rename_index = True\n    test_collations = {\n        \"non_default\": \"sv-x-icu\",\n        \"swedish_ci\": \"sv-x-icu\",\n    }\n    test_now_utc_template = \"STATEMENT_TIMESTAMP() AT TIME ZONE 'UTC'\"\n\n    django_test_skips = {\n        \"opclasses are PostgreSQL only.\": {\n            \"indexes.tests.SchemaIndexesNotPostgreSQLTests.\"\n            \"test_create_index_ignores_opclasses\",\n        },\n    }\n\n    @cached_property\n    def django_test_expected_failures(self):\n        expected_failures = set()\n        if self.uses_server_side_binding:\n            expected_failures.update(\n                {\n                    # Parameters passed to expressions in SELECT and GROUP BY\n                    # clauses are not recognized as the same values when using\n                    # server-side binding cursors (#34255).\n                    \"aggregation.tests.AggregateTestCase.\"\n                    \"test_group_by_nested_expression_with_params\",\n                }\n            )\n        return expected_failures\n\n    @cached_property\n    def uses_server_side_binding(self):\n        options = self.connection.settings_dict[\"OPTIONS\"]\n        return is_psycopg3 and options.get(\"server_side_binding\") is True\n\n    @cached_property\n    def prohibits_null_characters_in_text_exception(self):\n        if is_psycopg3:\n            return DataError, \"PostgreSQL text fields cannot contain NUL (0x00) bytes\"\n        else:\n            return ValueError, \"A string literal cannot contain NUL (0x00) characters.\"\n\n    @cached_property\n    def introspected_field_types(self):\n        return {\n            **super().introspected_field_types,\n            \"PositiveBigIntegerField\": \"BigIntegerField\",\n            \"PositiveIntegerField\": \"IntegerField\",\n            \"PositiveSmallIntegerField\": \"SmallIntegerField\",\n        }\n\n    @cached_property\n    def is_postgresql_13(self):\n        return self.connection.pg_version >= 130000\n\n    @cached_property\n    def is_postgresql_14(self):\n        return self.connection.pg_version >= 140000\n\n    has_bit_xor = property(operator.attrgetter(\"is_postgresql_14\"))\n    supports_covering_spgist_indexes = property(operator.attrgetter(\"is_postgresql_14\"))\n    supports_unlimited_charfield = True"
        }
      ]
    },
    {
      "pr_number": 16315,
      "pr_title": "Fixed #34171 -- Fixed QuerySet.bulk_create() on fields with db_column in unique_fields/update_fields.",
      "pr_body": "[Ticket 34171](https://code.djangoproject.com/ticket/34171)\r\nI think we don't need separate test for this, just adding a ```db_column``` to one of the models in ```bulk_create``` should do the work.",
      "issue_id": 34171,
      "issue_title": "QuerySet.bulk_create() crashes on mixed case columns in unique_fields/update_fields.",
      "issue_body": "Not sure exactly how to phrase this, but when I I'm calling\nbulk_update\non the manager for a class with\ndb_column\nset on fields the SQL is invalid. Ellipses indicate other fields excluded for clarity.\nclass ActivityBlackListed(models.Model):\n    \"\"\"\n    Originally sourced from Activity_BlackListed in /home/josh/PNDS_Interim_MIS-Data.accdb (13 records)\n    \"\"\"\n\n    class Meta:\n        db_table = \"Activity_BlackListed\"\n\n    blacklistid = models.IntegerField(primary_key=True, db_column=\"BlacklistID\")\n    sectorid = models.IntegerField(null=True, blank=True, db_column=\"SectorID\")\n    ...\nqs.bulk_create(instances, update_conflicts=True, update_fields=[\"sectorid\", ...], unique_fields=[\"blacklistid\"])\nThe \"INSERT\" code does take into account the db_columns\nINSERT INTO \"Activity_BlackListed\" (\"BlacklistID\",...) VALUES (%s,  ...),\nThe code which is generated for \"ON CONFLICT\" uses the field name and not the db_column which leads to a syntax error\n'ON CONFLICT(\"blacklistid\") DO UPDATE SET \"sectorid\" = EXCLUDED.\"sectorid\", ...\nPostgreSQL returns\nERROR:  column \"blacklistid\" does not exist at character 1508\nWhat should be generated is I think:\n'ON CONFLICT(\"BlacklistID\") DO UPDATE SET \"SectorID\" = EXCLUDED.\"SectorID\", ...",
      "issue_closed_at": "2022-11-22T13:05:04",
      "base_commit": "7d5329852f19c6ae78c6f6f3d3e41835377bf295",
      "changes": [
        {
          "file": "django/db/models/query.py",
          "type": "function",
          "name": "_check_bulk_create_options",
          "class_name": "QuerySet",
          "code": "def _check_bulk_create_options(\n        self, ignore_conflicts, update_conflicts, update_fields, unique_fields\n    ):\n        if ignore_conflicts and update_conflicts:\n            raise ValueError(\n                \"ignore_conflicts and update_conflicts are mutually exclusive.\"\n            )\n        db_features = connections[self.db].features\n        if ignore_conflicts:\n            if not db_features.supports_ignore_conflicts:\n                raise NotSupportedError(\n                    \"This database backend does not support ignoring conflicts.\"\n                )\n            return OnConflict.IGNORE\n        elif update_conflicts:\n            if not db_features.supports_update_conflicts:\n                raise NotSupportedError(\n                    \"This database backend does not support updating conflicts.\"\n                )\n            if not update_fields:\n                raise ValueError(\n                    \"Fields that will be updated when a row insertion fails \"\n                    \"on conflicts must be provided.\"\n                )\n            if unique_fields and not db_features.supports_update_conflicts_with_target:\n                raise NotSupportedError(\n                    \"This database backend does not support updating \"\n                    \"conflicts with specifying unique fields that can trigger \"\n                    \"the upsert.\"\n                )\n            if not unique_fields and db_features.supports_update_conflicts_with_target:\n                raise ValueError(\n                    \"Unique fields that can trigger the upsert must be provided.\"\n                )\n            # Updating primary keys and non-concrete fields is forbidden.\n            update_fields = [self.model._meta.get_field(name) for name in update_fields]\n            if any(not f.concrete or f.many_to_many for f in update_fields):\n                raise ValueError(\n                    \"bulk_create() can only be used with concrete fields in \"\n                    \"update_fields.\"\n                )\n            if any(f.primary_key for f in update_fields):\n                raise ValueError(\n                    \"bulk_create() cannot be used with primary keys in \"\n                    \"update_fields.\"\n                )\n            if unique_fields:\n                unique_fields = [\n                    self.model._meta.get_field(name) for name in unique_fields\n                ]\n                if any(not f.concrete or f.many_to_many for f in unique_fields):\n                    raise ValueError(\n                        \"bulk_create() can only be used with concrete fields \"\n                        \"in unique_fields.\"\n                    )\n            return OnConflict.UPDATE\n        return None"
        },
        {
          "file": "django/db/models/query.py",
          "type": "function",
          "name": "_check_bulk_create_options",
          "class_name": "QuerySet",
          "code": "def _check_bulk_create_options(\n        self, ignore_conflicts, update_conflicts, update_fields, unique_fields\n    ):\n        if ignore_conflicts and update_conflicts:\n            raise ValueError(\n                \"ignore_conflicts and update_conflicts are mutually exclusive.\"\n            )\n        db_features = connections[self.db].features\n        if ignore_conflicts:\n            if not db_features.supports_ignore_conflicts:\n                raise NotSupportedError(\n                    \"This database backend does not support ignoring conflicts.\"\n                )\n            return OnConflict.IGNORE\n        elif update_conflicts:\n            if not db_features.supports_update_conflicts:\n                raise NotSupportedError(\n                    \"This database backend does not support updating conflicts.\"\n                )\n            if not update_fields:\n                raise ValueError(\n                    \"Fields that will be updated when a row insertion fails \"\n                    \"on conflicts must be provided.\"\n                )\n            if unique_fields and not db_features.supports_update_conflicts_with_target:\n                raise NotSupportedError(\n                    \"This database backend does not support updating \"\n                    \"conflicts with specifying unique fields that can trigger \"\n                    \"the upsert.\"\n                )\n            if not unique_fields and db_features.supports_update_conflicts_with_target:\n                raise ValueError(\n                    \"Unique fields that can trigger the upsert must be provided.\"\n                )\n            # Updating primary keys and non-concrete fields is forbidden.\n            update_fields = [self.model._meta.get_field(name) for name in update_fields]\n            if any(not f.concrete or f.many_to_many for f in update_fields):\n                raise ValueError(\n                    \"bulk_create() can only be used with concrete fields in \"\n                    \"update_fields.\"\n                )\n            if any(f.primary_key for f in update_fields):\n                raise ValueError(\n                    \"bulk_create() cannot be used with primary keys in \"\n                    \"update_fields.\"\n                )\n            if unique_fields:\n                unique_fields = [\n                    self.model._meta.get_field(name) for name in unique_fields\n                ]\n                if any(not f.concrete or f.many_to_many for f in unique_fields):\n                    raise ValueError(\n                        \"bulk_create() can only be used with concrete fields \"\n                        \"in unique_fields.\"\n                    )\n            return OnConflict.UPDATE\n        return None"
        },
        {
          "file": "django/db/models/query.py",
          "type": "function",
          "name": "bulk_create",
          "class_name": "QuerySet",
          "code": "def bulk_create(\n        self,\n        objs,\n        batch_size=None,\n        ignore_conflicts=False,\n        update_conflicts=False,\n        update_fields=None,\n        unique_fields=None,\n    ):\n        \"\"\"\n        Insert each of the instances into the database. Do *not* call\n        save() on each of the instances, do not send any pre/post_save\n        signals, and do not set the primary key attribute if it is an\n        autoincrement field (except if features.can_return_rows_from_bulk_insert=True).\n        Multi-table models are not supported.\n        \"\"\"\n        # When you bulk insert you don't get the primary keys back (if it's an\n        # autoincrement, except if can_return_rows_from_bulk_insert=True), so\n        # you can't insert into the child tables which references this. There\n        # are two workarounds:\n        # 1) This could be implemented if you didn't have an autoincrement pk\n        # 2) You could do it by doing O(n) normal inserts into the parent\n        #    tables to get the primary keys back and then doing a single bulk\n        #    insert into the childmost table.\n        # We currently set the primary keys on the objects when using\n        # PostgreSQL via the RETURNING ID clause. It should be possible for\n        # Oracle as well, but the semantics for extracting the primary keys is\n        # trickier so it's not done yet.\n        if batch_size is not None and batch_size <= 0:\n            raise ValueError(\"Batch size must be a positive integer.\")\n        # Check that the parents share the same concrete model with the our\n        # model to detect the inheritance pattern ConcreteGrandParent ->\n        # MultiTableParent -> ProxyChild. Simply checking self.model._meta.proxy\n        # would not identify that case as involving multiple tables.\n        for parent in self.model._meta.get_parent_list():\n            if parent._meta.concrete_model is not self.model._meta.concrete_model:\n                raise ValueError(\"Can't bulk create a multi-table inherited model\")\n        if not objs:\n            return objs\n        opts = self.model._meta\n        if unique_fields:\n            # Primary key is allowed in unique_fields.\n            unique_fields = [\n                opts.pk.name if name == \"pk\" else name for name in unique_fields\n            ]\n        on_conflict = self._check_bulk_create_options(\n            ignore_conflicts,\n            update_conflicts,\n            update_fields,\n            unique_fields,\n        )\n        self._for_write = True\n        fields = opts.concrete_fields\n        objs = list(objs)\n        self._prepare_for_bulk_create(objs)\n        with transaction.atomic(using=self.db, savepoint=False):\n            objs_with_pk, objs_without_pk = partition(lambda o: o.pk is None, objs)\n            if objs_with_pk:\n                returned_columns = self._batched_insert(\n                    objs_with_pk,\n                    fields,\n                    batch_size,\n                    on_conflict=on_conflict,\n                    update_fields=update_fields,\n                    unique_fields=unique_fields,\n                )\n                for obj_with_pk, results in zip(objs_with_pk, returned_columns):\n                    for result, field in zip(results, opts.db_returning_fields):\n                        if field != opts.pk:\n                            setattr(obj_with_pk, field.attname, result)\n                for obj_with_pk in objs_with_pk:\n                    obj_with_pk._state.adding = False\n                    obj_with_pk._state.db = self.db\n            if objs_without_pk:\n                fields = [f for f in fields if not isinstance(f, AutoField)]\n                returned_columns = self._batched_insert(\n                    objs_without_pk,\n                    fields,\n                    batch_size,\n                    on_conflict=on_conflict,\n                    update_fields=update_fields,\n                    unique_fields=unique_fields,\n                )\n                connection = connections[self.db]\n                if (\n                    connection.features.can_return_rows_from_bulk_insert\n                    and on_conflict is None\n                ):\n                    assert len(returned_columns) == len(objs_without_pk)\n                for obj_without_pk, results in zip(objs_without_pk, returned_columns):\n                    for result, field in zip(results, opts.db_returning_fields):\n                        setattr(obj_without_pk, field.attname, result)\n                    obj_without_pk._state.adding = False\n                    obj_without_pk._state.db = self.db\n\n        return objs"
        },
        {
          "file": "django/db/models/sql/compiler.py",
          "type": "function",
          "name": "as_sql",
          "class_name": "SQLAggregateCompiler",
          "code": "def as_sql(self):\n        \"\"\"\n        Create the SQL for this query. Return the SQL string and list of\n        parameters.\n        \"\"\"\n        sql, params = [], []\n        for annotation in self.query.annotation_select.values():\n            ann_sql, ann_params = self.compile(annotation)\n            ann_sql, ann_params = annotation.select_format(self, ann_sql, ann_params)\n            sql.append(ann_sql)\n            params.extend(ann_params)\n        self.col_count = len(self.query.annotation_select)\n        sql = \", \".join(sql)\n        params = tuple(params)\n\n        inner_query_sql, inner_query_params = self.query.inner_query.get_compiler(\n            self.using,\n            elide_empty=self.elide_empty,\n        ).as_sql(with_col_aliases=True)\n        sql = \"SELECT %s FROM (%s) subquery\" % (sql, inner_query_sql)\n        params += inner_query_params\n        return sql, params"
        }
      ]
    },
    {
      "pr_number": 9902,
      "pr_title": "Fixed #29367 -- Fixed model state on objects with a primary key created with QuerySet.bulk_create().",
      "pr_body": "Instance of models with manually set primary_key persisted with\r\nbulk_create would not update the state `adding` and `db` attributes.\r\n\r\nSee: ticket 29367\r\n",
      "issue_id": 29367,
      "issue_title": "bulk_create with manual primary_key don't update instances state",
      "issue_body": "Given a model with manually defined primary keys:\nclass\nState\n(\nmodels\n.\nModel\n):\ntwo_letter_code\n=\nmodels\n.\nCharField\n(\nmax_length\n=\n2\n,\nprimary_key\n=\nTrue\n)\nPerforming a bulk_create with model instances will not correctly update their state.\nLooping through the instances and calling save() individually will result in instances with different state from instances persisted with bulk_create:\nstate_ca\n=\nState\n(\ntwo_letter_code\n=\n'CA'\n)\nState\n.\nobjects\n.\nbulk_create\n([\nstate_ca\n])\nstate_ca\n.\n_state\n.\nadding\n# => True\nstate_ca\n.\n_state\n.\ndb\n# => None\nstate_ny\n=\nState\n(\ntwo_letter_code\n=\n'NY'\n)\nstate_ny\n.\nsave\n()\nstate_ny\n.\n_state\n.\nadding\n# => False\nstate_ny\n.\n_state\n.\ndb\n# => 'default'\nOne implication of this behavior is that the instances saved with bulk_create can't be used to build relationships with model instances loaded with other Queryset API methods.\nHere is a demonstration:\nclass\nGroup\n(\nmodels\n.\nModel\n):\next_id\n=\nmodels\n.\nCharField\n(\nprimary_key\n=\nTrue\n,\nmax_length\n=\n32\n)\nclass\nAnalyst\n(\nmodels\n.\nModel\n):\next_id\n=\nmodels\n.\nCharField\n(\nprimary_key\n=\nTrue\n,\nmax_length\n=\n32\n)\ngroups\n=\nmodels\n.\nManyToManyField\n(\nGroup\n)\ngroup_aaa\n=\nGroup\n.\nobjects\n.\nget\n(\next_id\n=\n'AAA'\n)\nanalyst_eee\n=\nAnalyst\n(\next_id\n=\n'EEE'\n)\nAnalyst\n.\nobjects\n.\nbulk_create\n([\nanalyst_eee\n])\nanalyst_eee\n.\ngroups\n.\nset\n([\ngroup_aaa\n])\n# ValueError: Cannot add \"<Group: AAA>\": instance is on database \"None\", value is on database \"default\"\nIt fails when the\n._state.db\nis compared.\nA current workaround option is to manually set the\n._state.db\nafter the bulk_create:\nanalyst_eee = Analyst(ext_id='EEE')\nAnalyst.objects.bulk_create([analyst_eee])\nanalyst_eee._state.db = 'default'\n\nanalyst_eee.groups.set([group_aaa]) # And now it works",
      "issue_closed_at": "2018-04-27T17:20:07",
      "base_commit": "3246ad106517e61437f80e8ef3c9d216754039e7",
      "changes": [
        {
          "file": "django/db/models/query.py",
          "type": "function",
          "name": "bulk_create",
          "class_name": "QuerySet",
          "code": "def bulk_create(self, objs, batch_size=None):\n        \"\"\"\n        Insert each of the instances into the database. Do *not* call\n        save() on each of the instances, do not send any pre/post_save\n        signals, and do not set the primary key attribute if it is an\n        autoincrement field (except if features.can_return_ids_from_bulk_insert=True).\n        Multi-table models are not supported.\n        \"\"\"\n        # When you bulk insert you don't get the primary keys back (if it's an\n        # autoincrement, except if can_return_ids_from_bulk_insert=True), so\n        # you can't insert into the child tables which references this. There\n        # are two workarounds:\n        # 1) This could be implemented if you didn't have an autoincrement pk\n        # 2) You could do it by doing O(n) normal inserts into the parent\n        #    tables to get the primary keys back and then doing a single bulk\n        #    insert into the childmost table.\n        # We currently set the primary keys on the objects when using\n        # PostgreSQL via the RETURNING ID clause. It should be possible for\n        # Oracle as well, but the semantics for extracting the primary keys is\n        # trickier so it's not done yet.\n        assert batch_size is None or batch_size > 0\n        # Check that the parents share the same concrete model with the our\n        # model to detect the inheritance pattern ConcreteGrandParent ->\n        # MultiTableParent -> ProxyChild. Simply checking self.model._meta.proxy\n        # would not identify that case as involving multiple tables.\n        for parent in self.model._meta.get_parent_list():\n            if parent._meta.concrete_model is not self.model._meta.concrete_model:\n                raise ValueError(\"Can't bulk create a multi-table inherited model\")\n        if not objs:\n            return objs\n        self._for_write = True\n        connection = connections[self.db]\n        fields = self.model._meta.concrete_fields\n        objs = list(objs)\n        self._populate_pk_values(objs)\n        with transaction.atomic(using=self.db, savepoint=False):\n            objs_with_pk, objs_without_pk = partition(lambda o: o.pk is None, objs)\n            if objs_with_pk:\n                self._batched_insert(objs_with_pk, fields, batch_size)\n            if objs_without_pk:\n                fields = [f for f in fields if not isinstance(f, AutoField)]\n                ids = self._batched_insert(objs_without_pk, fields, batch_size)\n                if connection.features.can_return_ids_from_bulk_insert:\n                    assert len(ids) == len(objs_without_pk)\n                for obj_without_pk, pk in zip(objs_without_pk, ids):\n                    obj_without_pk.pk = pk\n                    obj_without_pk._state.adding = False\n                    obj_without_pk._state.db = self.db\n\n        return objs"
        }
      ]
    },
    {
      "pr_number": 8905,
      "pr_title": "Fixed #28375 -- Fixed KeyError raised on reverse prefetch of a model with OneToOne primary key to non-pk field",
      "pr_body": "https://code.djangoproject.com/ticket/28375",
      "issue_id": 28375,
      "issue_title": "QuerySet.prefetch_related() crashes with KeyError if model uses to_field and string primary key",
      "issue_body": "The issue:\nprefetch_related failed if prefetching by char primary key.\nDjango version 1.11.3\nPython 2.7\nreproducible steps:\n1) django-admin startproject pk_string\n2) cd pk_string\n3) django-admin startapp users\n4) update  users.models\n# -*- coding: utf-8 -*-\nfrom\n__future__\nimport\nunicode_literals\nfrom\ndjango.db\nimport\nmodels\n# Create your models here.\nclass\nUser\n(\nmodels\n.\nModel\n):\nemail\n=\nmodels\n.\nCharField\n(\nmax_length\n=\n255\n,\nunique\n=\nTrue\n)\nclass\nUserData\n(\nmodels\n.\nModel\n):\nemail\n=\nmodels\n.\nOneToOneField\n(\nUser\n,\nto_field\n=\n'email'\n,\nprimary_key\n=\nTrue\n)\nnote\n=\nmodels\n.\nCharField\n(\nmax_length\n=\n255\n);\n5) install app pk_string.settings\n...\nINSTALLED_APPS\n=\n[\n'django.contrib.admin'\n,\n'django.contrib.auth'\n,\n'django.contrib.contenttypes'\n,\n'django.contrib.sessions'\n,\n'django.contrib.messages'\n,\n'django.contrib.staticfiles'\n,\n'users'\n,\n]\n...\n6) ./manage.py makemigrations\n7) ./manage.py migrate\n8) ./manage.py shell\nfrom\nusers.models\nimport\nUser\n,\nUserData\nUser\n.\nobjects\n.\ncreate\n(\nemail\n=\n'111111'\n)\nUser\n.\nobjects\n.\ncreate\n(\nemail\n=\n'222222'\n)\nUser\n.\nobjects\n.\ncreate\n(\nemail\n=\n'333333'\n)\nUser\n.\nobjects\n.\ncreate\n(\nemail\n=\n'444444'\n)\nusers\n=\nUser\n.\nobjects\n.\nall\n()\n.\nprefetch_related\n(\n'userdata'\n)\nusers\n[\n0\n]\n>>>\n\"<User: User object>\"\nUserData\n.\nobjects\n.\ncreate\n(\nemail\n=\nusers\n[\n0\n]\n,\nnote\n=\n'111'\n)\nUserData\n.\nobjects\n.\ncreate\n(\nemail\n=\nusers\n[\n2\n]\n,\nnote\n=\n'222'\n)\nUserData\n.\nobjects\n.\ncreate\n(\nemail\n=\nusers\n[\n3\n]\n,\nnote\n=\n'333'\n)\nusers\n=\nUser\n.\nobjects\n.\nall\n()\n.\nprefetch_related\n(\n'userdata'\n)\n>>>\nusers\n[\n0\n]\nTraceback\n(\nmost\nrecent\ncall\nlast\n):\nFile\n\"<console>\"\n,\nline\n1\n,\nin\n<\nmodule\n>\nFile\n\"/usr/local/lib/python2.7/site-packages/django/db/models/query.py\"\n,\nline\n289\n,\nin\n__getitem__\nreturn\nlist\n(\nqs\n)[\n0\n]\nFile\n\"/usr/local/lib/python2.7/site-packages/django/db/models/query.py\"\n,\nline\n250\n,\nin\n__iter__\nself\n.\n_fetch_all\n()\nFile\n\"/usr/local/lib/python2.7/site-packages/django/db/models/query.py\"\n,\nline\n1120\n,\nin\n_fetch_all\nself\n.\n_prefetch_related_objects\n()\nFile\n\"/usr/local/lib/python2.7/site-packages/django/db/models/query.py\"\n,\nline\n675\n,\nin\n_prefetch_related_objects\nprefetch_related_objects\n(\nself\n.\n_result_cache\n,\n*\nself\n.\n_prefetch_related_lookups\n)\nFile\n\"/usr/local/lib/python2.7/site-packages/django/db/models/query.py\"\n,\nline\n1469\n,\nin\nprefetch_related_objects\nobj_list\n,\nadditional_lookups\n=\nprefetch_one_level\n(\nobj_list\n,\nprefetcher\n,\nlookup\n,\nlevel\n)\nFile\n\"/usr/local/lib/python2.7/site-packages/django/db/models/query.py\"\n,\nline\n1582\n,\nin\nprefetch_one_level\nprefetcher\n.\nget_prefetch_queryset\n(\ninstances\n,\nlookup\n.\nget_current_queryset\n(\nlevel\n)))\nFile\n\"/usr/local/lib/python2.7/site-packages/django/db/models/fields/related_descriptors.py\"\n,\nline\n362\n,\nin\nget_prefetch_queryset\ninstance\n=\ninstances_dict\n[\nrel_obj_attr\n(\nrel_obj\n)]\nKeyError\n:\nu\n'111111'",
      "issue_closed_at": "2017-08-21T15:47:49",
      "base_commit": "b5ad5c628a0327c2208d76e5cacb3cb6f48750b5",
      "changes": [
        {
          "file": "django/db/models/fields/related_descriptors.py",
          "type": "function",
          "name": "get_prefetch_queryset",
          "class_name": "ManyRelatedManager",
          "code": "def get_prefetch_queryset(self, instances, queryset=None):\n            if queryset is None:\n                queryset = super().get_queryset()\n\n            queryset._add_hints(instance=instances[0])\n            queryset = queryset.using(queryset._db or self._db)\n\n            query = {'%s__in' % self.query_field_name: instances}\n            queryset = queryset._next_is_sticky().filter(**query)\n\n            # M2M: need to annotate the query in order to get the primary model\n            # that the secondary model was actually related to. We know that\n            # there will already be a join on the join table, so we can just add\n            # the select.\n\n            # For non-autocreated 'through' models, can't assume we are\n            # dealing with PK values.\n            fk = self.through._meta.get_field(self.source_field_name)\n            join_table = fk.model._meta.db_table\n            connection = connections[queryset.db]\n            qn = connection.ops.quote_name\n            queryset = queryset.extra(select={\n                '_prefetch_related_val_%s' % f.attname:\n                '%s.%s' % (qn(join_table), qn(f.column)) for f in fk.local_related_fields})\n            return (\n                queryset,\n                lambda result: tuple(\n                    getattr(result, '_prefetch_related_val_%s' % f.attname)\n                    for f in fk.local_related_fields\n                ),\n                lambda inst: tuple(\n                    f.get_db_prep_value(getattr(inst, f.attname), connection)\n                    for f in fk.foreign_related_fields\n                ),\n                False,\n                self.prefetch_cache_name,\n                False,\n            )"
        }
      ]
    }
  ]
}