// Copyright 2021 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_

#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>

#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/endian.h"
#include "absl/base/macros.h"
#include "absl/base/nullability.h"
#include "absl/base/optimization.h"
#include "absl/container/internal/compressed_tuple.h"
#include "absl/container/internal/container_memory.h"
#include "absl/strings/string_view.h"

// We can only add poisoning if we can detect consteval executions.
#if defined(ABSL_HAVE_CONSTANT_EVALUATED) && \
    (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
     defined(ABSL_HAVE_MEMORY_SANITIZER))
#define ABSL_INTERNAL_CORD_HAVE_SANITIZER 1
#endif

#define ABSL_CORD_INTERNAL_NO_SANITIZE \
  ABSL_ATTRIBUTE_NO_SANITIZE_ADDRESS ABSL_ATTRIBUTE_NO_SANITIZE_MEMORY

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace cord_internal {

// The overhead of a vtable is too much for Cord, so we roll our own subclasses
// using only a single byte to differentiate classes from each other - the "tag"
// byte.  Define the subclasses first so we can provide downcasting helper
// functions in the base class.
struct CordRep;
struct CordRepConcat;
struct CordRepExternal;
struct CordRepFlat;
struct CordRepSubstring;
struct CordRepCrc;
class CordRepBtree;

class CordzInfo;

// Default feature enable states for cord ring buffers
enum CordFeatureDefaults { kCordShallowSubcordsDefault = false };

extern std::atomic<bool> shallow_subcords_enabled;

inline void enable_shallow_subcords(bool enable) {
  shallow_subcords_enabled.store(enable, std::memory_order_relaxed);
}

enum Constants {
  // The inlined size to use with absl::InlinedVector.
  //
  // Note: The InlinedVectors in this file (and in cord.h) do not need to use
  // the same value for their inlined size. The fact that they do is historical.
  // It may be desirable for each to use a different inlined size optimized for
  // that InlinedVector's usage.
  //
  // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for
  // the inlined vector size (47 exists for backward compatibility).
  kInlinedVectorSize = 47,

  // Prefer copying blocks of at most this size, otherwise reference count.
  kMaxBytesToCopy = 511
};

// Emits a fatal error "Unexpected node type: xyz" and aborts the program.
[[noreturn]] void LogFatalNodeType(CordRep* rep);

// Fast implementation of memmove for up to 15 bytes. This implementation is
// safe for overlapping regions. If nullify_tail is true, the destination is
// padded with '\0' up to 15 bytes.
template <bool nullify_tail = false>
inline void SmallMemmove(char* dst, const char* src, size_t n) {
  if (n >= 8) {
    assert(n <= 15);
    uint64_t buf1;
    uint64_t buf2;
    memcpy(&buf1, src, 8);
    memcpy(&buf2, src + n - 8, 8);
    if (nullify_tail) {
      memset(dst + 7, 0, 8);
    }
    // GCC 12 has a false-positive -Wstringop-overflow warning here.
#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(12, 0)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wstringop-overflow"
#endif
    memcpy(dst, &buf1, 8);
    memcpy(dst + n - 8, &buf2, 8);
#if ABSL_INTERNAL_HAVE_MIN_GNUC_VERSION(12, 0)
#pragma GCC diagnostic pop
#endif
  } else if (n >= 4) {
    uint32_t buf1;
    uint32_t buf2;
    memcpy(&buf1, src, 4);
    memcpy(&buf2, src + n - 4, 4);
    if (nullify_tail) {
      memset(dst + 4, 0, 4);
      memset(dst + 7, 0, 8);
    }
    memcpy(dst, &buf1, 4);
    memcpy(dst + n - 4, &buf2, 4);
  } else {
    if (n != 0) {
      dst[0] = src[0];
      dst[n / 2] = src[n / 2];
      dst[n - 1] = src[n - 1];
    }
    if (nullify_tail) {
      memset(dst + 7, 0, 8);
      memset(dst + n, 0, 8);
    }
  }
}

// Compact class for tracking the reference count and state flags for CordRep
// instances.  Data is stored in an atomic int32_t for compactness and speed.
class RefcountAndFlags {
 public:
  constexpr RefcountAndFlags() : count_{kRefIncrement} {}
  struct Immortal {};
  explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {}

  // Increments the reference count. Imposes no memory ordering.
  inline void Increment() {
    count_.fetch_add(kRefIncrement, std::memory_order_relaxed);
  }

  // Asserts that the current refcount is greater than 0. If the refcount is
  // greater than 1, decrements the reference count.
  //
  // Returns false if there are no references outstanding; true otherwise.
  // Inserts barriers to ensure that state written before this method returns
  // false will be visible to a thread that just observed this method returning
  // false.  Always returns false when the immortal bit is set.
  inline bool Decrement() {
    int32_t refcount = count_.load(std::memory_order_acquire);
    assert(refcount > 0 || refcount & kImmortalFlag);
    return refcount != kRefIncrement &&
           count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) !=
               kRefIncrement;
  }

  // Same as Decrement but expect that refcount is greater than 1.
  inline bool DecrementExpectHighRefcount() {
    int32_t refcount =
        count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel);
    assert(refcount > 0 || refcount & kImmortalFlag);
    return refcount != kRefIncrement;
  }

  // Returns the current reference count using acquire semantics.
  inline size_t Get() const {
    return static_cast<size_t>(count_.load(std::memory_order_acquire) >>
                               kNumFlags);
  }

  // Returns whether the atomic integer is 1.
  // If the reference count is used in the conventional way, a
  // reference count of 1 implies that the current thread owns the
  // reference and no other thread shares it.
  // This call performs the test for a reference count of one, and
  // performs the memory barrier needed for the owning thread
  // to act on the object, knowing that it has exclusive access to the
  // object. Always returns false when the immortal bit is set.
  inline bool IsOne() {
    return count_.load(std::memory_order_acquire) == kRefIncrement;
  }

  bool IsImmortal() const {
    return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0;
  }

 private:
  // We reserve the bottom bit for flag.
  // kImmortalBit indicates that this entity should never be collected; it is
  // used for the StringConstant constructor to avoid collecting immutable
  // constant cords.
  enum Flags {
    kNumFlags = 1,

    kImmortalFlag = 0x1,
    kRefIncrement = (1 << kNumFlags),
  };

  std::atomic<int32_t> count_;
};

// Various representations that we allow
enum CordRepKind {
  UNUSED_0 = 0,
  SUBSTRING = 1,
  CRC = 2,
  BTREE = 3,
  UNUSED_4 = 4,
  EXTERNAL = 5,

  // We have different tags for different sized flat arrays,
  // starting with FLAT, and limited to MAX_FLAT_TAG. The below values map to an
  // allocated range of 32 bytes to 256 KB. The current granularity is:
  // - 8 byte granularity for flat sizes in [32 - 512]
  // - 64 byte granularity for flat sizes in (512 - 8KiB]
  // - 4KiB byte granularity for flat sizes in (8KiB, 256 KiB]
  // If a new tag is needed in the future, then 'FLAT' and 'MAX_FLAT_TAG' should
  // be adjusted as well as the Tag <---> Size mapping logic so that FLAT still
  // represents the minimum flat allocation size. (32 bytes as of now).
  FLAT = 6,
  MAX_FLAT_TAG = 248
};

// There are various locations where we want to check if some rep is a 'plain'
// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we
// can perform this check in a single branch as 'tag >= EXTERNAL'
// Note that we can leave this optimization to the compiler. The compiler will
// DTRT when it sees a condition like `tag == EXTERNAL || tag >= FLAT`.
static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive");

struct CordRep {
  // Result from an `extract edge` operation. Contains the (possibly changed)
  // tree node as well as the extracted edge, or {tree, nullptr} if no edge
  // could be extracted.
  // On success, the returned `tree` value is null if `extracted` was the only
  // data edge inside the tree, a data edge if there were only two data edges in
  // the tree, or the (possibly new / smaller) remaining tree with the extracted
  // data edge removed.
  struct ExtractResult {
    CordRep* tree;
    CordRep* extracted;
  };

  CordRep() = default;
  constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l)
      : length(l), refcount(immortal), tag(EXTERNAL), storage{} {}

  // The following three fields have to be less than 32 bytes since
  // that is the smallest supported flat node size. Some code optimizations rely
  // on the specific layout of these fields. Notably: the non-trivial field
  // `refcount` being preceded by `length`, and being tailed by POD data
  // members only.
  // LINT.IfChange
  size_t length;
  RefcountAndFlags refcount;
  // If tag < FLAT, it represents CordRepKind and indicates the type of node.
  // Otherwise, the node type is CordRepFlat and the tag is the encoded size.
  uint8_t tag;

  // `storage` provides two main purposes:
  // - the starting point for FlatCordRep.Data() [flexible-array-member]
  // - 3 bytes of additional storage for use by derived classes.
  // The latter is used by CordrepConcat and CordRepBtree. CordRepConcat stores
  // a 'depth' value in storage[0], and the (future) CordRepBtree class stores
  // `height`, `begin` and `end` in the 3 entries. Otherwise we would need to
  // allocate room for these in the derived class, as not all compilers reuse
  // padding space from the base class (clang and gcc do, MSVC does not, etc)
  uint8_t storage[3];
  // LINT.ThenChange(cord_rep_btree.h:copy_raw)

  // Returns true if this instance's tag matches the requested type.
  constexpr bool IsSubstring() const { return tag == SUBSTRING; }
  constexpr bool IsCrc() const { return tag == CRC; }
  constexpr bool IsExternal() const { return tag == EXTERNAL; }
  constexpr bool IsFlat() const { return tag >= FLAT; }
  constexpr bool IsBtree() const { return tag == BTREE; }

  inline CordRepSubstring* substring();
  inline const CordRepSubstring* substring() const;
  inline CordRepCrc* crc();
  inline const CordRepCrc* crc() const;
  inline CordRepExternal* external();
  inline const CordRepExternal* external() const;
  inline CordRepFlat* flat();
  inline const CordRepFlat* flat() const;
  inline CordRepBtree* btree();
  inline const CordRepBtree* btree() const;

  // --------------------------------------------------------------------
  // Memory management

  // Destroys the provided `rep`.
  static void Destroy(CordRep* rep);

  // Increments the reference count of `rep`.
  // Requires `rep` to be a non-null pointer value.
  static inline CordRep* Ref(CordRep* rep);

  // Decrements the reference count of `rep`. Destroys rep if count reaches
  // zero. Requires `rep` to be a non-null pointer value.
  static inline void Unref(CordRep* rep);
};

struct CordRepSubstring : public CordRep {
  size_t start;  // Starting offset of substring in child
  CordRep* child;

  // Creates a substring on `child`, adopting a reference on `child`.
  // Requires `child` to be either a flat or external node, and `pos` and `n` to
  // form a non-empty partial sub range of `'child`, i.e.:
  // `n > 0 && n < length && n + pos <= length`
  static inline CordRepSubstring* Create(CordRep* child, size_t pos, size_t n);

  // Creates a substring of `rep`. Does not adopt a reference on `rep`.
  // Requires `IsDataEdge(rep) && n > 0 && pos + n <= rep->length`.
  // If `n == rep->length` then this method returns `CordRep::Ref(rep)`
  // If `rep` is a substring of a flat or external node, then this method will
  // return a new substring of that flat or external node with `pos` adjusted
  // with the original `start` position.
  static inline CordRep* Substring(CordRep* rep, size_t pos, size_t n);
};

// Type for function pointer that will invoke the releaser function and also
// delete the `CordRepExternalImpl` corresponding to the passed in
// `CordRepExternal`.
using ExternalReleaserInvoker = void (*)(CordRepExternal*);

// External CordReps are allocated together with a type erased releaser. The
// releaser is stored in the memory directly following the CordRepExternal.
struct CordRepExternal : public CordRep {
  CordRepExternal() = default;
  explicit constexpr CordRepExternal(absl::string_view str)
      : CordRep(RefcountAndFlags::Immortal{}, str.size()),
        base(str.data()),
        releaser_invoker(nullptr) {}

  const char* base;
  // Pointer to function that knows how to call and destroy the releaser.
  ExternalReleaserInvoker releaser_invoker;

  // Deletes (releases) the external rep.
  // Requires rep != nullptr and rep->IsExternal()
  static void Delete(CordRep* rep);
};

// Use go/ranked-overloads for dispatching.
struct Rank0 {};
struct Rank1 : Rank0 {};

template <typename Releaser,
          typename = ::std::invoke_result_t<Releaser, absl::string_view>>
void InvokeReleaser(Rank1, Releaser&& releaser, absl::string_view data) {
  ::std::invoke(std::forward<Releaser>(releaser), data);
}

template <typename Releaser, typename = ::std::invoke_result_t<Releaser>>
void InvokeReleaser(Rank0, Releaser&& releaser, absl::string_view) {
  ::std::invoke(std::forward<Releaser>(releaser));
}

// We use CompressedTuple so that we can benefit from EBCO.
template <typename Releaser>
struct CordRepExternalImpl
    : public CordRepExternal,
      public ::absl::container_internal::CompressedTuple<Releaser> {
  // The extra int arg is so that we can avoid interfering with copy/move
  // constructors while still benefitting from perfect forwarding.
  template <typename T>
  CordRepExternalImpl(T&& releaser, int)
      : CordRepExternalImpl::CompressedTuple(std::forward<T>(releaser)) {
    this->releaser_invoker = &Release;
  }

  ~CordRepExternalImpl() {
    InvokeReleaser(Rank1{}, std::move(this->template get<0>()),
                   absl::string_view(base, length));
  }

  static void Release(CordRepExternal* rep) {
    delete static_cast<CordRepExternalImpl*>(rep);
  }
};

inline CordRepSubstring* CordRepSubstring::Create(CordRep* child, size_t pos,
                                                  size_t n) {
  assert(child != nullptr);
  assert(n > 0);
  assert(n < child->length);
  assert(pos < child->length);
  assert(n <= child->length - pos);

  // Move to strategical places inside the Cord logic and make this an assert.
  if (ABSL_PREDICT_FALSE(!(child->IsExternal() || child->IsFlat()))) {
    LogFatalNodeType(child);
  }

  CordRepSubstring* rep = new CordRepSubstring();
  rep->length = n;
  rep->tag = SUBSTRING;
  rep->start = pos;
  rep->child = child;
  return rep;
}

inline CordRep* CordRepSubstring::Substring(CordRep* rep, size_t pos,
                                            size_t n) {
  assert(rep != nullptr);
  assert(n != 0);
  assert(pos < rep->length);
  assert(n <= rep->length - pos);
  if (n == rep->length) return CordRep::Ref(rep);
  if (rep->IsSubstring()) {
    pos += rep->substring()->start;
    rep = rep->substring()->child;
  }
  CordRepSubstring* substr = new CordRepSubstring();
  substr->length = n;
  substr->tag = SUBSTRING;
  substr->start = pos;
  substr->child = CordRep::Ref(rep);
  return substr;
}

inline void CordRepExternal::Delete(CordRep* rep) {
  assert(rep != nullptr && rep->IsExternal());
  auto* rep_external = static_cast<CordRepExternal*>(rep);
  assert(rep_external->releaser_invoker != nullptr);
  rep_external->releaser_invoker(rep_external);
}

template <typename Str>
struct ConstInitExternalStorage {
  ABSL_CONST_INIT static CordRepExternal value;
};

template <typename Str>
ABSL_CONST_INIT CordRepExternal
    ConstInitExternalStorage<Str>::value(Str::value);

enum {
  kMaxInline = 15,
};

constexpr char GetOrNull(absl::string_view data, size_t pos) {
  return pos < data.size() ? data[pos] : '\0';
}

// We store cordz_info as 64 bit pointer value in little endian format. This
// guarantees that the least significant byte of cordz_info matches the first
// byte of the inline data representation in `data`, which holds the inlined
// size or the 'is_tree' bit.
using cordz_info_t = int64_t;

// Assert that the `cordz_info` pointer value perfectly overlaps the last half
// of `data` and can hold a pointer value.
static_assert(sizeof(cordz_info_t) * 2 == kMaxInline + 1, "");
static_assert(sizeof(cordz_info_t) >= sizeof(intptr_t), "");

// LittleEndianByte() creates a little endian representation of 'value', i.e.:
// a little endian value where the first byte in the host's representation
// holds 'value`, with all other bytes being 0.
static constexpr cordz_info_t LittleEndianByte(unsigned char value) {
#if defined(ABSL_IS_BIG_ENDIAN)
  return static_cast<cordz_info_t>(value) << ((sizeof(cordz_info_t) - 1) * 8);
#else
  return value;
#endif
}

class InlineData {
 public:
  // DefaultInitType forces the use of the default initialization constructor.
  enum DefaultInitType { kDefaultInit };

  // kNullCordzInfo holds the little endian representation of intptr_t(1)
  // This is the 'null' / initial value of 'cordz_info'. The null value
  // is specifically big endian 1 as with 64-bit pointers, the last
  // byte of cordz_info overlaps with the last byte holding the tag.
  static constexpr cordz_info_t kNullCordzInfo = LittleEndianByte(1);

  // kTagOffset contains the offset of the control byte / tag. This constant is
  // intended mostly for debugging purposes: do not remove this constant as it
  // is actively inspected and used by gdb pretty printing code.
  static constexpr size_t kTagOffset = 0;

  // Implement `~InlineData()` conditionally: we only need this destructor to
  // unpoison poisoned instances under *SAN, and it will only compile correctly
  // if the current compiler supports `absl::is_constant_evaluated()`.
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
  ~InlineData() noexcept { unpoison(); }
#endif

  constexpr InlineData() noexcept { poison_this(); }

  explicit InlineData(DefaultInitType) noexcept : rep_(kDefaultInit) {
    poison_this();
  }

  explicit InlineData(CordRep* rep) noexcept : rep_(rep) {
    ABSL_ASSERT(rep != nullptr);
  }

  // Explicit constexpr constructor to create a constexpr InlineData
  // value. Creates an inlined SSO value if `rep` is null, otherwise
  // creates a tree instance value.
  constexpr InlineData(absl::string_view sv, CordRep* rep) noexcept
      : rep_(rep ? Rep(rep) : Rep(sv)) {
    poison();
  }

  constexpr InlineData(const InlineData& rhs) noexcept;
  InlineData& operator=(const InlineData& rhs) noexcept;
  friend void swap(InlineData& lhs, InlineData& rhs) noexcept;

  friend bool operator==(const InlineData& lhs, const InlineData& rhs) {
#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    const Rep l = lhs.rep_.SanitizerSafeCopy();
    const Rep r = rhs.rep_.SanitizerSafeCopy();
    return memcmp(&l, &r, sizeof(l)) == 0;
#else
    return memcmp(&lhs, &rhs, sizeof(lhs)) == 0;
#endif
  }
  friend bool operator!=(const InlineData& lhs, const InlineData& rhs) {
    return !operator==(lhs, rhs);
  }

  // Poisons the unused inlined SSO data if the current instance
  // is inlined, else un-poisons the entire instance.
  constexpr void poison();

  // Un-poisons this instance.
  constexpr void unpoison();

  // Poisons the current instance. This is used on default initialization.
  constexpr void poison_this();

  // Returns true if the current instance is empty.
  // The 'empty value' is an inlined data value of zero length.
  bool is_empty() const { return rep_.tag() == 0; }

  // Returns true if the current instance holds a tree value.
  bool is_tree() const { return (rep_.tag() & 1) != 0; }

  // Returns true if the current instance holds a cordz_info value.
  // Requires the current instance to hold a tree value.
  bool is_profiled() const {
    assert(is_tree());
    return rep_.cordz_info() != kNullCordzInfo;
  }

  // Returns true if either of the provided instances hold a cordz_info value.
  // This method is more efficient than the equivalent `data1.is_profiled() ||
  // data2.is_profiled()`. Requires both arguments to hold a tree.
  static bool is_either_profiled(const InlineData& data1,
                                 const InlineData& data2) {
    assert(data1.is_tree() && data2.is_tree());
    return (data1.rep_.cordz_info() | data2.rep_.cordz_info()) !=
           kNullCordzInfo;
  }

  // Returns the cordz_info sampling instance for this instance, or nullptr
  // if the current instance is not sampled and does not have CordzInfo data.
  // Requires the current instance to hold a tree value.
  CordzInfo* cordz_info() const {
    assert(is_tree());
    intptr_t info = static_cast<intptr_t>(absl::little_endian::ToHost64(
        static_cast<uint64_t>(rep_.cordz_info())));
    assert(info & 1);
    return reinterpret_cast<CordzInfo*>(info - 1);
  }

  // Sets the current cordz_info sampling instance for this instance, or nullptr
  // if the current instance is not sampled and does not have CordzInfo data.
  // Requires the current instance to hold a tree value.
  void set_cordz_info(CordzInfo* cordz_info) {
    assert(is_tree());
    uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1;
    rep_.set_cordz_info(
        static_cast<cordz_info_t>(absl::little_endian::FromHost64(info)));
  }

  // Resets the current cordz_info to null / empty.
  void clear_cordz_info() {
    assert(is_tree());
    rep_.set_cordz_info(kNullCordzInfo);
  }

  // Returns a read only pointer to the character data inside this instance.
  // Requires the current instance to hold inline data.
  const char* as_chars() const {
    assert(!is_tree());
    return rep_.as_chars();
  }

  // Returns a mutable pointer to the character data inside this instance.
  // Should be used for 'write only' operations setting an inlined value.
  // Applications can set the value of inlined data either before or after
  // setting the inlined size, i.e., both of the below are valid:
  //
  //   // Set inlined data and inline size
  //   memcpy(data_.as_chars(), data, size);
  //   data_.set_inline_size(size);
  //
  //   // Set inlined size and inline data
  //   data_.set_inline_size(size);
  //   memcpy(data_.as_chars(), data, size);
  //
  // It's an error to read from the returned pointer without a preceding write
  // if the current instance does not hold inline data, i.e.: is_tree() == true.
  char* as_chars() { return rep_.as_chars(); }

  // Returns the tree value of this value.
  // Requires the current instance to hold a tree value.
  CordRep* as_tree() const {
    assert(is_tree());
    return rep_.tree();
  }

  void set_inline_data(const char* data, size_t n) {
    ABSL_ASSERT(n <= kMaxInline);
    unpoison();
    rep_.set_tag(static_cast<int8_t>(n << 1));
    SmallMemmove<true>(rep_.as_chars(), data, n);
    poison();
  }

  void CopyInlineToString(std::string* dst) const {
    assert(!is_tree());
    // As Cord can store only 15 bytes it is smaller than std::string's
    // small string optimization buffer size. Therefore we will always trigger
    // the fast assign short path.
    //
    // Copying with a size equal to the maximum allows more efficient, wider
    // stores to be used and no branching.
    dst->assign(rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
    // After the copy we then change the size and put in a 0 byte.
    dst->erase(inline_size());
  }

  void copy_max_inline_to(char* dst) const {
    assert(!is_tree());
    memcpy(dst, rep_.SanitizerSafeCopy().as_chars(), kMaxInline);
  }

  // Initialize this instance to holding the tree value `rep`,
  // initializing the cordz_info to null, i.e.: 'not profiled'.
  void make_tree(CordRep* rep) {
    unpoison();
    rep_.make_tree(rep);
  }

  // Set the tree value of this instance to 'rep`.
  // Requires the current instance to already hold a tree value.
  // Does not affect the value of cordz_info.
  void set_tree(CordRep* rep) {
    assert(is_tree());
    rep_.set_tree(rep);
  }

  // Returns the size of the inlined character data inside this instance.
  // Requires the current instance to hold inline data.
  size_t inline_size() const { return rep_.inline_size(); }

  // Sets the size of the inlined character data inside this instance.
  // Requires `size` to be <= kMaxInline.
  // See the documentation on 'as_chars()' for more information and examples.
  void set_inline_size(size_t size) {
    unpoison();
    rep_.set_inline_size(size);
    poison();
  }

  // Compares 'this' inlined data  with rhs. The comparison is a straightforward
  // lexicographic comparison. `Compare()` returns values as follows:
  //
  //   -1  'this' InlineData instance is smaller
  //    0  the InlineData instances are equal
  //    1  'this' InlineData instance larger
  int Compare(const InlineData& rhs) const {
    return Compare(rep_.SanitizerSafeCopy(), rhs.rep_.SanitizerSafeCopy());
  }

 private:
  struct Rep {
    // See cordz_info_t for forced alignment and size of `cordz_info` details.
    struct AsTree {
      explicit constexpr AsTree(absl::cord_internal::CordRep* tree)
          : rep(tree) {}
      cordz_info_t cordz_info = kNullCordzInfo;
      absl::cord_internal::CordRep* rep;
    };

    explicit Rep(DefaultInitType) {}
    constexpr Rep() : data{0} {}
    constexpr Rep(const Rep&) = default;
    constexpr Rep& operator=(const Rep&) = default;

    explicit constexpr Rep(CordRep* rep) : as_tree(rep) {}

    explicit constexpr Rep(absl::string_view chars)
        : data{static_cast<char>((chars.size() << 1)),
               GetOrNull(chars, 0),
               GetOrNull(chars, 1),
               GetOrNull(chars, 2),
               GetOrNull(chars, 3),
               GetOrNull(chars, 4),
               GetOrNull(chars, 5),
               GetOrNull(chars, 6),
               GetOrNull(chars, 7),
               GetOrNull(chars, 8),
               GetOrNull(chars, 9),
               GetOrNull(chars, 10),
               GetOrNull(chars, 11),
               GetOrNull(chars, 12),
               GetOrNull(chars, 13),
               GetOrNull(chars, 14)} {}

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    // Break compiler optimization for cases when value is allocated on the
    // stack. Compiler assumes that the the variable is fully accessible
    // regardless of our poisoning.
    // Missing report: https://github.com/llvm/llvm-project/issues/100640
    const Rep* self() const {
      const Rep* volatile ptr = this;
      return ptr;
    }
    Rep* self() {
      Rep* volatile ptr = this;
      return ptr;
    }
#else
    constexpr const Rep* self() const { return this; }
    constexpr Rep* self() { return this; }
#endif

    // Disable sanitizer as we must always be able to read `tag`.
    ABSL_CORD_INTERNAL_NO_SANITIZE
    int8_t tag() const { return reinterpret_cast<const int8_t*>(this)[0]; }
    void set_tag(int8_t rhs) { reinterpret_cast<int8_t*>(self())[0] = rhs; }

    char* as_chars() { return self()->data + 1; }
    const char* as_chars() const { return self()->data + 1; }

    bool is_tree() const { return (self()->tag() & 1) != 0; }

    size_t inline_size() const {
      ABSL_ASSERT(!self()->is_tree());
      return static_cast<size_t>(self()->tag()) >> 1;
    }

    void set_inline_size(size_t size) {
      ABSL_ASSERT(size <= kMaxInline);
      self()->set_tag(static_cast<int8_t>(size << 1));
    }

    CordRep* tree() const { return self()->as_tree.rep; }
    void set_tree(CordRep* rhs) { self()->as_tree.rep = rhs; }

    cordz_info_t cordz_info() const { return self()->as_tree.cordz_info; }
    void set_cordz_info(cordz_info_t rhs) { self()->as_tree.cordz_info = rhs; }

    void make_tree(CordRep* tree) {
      self()->as_tree.rep = tree;
      self()->as_tree.cordz_info = kNullCordzInfo;
    }

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER
    constexpr Rep SanitizerSafeCopy() const {
      if (!absl::is_constant_evaluated()) {
        Rep res;
        if (is_tree()) {
          res = *this;
        } else {
          res.set_tag(tag());
          memcpy(res.as_chars(), as_chars(), inline_size());
        }
        return res;
      } else {
        return *this;
      }
    }
#else
    constexpr const Rep& SanitizerSafeCopy() const { return *this; }
#endif

    // If the data has length <= kMaxInline, we store it in `data`, and
    // store the size in the first char of `data` shifted left + 1.
    // Else we store it in a tree and store a pointer to that tree in
    // `as_tree.rep` with a tagged pointer to make `tag() & 1` non zero.
    union {
      char data[kMaxInline + 1];
      AsTree as_tree;
    };

    // TODO(b/145829486): see swap(InlineData, InlineData) for more info.
    inline void SwapValue(Rep rhs, Rep& refrhs) {
      memcpy(&refrhs, this, sizeof(*this));
      memcpy(this, &rhs, sizeof(*this));
    }
  };

  // Private implementation of `Compare()`
  static inline int Compare(const Rep& lhs, const Rep& rhs) {
    uint64_t x, y;
    memcpy(&x, lhs.as_chars(), sizeof(x));
    memcpy(&y, rhs.as_chars(), sizeof(y));
    if (x == y) {
      memcpy(&x, lhs.as_chars() + 7, sizeof(x));
      memcpy(&y, rhs.as_chars() + 7, sizeof(y));
      if (x == y) {
        if (lhs.inline_size() == rhs.inline_size()) return 0;
        return lhs.inline_size() < rhs.inline_size() ? -1 : 1;
      }
    }
    x = absl::big_endian::FromHost64(x);
    y = absl::big_endian::FromHost64(y);
    return x < y ? -1 : 1;
  }

  Rep rep_;
};

static_assert(sizeof(InlineData) == kMaxInline + 1, "");

#ifdef ABSL_INTERNAL_CORD_HAVE_SANITIZER

constexpr InlineData::InlineData(const InlineData& rhs) noexcept
    : rep_(rhs.rep_.SanitizerSafeCopy()) {
  poison();
}

inline InlineData& InlineData::operator=(const InlineData& rhs) noexcept {
  unpoison();
  rep_ = rhs.rep_.SanitizerSafeCopy();
  poison();
  return *this;
}

constexpr void InlineData::poison_this() {
  if (!absl::is_constant_evaluated()) {
    container_internal::SanitizerPoisonObject(this);
  }
}

constexpr void InlineData::unpoison() {
  if (!absl::is_constant_evaluated()) {
    container_internal::SanitizerUnpoisonObject(this);
  }
}

constexpr void InlineData::poison() {
  if (!absl::is_constant_evaluated()) {
    if (is_tree()) {
      container_internal::SanitizerUnpoisonObject(this);
    } else if (const size_t size = inline_size()) {
      if (size < kMaxInline) {
        const char* end = rep_.as_chars() + size;
        container_internal::SanitizerPoisonMemoryRegion(end, kMaxInline - size);
      }
    } else {
      container_internal::SanitizerPoisonObject(this);
    }
  }
}

#else  // ABSL_INTERNAL_CORD_HAVE_SANITIZER

constexpr InlineData::InlineData(const InlineData&) noexcept = default;
inline InlineData& InlineData::operator=(const InlineData&) noexcept = default;

constexpr void InlineData::poison_this() {}
constexpr void InlineData::unpoison() {}
constexpr void InlineData::poison() {}

#endif  // ABSL_INTERNAL_CORD_HAVE_SANITIZER

inline CordRepSubstring* CordRep::substring() {
  assert(IsSubstring());
  return static_cast<CordRepSubstring*>(this);
}

inline const CordRepSubstring* CordRep::substring() const {
  assert(IsSubstring());
  return static_cast<const CordRepSubstring*>(this);
}

inline CordRepExternal* CordRep::external() {
  assert(IsExternal());
  return static_cast<CordRepExternal*>(this);
}

inline const CordRepExternal* CordRep::external() const {
  assert(IsExternal());
  return static_cast<const CordRepExternal*>(this);
}

inline CordRep* CordRep::Ref(CordRep* rep) {
  // ABSL_ASSUME is a workaround for
  // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105585
  ABSL_ASSUME(rep != nullptr);
  rep->refcount.Increment();
  return rep;
}

inline void CordRep::Unref(CordRep* rep) {
  assert(rep != nullptr);
  // Expect refcount to be 0. Avoiding the cost of an atomic decrement should
  // typically outweigh the cost of an extra branch checking for ref == 1.
  if (ABSL_PREDICT_FALSE(!rep->refcount.DecrementExpectHighRefcount())) {
    Destroy(rep);
  }
}

inline void swap(InlineData& lhs, InlineData& rhs) noexcept {
  lhs.unpoison();
  rhs.unpoison();
  // TODO(b/145829486): `std::swap(lhs.rep_, rhs.rep_)` results in bad codegen
  // on clang, spilling the temporary swap value on the stack. Since `Rep` is
  // trivial, we can make clang DTRT by calling a hand-rolled `SwapValue` where
  // we pass `rhs` both by value (register allocated) and by reference. The IR
  // then folds and inlines correctly into an optimized swap without spill.
  lhs.rep_.SwapValue(rhs.rep_, rhs.rep_);
  rhs.poison();
  lhs.poison();
}

}  // namespace cord_internal

ABSL_NAMESPACE_END
}  // namespace absl
#endif  // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_
