//===- Storage.h - TACO-flavored sparse tensor representation ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains definitions for the following classes:
//
// * `SparseTensorStorageBase`
// * `SparseTensorStorage<P, C, V>`
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H
#define MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H

#include "mlir/Dialect/SparseTensor/IR/Enums.h"
#include "mlir/ExecutionEngine/Float16bits.h"
#include "mlir/ExecutionEngine/SparseTensor/ArithmeticUtils.h"
#include "mlir/ExecutionEngine/SparseTensor/COO.h"
#include "mlir/ExecutionEngine/SparseTensor/MapRef.h"

namespace mlir {
namespace sparse_tensor {

//===----------------------------------------------------------------------===//
//
//  SparseTensorStorage Classes
//
//===----------------------------------------------------------------------===//

/// Abstract base class for `SparseTensorStorage<P,C,V>`. This class
/// takes responsibility for all the `<P,C,V>`-independent aspects
/// of the tensor (e.g., sizes, sparsity, mapping). In addition,
/// we use function overloading to implement "partial" method
/// specialization, which the C-API relies on to catch type errors
/// arising from our use of opaque pointers.
///
/// Because this class forms a bridge between the denotational semantics
/// of "tensors" and the operational semantics of how we store and
/// compute with them, it also distinguishes between two different
/// coordinate spaces (and their associated rank, sizes, etc).
/// Denotationally, we have the *dimensions* of the tensor represented
/// by this object.  Operationally, we have the *levels* of the storage
/// representation itself.
///
/// The *size* of an axis is the cardinality of possible coordinate
/// values along that axis (regardless of which coordinates have stored
/// element values). As such, each size must be non-zero: if any axis
/// had size zero, then the whole tensor would have trivial storage
/// (since there are no possible coordinates). Thus we use the plural
/// term *sizes* for a collection of non-zero cardinalities, and use
/// this term whenever referring to run-time cardinalities. In contrast,
/// we use the term *shape* for a collection of compile-time cardinalities,
/// where zero indicates a cardinality which is dynamic (i.e.,
/// unknown/unspecified at compile-time). At run-time, these dynamic
/// cardinalities are inferred from or checked against the sizes otherwise
/// specified. Thus, dynamic cardinalities always have an "immutable but
/// unknown" value; the term "dynamic" should not be taken to indicate
/// run-time mutability.
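///
/// For example (an illustrative sketch, not tied to any particular
/// frontend encoding): a 10x20 matrix stored in CSR format has
///   dimRank = 2, dimSizes = {10, 20}   // the tensor's dimensions
///   lvlRank = 2, lvlSizes = {10, 20}   // dense level 0, compressed level 1
/// while the same matrix under a transposed (CSC-like) mapping keeps
/// dimSizes = {10, 20} but uses lvlSizes = {20, 10}, with dim2lvl and
/// lvl2dim recording the permutation between the two coordinate spaces.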
class SparseTensorStorageBase {
protected:
  SparseTensorStorageBase(const SparseTensorStorageBase &) = default;
  SparseTensorStorageBase &operator=(const SparseTensorStorageBase &) = delete;

public:
  /// Constructs a new sparse-tensor storage object with the given encoding.
  SparseTensorStorageBase(uint64_t dimRank, const uint64_t *dimSizes,
                          uint64_t lvlRank, const uint64_t *lvlSizes,
                          const LevelType *lvlTypes, const uint64_t *dim2lvl,
                          const uint64_t *lvl2dim);
  virtual ~SparseTensorStorageBase() = default;

  /// Gets the number of tensor-dimensions.
  uint64_t getDimRank() const { return dimSizes.size(); }

  /// Gets the number of storage-levels.
  uint64_t getLvlRank() const { return lvlSizes.size(); }

  /// Gets the tensor-dimension sizes array.
  const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }

  /// Safely looks up the size of the given tensor-dimension.
  uint64_t getDimSize(uint64_t d) const {
    assert(d < getDimRank());
    return dimSizes[d];
  }

  /// Gets the storage-level sizes array.
  const std::vector<uint64_t> &getLvlSizes() const { return lvlSizes; }

  /// Safely looks up the size of the given storage-level.
  uint64_t getLvlSize(uint64_t l) const {
    assert(l < getLvlRank());
    return lvlSizes[l];
  }

  /// Gets the level-types array.
  const std::vector<LevelType> &getLvlTypes() const { return lvlTypes; }

  /// Safely looks up the type of the given level.
  LevelType getLvlType(uint64_t l) const {
    assert(l < getLvlRank());
    return lvlTypes[l];
  }

  /// Safely checks if the level uses dense storage.
  bool isDenseLvl(uint64_t l) const { return isDenseLT(getLvlType(l)); }

  /// Safely checks if the level uses compressed storage.
  bool isCompressedLvl(uint64_t l) const {
    return isCompressedLT(getLvlType(l));
  }

  /// Safely checks if the level uses loose compressed storage.
  bool isLooseCompressedLvl(uint64_t l) const {
    return isLooseCompressedLT(getLvlType(l));
  }

  /// Safely checks if the level uses singleton storage.
  bool isSingletonLvl(uint64_t l) const { return isSingletonLT(getLvlType(l)); }

  /// Safely checks if the level uses n out of m storage.
  bool isNOutOfMLvl(uint64_t l) const { return isNOutOfMLT(getLvlType(l)); }

  /// Safely checks if the level is ordered.
  bool isOrderedLvl(uint64_t l) const { return isOrderedLT(getLvlType(l)); }

  /// Safely checks if the level is unique.
  bool isUniqueLvl(uint64_t l) const { return isUniqueLT(getLvlType(l)); }

  /// Gets positions-overhead storage for the given level.
#define DECL_GETPOSITIONS(PNAME, P)                                            \
  virtual void getPositions(std::vector<P> **, uint64_t);
  MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETPOSITIONS)
#undef DECL_GETPOSITIONS

  /// Gets coordinates-overhead storage for the given level.
#define DECL_GETCOORDINATES(INAME, C)                                          \
  virtual void getCoordinates(std::vector<C> **, uint64_t);
  MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETCOORDINATES)
#undef DECL_GETCOORDINATES

  /// Gets coordinates-overhead storage buffer for the given level.
#define DECL_GETCOORDINATESBUFFER(INAME, C)                                    \
  virtual void getCoordinatesBuffer(std::vector<C> **, uint64_t);
  MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETCOORDINATESBUFFER)
#undef DECL_GETCOORDINATESBUFFER

  /// Gets primary storage.
#define DECL_GETVALUES(VNAME, V) virtual void getValues(std::vector<V> **);
  MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETVALUES)
#undef DECL_GETVALUES

  /// Element-wise insertion in lexicographic coordinate order. The first
  /// argument is the level-coordinates for the value being inserted.
#define DECL_LEXINSERT(VNAME, V) virtual void lexInsert(const uint64_t *, V);
  MLIR_SPARSETENSOR_FOREVERY_V(DECL_LEXINSERT)
#undef DECL_LEXINSERT

  /// Expanded insertion.  Note that this method resets the
  /// values/filled-switch array back to all-zero/false while only
  /// iterating over the nonzero elements.
#define DECL_EXPINSERT(VNAME, V)                                               \
  virtual void expInsert(uint64_t *, V *, bool *, uint64_t *, uint64_t,        \
                         uint64_t);
  MLIR_SPARSETENSOR_FOREVERY_V(DECL_EXPINSERT)
#undef DECL_EXPINSERT

  /// Finalizes lexicographic insertions.
  virtual void endLexInsert() = 0;
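  // A minimal usage sketch (not part of the API proper): a client holding a
  // `SparseTensorStorageBase *tensor` with two levels can insert elements
  // one by one, provided the level-coordinates arrive in lexicographic
  // order and the value type matches the tensor's `V`, and must finalize
  // with `endLexInsert`:
  //
  //   uint64_t coords[2];
  //   coords[0] = 0; coords[1] = 2;
  //   tensor->lexInsert(coords, 3.14);
  //   coords[0] = 1; coords[1] = 0;
  //   tensor->lexInsert(coords, 2.72);
  //   tensor->endLexInsert();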

private:
  const std::vector<uint64_t> dimSizes;
  const std::vector<uint64_t> lvlSizes;
  const std::vector<LevelType> lvlTypes;
  const std::vector<uint64_t> dim2lvlVec;
  const std::vector<uint64_t> lvl2dimVec;

protected:
  const MapRef map; // non-owning pointers into dim2lvl/lvl2dim vectors
  const bool allDense;
};

/// A memory-resident sparse tensor using a storage scheme based on
/// per-level sparse/dense annotations. This data structure provides
/// a bufferized form of a sparse tensor type. In contrast to generating
/// setup methods for each differently annotated sparse tensor, this
/// method provides a convenient "one-size-fits-all" solution that simply
/// takes an input tensor and annotations to implement all required setup
/// in a general manner.
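///
/// The template parameters select the overhead and value types used by the
/// storage components below: `P` is the element type of the positions
/// arrays, `C` the element type of the coordinates arrays, and `V` the type
/// of the stored values. For instance (an illustrative choice, not mandated
/// anywhere in this file), a CSR-like matrix of doubles with 32-bit
/// overhead could be instantiated as
/// `SparseTensorStorage<uint32_t, uint32_t, double>`.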
template <typename P, typename C, typename V>
class SparseTensorStorage final : public SparseTensorStorageBase {
  /// Private constructor to share code between the other constructors.
  /// Beware that the object is not necessarily guaranteed to be in a
  /// valid state after this constructor alone; e.g., `isCompressedLvl(l)`
  /// doesn't entail `!(positions[l].empty())`.
  SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                      uint64_t lvlRank, const uint64_t *lvlSizes,
                      const LevelType *lvlTypes, const uint64_t *dim2lvl,
                      const uint64_t *lvl2dim)
      : SparseTensorStorageBase(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
                                dim2lvl, lvl2dim),
        positions(lvlRank), coordinates(lvlRank), lvlCursor(lvlRank) {}

public:
  /// Constructs a sparse tensor with the given encoding, and allocates
  /// overhead storage according to some simple heuristics. When `lvlCOO`
  /// is set, the sparse tensor is initialized with the contents of that
  /// data structure. Otherwise, an empty sparse tensor results.
  SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                      uint64_t lvlRank, const uint64_t *lvlSizes,
                      const LevelType *lvlTypes, const uint64_t *dim2lvl,
                      const uint64_t *lvl2dim, SparseTensorCOO<V> *lvlCOO);

  /// Constructs a sparse tensor with the given encoding, and initializes
  /// the contents from the level buffers. The constructor assumes that the
  /// data provided by `lvlBufs` can be used directly to interpret the
  /// resulting sparse tensor and performs no integrity test on the input data.
  SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                      uint64_t lvlRank, const uint64_t *lvlSizes,
                      const LevelType *lvlTypes, const uint64_t *dim2lvl,
                      const uint64_t *lvl2dim, const intptr_t *lvlBufs);

  /// Allocates a new empty sparse tensor.
  static SparseTensorStorage<P, C, V> *
  newEmpty(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
           const uint64_t *lvlSizes, const LevelType *lvlTypes,
           const uint64_t *dim2lvl, const uint64_t *lvl2dim);

  /// Allocates a new sparse tensor and initializes it from the given COO.
  static SparseTensorStorage<P, C, V> *
  newFromCOO(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
             const uint64_t *lvlSizes, const LevelType *lvlTypes,
             const uint64_t *dim2lvl, const uint64_t *lvl2dim,
             SparseTensorCOO<V> *lvlCOO);

  /// Allocates a new sparse tensor and initializes it from the given buffers.
  static SparseTensorStorage<P, C, V> *
  newFromBuffers(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
                 const uint64_t *lvlSizes, const LevelType *lvlTypes,
                 const uint64_t *dim2lvl, const uint64_t *lvl2dim,
                 uint64_t srcRank, const intptr_t *buffers);

  ~SparseTensorStorage() final = default;

  /// Partially specialize these getter methods based on template types.
  void getPositions(std::vector<P> **out, uint64_t lvl) final {
    assert(out && "Received nullptr for out parameter");
    assert(lvl < getLvlRank());
    *out = &positions[lvl];
  }
  void getCoordinates(std::vector<C> **out, uint64_t lvl) final {
    assert(out && "Received nullptr for out parameter");
    assert(lvl < getLvlRank());
    *out = &coordinates[lvl];
  }
  void getCoordinatesBuffer(std::vector<C> **out, uint64_t lvl) final {
    assert(out && "Received nullptr for out parameter");
    assert(lvl < getLvlRank());
    // Note that the sparse tensor support library always stores COO in SoA
    // format, even when AoS is requested. This is never an issue, since all
    // actual code/library generation requests "views" into the coordinate
    // storage for the individual levels, which is trivially provided for
    // both AoS and SoA (as well as all the other storage formats). The only
    // exception is when the buffer version of coordinate storage is requested
    // (currently only for printing). In that case, we do the following
    // potentially expensive transformation to provide that view. If this
    // operation becomes more common beyond debugging, we should consider
    // implementing proper AoS in the support library as well.
    uint64_t lvlRank = getLvlRank();
    uint64_t nnz = values.size();
    crdBuffer.clear();
    crdBuffer.reserve(nnz * (lvlRank - lvl));
    for (uint64_t i = 0; i < nnz; i++) {
      for (uint64_t l = lvl; l < lvlRank; l++) {
        assert(i < coordinates[l].size());
        crdBuffer.push_back(coordinates[l][i]);
      }
    }
    *out = &crdBuffer;
  }
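
  // To illustrate the SoA-to-AoS flattening above (a sketch with made-up
  // data): for a rank-2 COO region holding coordinates (0,2), (1,0), (1,3),
  // the SoA storage is
  //   coordinates[0] = {0, 1, 1},  coordinates[1] = {2, 0, 3}
  // while the AoS view produced into `crdBuffer` (for lvl == 0) interleaves
  // them per element:
  //   crdBuffer = {0, 2,  1, 0,  1, 3}
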
  void getValues(std::vector<V> **out) final {
    assert(out && "Received nullptr for out parameter");
    *out = &values;
  }

  /// Partially specialize lexicographical insertions based on template types.
  void lexInsert(const uint64_t *lvlCoords, V val) final {
    assert(lvlCoords);
    if (allDense) {
      uint64_t lvlRank = getLvlRank();
      uint64_t valIdx = 0;
      // Linearize the address.
      for (uint64_t l = 0; l < lvlRank; l++)
        valIdx = valIdx * getLvlSize(l) + lvlCoords[l];
      values[valIdx] = val;
      return;
    }
    // First, wrap up pending insertion path.
    uint64_t diffLvl = 0;
    uint64_t full = 0;
    if (!values.empty()) {
      diffLvl = lexDiff(lvlCoords);
      endPath(diffLvl + 1);
      full = lvlCursor[diffLvl] + 1;
    }
    // Then continue with insertion path.
    insPath(lvlCoords, diffLvl, full, val);
  }

  /// Partially specialize expanded insertions based on template types.
  void expInsert(uint64_t *lvlCoords, V *values, bool *filled, uint64_t *added,
                 uint64_t count, uint64_t expsz) final {
    assert((lvlCoords && values && filled && added) && "Received nullptr");
    if (count == 0)
      return;
    // Sort.
    std::sort(added, added + count);
    // Restore insertion path for first insert.
    const uint64_t lastLvl = getLvlRank() - 1;
    uint64_t c = added[0];
    assert(c <= expsz);
    assert(filled[c] && "added coordinate is not filled");
    lvlCoords[lastLvl] = c;
    lexInsert(lvlCoords, values[c]);
    values[c] = 0;
    filled[c] = false;
    // Subsequent insertions are quick.
    for (uint64_t i = 1; i < count; i++) {
      assert(c < added[i] && "non-lexicographic insertion");
      c = added[i];
      assert(c <= expsz);
      assert(filled[c] && "added coordinate is not filled");
      lvlCoords[lastLvl] = c;
      insPath(lvlCoords, lastLvl, added[i - 1] + 1, values[c]);
      values[c] = 0;
      filled[c] = false;
    }
  }
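
  // A sketch of the access pattern served by expInsert above (the array
  // names below are the caller's, not defined in this file): the caller
  // keeps a dense scratch `values[0..expsz)` and booleans `filled[0..expsz)`
  // over the last level, plus a worklist `added[0..count)` of the last-level
  // coordinates actually touched. After accumulating into the scratch, a
  // single call
  //   tensor->expInsert(lvlCoords, values, filled, added, count, expsz);
  // flushes those entries into sparse storage (with `lvlCoords` supplying
  // the coordinates of the enclosing levels) and resets the touched scratch
  // entries back to zero/false.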

  /// Finalizes lexicographic insertions.
  void endLexInsert() final {
    if (!allDense) {
      if (values.empty())
        finalizeSegment(0);
      else
        endPath(0);
    }
  }

  /// Sorts the unordered tensor in place; this method assumes that it is
  /// an unordered COO tensor.
  void sortInPlace() {
    uint64_t nnz = values.size();
#ifndef NDEBUG
    for (uint64_t l = 0; l < getLvlRank(); l++)
      assert(nnz == coordinates[l].size());
#endif

    // In-place permutation.
    auto applyPerm = [this](std::vector<uint64_t> &perm) {
      uint64_t length = perm.size();
      uint64_t lvlRank = getLvlRank();
      // Cache for the current level coordinates.
      std::vector<P> lvlCrds(lvlRank);
      for (uint64_t i = 0; i < length; i++) {
        uint64_t current = i;
        if (i != perm[current]) {
          for (uint64_t l = 0; l < lvlRank; l++)
            lvlCrds[l] = coordinates[l][i];
          V val = values[i];
          // Deals with a permutation cycle.
          while (i != perm[current]) {
            uint64_t next = perm[current];
            // Swaps the level coordinates and value.
            for (uint64_t l = 0; l < lvlRank; l++)
              coordinates[l][current] = coordinates[l][next];
            values[current] = values[next];
            perm[current] = current;
            current = next;
          }
          for (uint64_t l = 0; l < lvlRank; l++)
            coordinates[l][current] = lvlCrds[l];
          values[current] = val;
          perm[current] = current;
        }
      }
    };

    std::vector<uint64_t> sortedIdx(nnz, 0);
    for (uint64_t i = 0; i < nnz; i++)
      sortedIdx[i] = i;

    std::sort(sortedIdx.begin(), sortedIdx.end(),
              [this](uint64_t lhs, uint64_t rhs) {
                for (uint64_t l = 0; l < getLvlRank(); l++) {
                  if (coordinates[l][lhs] == coordinates[l][rhs])
                    continue;
                  return coordinates[l][lhs] < coordinates[l][rhs];
                }
                assert(lhs == rhs && "duplicate coordinates");
                return false;
              });

    applyPerm(sortedIdx);
  }
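
  // To illustrate applyPerm above (a sketch with made-up data): sorting the
  // unordered 1-D COO {(2,c), (0,a), (1,b)} yields sortedIdx = {1, 2, 0};
  // applyPerm then follows the permutation cycle 0 -> 1 -> 2 -> 0, moving
  // each coordinate/value tuple directly to its final slot, so the storage
  // ends up as {(0,a), (1,b), (2,c)} without materializing a second copy of
  // the coordinate and value arrays.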

private:
  /// Appends coordinate `crd` to level `lvl`, in the semantically
  /// general sense.  For non-dense levels, that means appending to the
  /// `coordinates[lvl]` array, checking that `crd` is representable in
  /// the `C` type; however, we do not verify other semantic requirements
  /// (e.g., that `crd` is in bounds for `lvlSizes[lvl]`, and not previously
  /// occurring in the same segment).  For dense levels, this method instead
  /// appends the appropriate number of zeros to the `values` array, where
  /// `full` is the number of "entries" already written to `values` for this
  /// segment (aka one after the highest coordinate previously appended).
  void appendCrd(uint64_t lvl, uint64_t full, uint64_t crd) {
    if (!isDenseLvl(lvl)) {
      assert(isCompressedLvl(lvl) || isLooseCompressedLvl(lvl) ||
             isSingletonLvl(lvl) || isNOutOfMLvl(lvl));
      coordinates[lvl].push_back(detail::checkOverflowCast<C>(crd));
    } else { // Dense level.
      assert(crd >= full && "Coordinate was already filled");
      if (crd == full)
        return; // Short-circuit, since it'll be a nop.
      if (lvl + 1 == getLvlRank())
        values.insert(values.end(), crd - full, 0);
      else
        finalizeSegment(lvl + 1, 0, crd - full);
    }
  }

  /// Computes the assembled-size associated with the `l`-th level,
  /// given the assembled-size associated with the `(l-1)`-th level.
  uint64_t assembledSize(uint64_t parentSz, uint64_t l) const {
    if (isCompressedLvl(l))
      return positions[l][parentSz];
    if (isLooseCompressedLvl(l))
      return positions[l][2 * parentSz - 1];
    if (isSingletonLvl(l) || isNOutOfMLvl(l))
      return parentSz; // new size same as the parent
    assert(isDenseLvl(l));
    return parentSz * getLvlSize(l);
  }
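
  // A worked example of the size bookkeeping above (illustrative numbers
  // only): for a 4x5 CSR-like matrix with 7 stored entries, level 0 is
  // dense and level 1 is compressed, so
  //   assembledSize(1, 0) == 1 * 4 == 4            // dense: parent * lvlSize
  //   assembledSize(4, 1) == positions[1][4] == 7  // compressed: last position
  // i.e. the assembled size of the last level equals the number of stored
  // values.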

  /// Initializes sparse tensor storage scheme from a memory-resident sparse
  /// tensor in coordinate scheme. This method prepares the positions and
  /// coordinates arrays under the given per-level dense/sparse annotations.
  void fromCOO(const std::vector<Element<V>> &lvlElements, uint64_t lo,
               uint64_t hi, uint64_t l) {
    const uint64_t lvlRank = getLvlRank();
    assert(l <= lvlRank && hi <= lvlElements.size());
    // Once levels are exhausted, insert the numerical values.
    if (l == lvlRank) {
      assert(lo < hi);
      values.push_back(lvlElements[lo].value);
      return;
    }
    // Visit all elements in this interval.
    uint64_t full = 0;
    while (lo < hi) { // If `hi` is unchanged, then `lo < lvlElements.size()`.
      // Find segment in interval with same coordinate at this level.
      const uint64_t c = lvlElements[lo].coords[l];
      uint64_t seg = lo + 1;
      if (isUniqueLvl(l))
        while (seg < hi && lvlElements[seg].coords[l] == c)
          seg++;
      // Handle segment in interval for sparse or dense level.
      appendCrd(l, full, c);
      full = c + 1;
      fromCOO(lvlElements, lo, seg, l + 1);
      // And move on to next segment in interval.
      lo = seg;
    }
    // Finalize the sparse position structure at this level.
    finalizeSegment(l, full);
  }
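
  // A worked example of the recursion above (illustrative data): building a
  // 3x4 CSR-like matrix (dense level 0, compressed level 1) from the sorted
  // COO elements {(0,1)=a, (0,3)=b, (2,2)=c} proceeds one level-0 segment
  // at a time and yields
  //   positions[1]   = {0, 2, 2, 3}
  //   coordinates[1] = {1, 3, 2}
  //   values         = {a, b, c}
  // where the empty row 1 is finalized by repeating the running position.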

  /// Finalizes the sparse position structure at this level.
  void finalizeSegment(uint64_t l, uint64_t full = 0, uint64_t count = 1) {
    if (count == 0)
      return; // Short-circuit, since it'll be a nop.
    if (isCompressedLvl(l)) {
      uint64_t pos = coordinates[l].size();
      positions[l].insert(positions[l].end(), count,
                          detail::checkOverflowCast<P>(pos));
    } else if (isLooseCompressedLvl(l)) {
      // Finish this level, push pairs for the empty ones, and add one
      // more for the next level. Note that this always leaves one extra
      // unused element at the end.
      uint64_t pos = coordinates[l].size();
      positions[l].insert(positions[l].end(), 2 * count,
                          detail::checkOverflowCast<P>(pos));
    } else if (isSingletonLvl(l) || isNOutOfMLvl(l)) {
      return; // Nothing to finalize.
    } else {  // Dense dimension.
      assert(isDenseLvl(l));
      const uint64_t sz = getLvlSizes()[l];
      assert(sz >= full && "Segment is overfull");
      count = detail::checkedMul(count, sz - full);
      // For dense storage we must enumerate all the remaining coordinates
      // in this level (i.e., coordinates after the last non-zero
      // element), and either fill in their zero values or else recurse
      // to finalize some deeper level.
      if (l + 1 == getLvlRank())
        values.insert(values.end(), count, 0);
      else
        finalizeSegment(l + 1, 0, count);
    }
  }

  /// Wraps up a single insertion path, inner to outer.
  void endPath(uint64_t diffLvl) {
    const uint64_t lvlRank = getLvlRank();
    const uint64_t lastLvl = lvlRank - 1;
    assert(diffLvl <= lvlRank);
    const uint64_t stop = lvlRank - diffLvl;
    for (uint64_t i = 0; i < stop; i++) {
      const uint64_t l = lastLvl - i;
      finalizeSegment(l, lvlCursor[l] + 1);
    }
  }

  /// Continues a single insertion path, outer to inner. The first
  /// argument is the level-coordinates for the value being inserted.
  void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full,
               V val) {
    const uint64_t lvlRank = getLvlRank();
    assert(diffLvl <= lvlRank);
    for (uint64_t l = diffLvl; l < lvlRank; l++) {
      const uint64_t c = lvlCoords[l];
      appendCrd(l, full, c);
      full = 0;
      lvlCursor[l] = c;
    }
    values.push_back(val);
  }

  /// Finds the lexicographically first level where the level-coordinates
  /// in the argument differ from those in the current cursor.
  uint64_t lexDiff(const uint64_t *lvlCoords) const {
    const uint64_t lvlRank = getLvlRank();
    for (uint64_t l = 0; l < lvlRank; l++) {
      const auto crd = lvlCoords[l];
      const auto cur = lvlCursor[l];
      if (crd > cur || (crd == cur && !isUniqueLvl(l)) ||
          (crd < cur && !isOrderedLvl(l))) {
        return l;
      }
      if (crd < cur) {
        assert(false && "non-lexicographic insertion");
        return -1u;
      }
    }
    assert(false && "duplicate insertion");
    return -1u;
  }

  // Sparse tensor storage components.
  std::vector<std::vector<P>> positions;
  std::vector<std::vector<C>> coordinates;
  std::vector<V> values;

  // Auxiliary data structures.
  std::vector<uint64_t> lvlCursor;
  std::vector<C> crdBuffer; // just for AoS view
};

//===----------------------------------------------------------------------===//
//
//  SparseTensorStorage Factories
//
//===----------------------------------------------------------------------===//

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newEmpty(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim) {
  SparseTensorCOO<V> *noLvlCOO = nullptr;
  return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
                                          lvlTypes, dim2lvl, lvl2dim, noLvlCOO);
}

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newFromCOO(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim,
    SparseTensorCOO<V> *lvlCOO) {
  assert(lvlCOO);
  return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
                                          lvlTypes, dim2lvl, lvl2dim, lvlCOO);
}

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newFromBuffers(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim, uint64_t srcRank,
    const intptr_t *buffers) {
  return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
                                          lvlTypes, dim2lvl, lvl2dim, buffers);
}
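
// A minimal usage sketch for the factories above (illustrative only; the
// identity mappings and the concrete `LevelType` values for a dense and a
// compressed level are assumptions supplied by the caller, typically via
// generated code or the helpers in Enums.h):
//
//   const uint64_t dimSizes[2] = {3, 4};
//   const uint64_t lvlSizes[2] = {3, 4};
//   const uint64_t identity[2] = {0, 1};
//   const LevelType lvlTypes[2] = {/*dense*/ ..., /*compressed*/ ...};
//   SparseTensorCOO<double> *coo = ...; // rank-2 COO holding the elements
//   auto *csr = SparseTensorStorage<uint64_t, uint64_t, double>::newFromCOO(
//       2, dimSizes, 2, lvlSizes, lvlTypes, identity, identity, coo);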

//===----------------------------------------------------------------------===//
//
//  SparseTensorStorage Constructors
//
//===----------------------------------------------------------------------===//

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V>::SparseTensorStorage(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim,
    SparseTensorCOO<V> *lvlCOO)
    : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
                          dim2lvl, lvl2dim) {
  // Provide hints on capacity of positions and coordinates.
  // TODO: needs much fine-tuning based on actual sparsity; currently
  // we reserve position/coordinate space based on all previous dense
  // levels, which works well up to the first sparse level, but we should
  // really use nnz and the dense/sparse distribution.
  uint64_t sz = 1;
  for (uint64_t l = 0; l < lvlRank; l++) {
    if (isCompressedLvl(l)) {
      positions[l].reserve(sz + 1);
      positions[l].push_back(0);
      coordinates[l].reserve(sz);
      sz = 1;
    } else if (isLooseCompressedLvl(l)) {
      positions[l].reserve(2 * sz + 1); // last one unused
      positions[l].push_back(0);
      coordinates[l].reserve(sz);
      sz = 1;
    } else if (isSingletonLvl(l)) {
      coordinates[l].reserve(sz);
      sz = 1;
    } else if (isNOutOfMLvl(l)) {
      assert(l == lvlRank - 1 && "unexpected n:m usage");
      sz = detail::checkedMul(sz, lvlSizes[l]) / 2;
      coordinates[l].reserve(sz);
      values.reserve(sz);
    } else { // Dense level.
      assert(isDenseLvl(l));
      sz = detail::checkedMul(sz, lvlSizes[l]);
    }
  }
  if (lvlCOO) {
    /* New from COO: ensure it is sorted. */
    assert(lvlCOO->getRank() == lvlRank);
    lvlCOO->sort();
    // Now actually insert the `elements`.
    const auto &elements = lvlCOO->getElements();
    const uint64_t nse = elements.size();
    assert(values.size() == 0);
    values.reserve(nse);
    fromCOO(elements, 0, nse, 0);
  } else if (allDense) {
    /* New empty (all dense) */
    values.resize(sz, 0);
  }
}

template <typename P, typename C, typename V>
SparseTensorStorage<P, C, V>::SparseTensorStorage(
    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
    const uint64_t *lvlSizes, const LevelType *lvlTypes,
    const uint64_t *dim2lvl, const uint64_t *lvl2dim, const intptr_t *lvlBufs)
    : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
                          dim2lvl, lvl2dim) {
  // Note that none of the buffers can be reused because ownership
  // of the memory passed from clients is not necessarily transferred.
  // Therefore, all data is copied over into a new SparseTensorStorage.
  uint64_t trailCOOLen = 0, parentSz = 1, bufIdx = 0;
  for (uint64_t l = 0; l < lvlRank; l++) {
    if (!isUniqueLvl(l) && (isCompressedLvl(l) || isLooseCompressedLvl(l))) {
      // A `(loose)compressed_nu` level marks the start of the trailing
      // COO region. Since the coordinate buffer used for the trailing COO
      // is passed in as an AoS scheme and SparseTensorStorage uses an SoA
      // scheme, we cannot simply copy the coordinates from the provided
      // buffers.
      trailCOOLen = lvlRank - l;
      break;
    }
    if (isCompressedLvl(l) || isLooseCompressedLvl(l)) {
      P *posPtr = reinterpret_cast<P *>(lvlBufs[bufIdx++]);
      C *crdPtr = reinterpret_cast<C *>(lvlBufs[bufIdx++]);
      if (isLooseCompressedLvl(l)) {
        positions[l].assign(posPtr, posPtr + 2 * parentSz);
        coordinates[l].assign(crdPtr, crdPtr + positions[l][2 * parentSz - 1]);
      } else {
        positions[l].assign(posPtr, posPtr + parentSz + 1);
        coordinates[l].assign(crdPtr, crdPtr + positions[l][parentSz]);
      }
    } else if (isSingletonLvl(l)) {
      assert(0 && "general singleton not supported yet");
    } else if (isNOutOfMLvl(l)) {
703       assert(0 && "n ouf of m not supported yet");
    } else {
      assert(isDenseLvl(l));
    }
    parentSz = assembledSize(parentSz, l);
  }

  // Handle AoS vs. SoA mismatch for COO.
  if (trailCOOLen != 0) {
    uint64_t cooStartLvl = lvlRank - trailCOOLen;
    assert(!isUniqueLvl(cooStartLvl) &&
           (isCompressedLvl(cooStartLvl) || isLooseCompressedLvl(cooStartLvl)));
    P *posPtr = reinterpret_cast<P *>(lvlBufs[bufIdx++]);
    C *aosCrdPtr = reinterpret_cast<C *>(lvlBufs[bufIdx++]);
    P crdLen;
    if (isLooseCompressedLvl(cooStartLvl)) {
      positions[cooStartLvl].assign(posPtr, posPtr + 2 * parentSz);
      crdLen = positions[cooStartLvl][2 * parentSz - 1];
    } else {
      positions[cooStartLvl].assign(posPtr, posPtr + parentSz + 1);
      crdLen = positions[cooStartLvl][parentSz];
    }
    for (uint64_t l = cooStartLvl; l < lvlRank; l++) {
      coordinates[l].resize(crdLen);
      for (uint64_t n = 0; n < crdLen; n++) {
        coordinates[l][n] = *(aosCrdPtr + (l - cooStartLvl) + n * trailCOOLen);
      }
    }
    parentSz = assembledSize(parentSz, cooStartLvl);
  }

  // Copy the values buffer.
  V *valPtr = reinterpret_cast<V *>(lvlBufs[bufIdx]);
  values.assign(valPtr, valPtr + parentSz);
}
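
// To illustrate the AoS-to-SoA deinterleaving in the loop above (made-up
// data): a trailing COO region spanning the last two levels whose AoS
// coordinate buffer holds {0,2, 1,0, 1,3} (three elements, two levels each)
// is split into the SoA form used internally:
//   coordinates[cooStartLvl]     = {0, 1, 1}
//   coordinates[cooStartLvl + 1] = {2, 0, 3}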

} // namespace sparse_tensor
} // namespace mlir

#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_STORAGE_H