lib/Support/FormattedStream.cpp

0b57cec5SDimitry Andric//===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===//
0b57cec5SDimitry Andric//
0b57cec5SDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0b57cec5SDimitry Andric// See https://llvm.org/LICENSE.txt for license information.
0b57cec5SDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0b57cec5SDimitry Andric//
0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
0b57cec5SDimitry Andric//
0b57cec5SDimitry Andric// This file contains the implementation of formatted_raw_ostream.
0b57cec5SDimitry Andric//
0b57cec5SDimitry Andric//===----------------------------------------------------------------------===//
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric#include "llvm/Support/FormattedStream.h"
5ffd83dbSDimitry Andric#include "llvm/Support/ConvertUTF.h"
0b57cec5SDimitry Andric#include "llvm/Support/Debug.h"
5ffd83dbSDimitry Andric#include "llvm/Support/Unicode.h"
0b57cec5SDimitry Andric#include "llvm/Support/raw_ostream.h"
0b57cec5SDimitry Andric#include <algorithm>
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricusing namespace llvm;
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric/// UpdatePosition - Examine the given char sequence and figure out which
0b57cec5SDimitry Andric/// column we end up in after output, and how many line breaks are contained.
5ffd83dbSDimitry Andric/// This assumes that the input string is well-formed UTF-8, and takes into
5ffd83dbSDimitry Andric/// account Unicode characters which render as multiple columns wide.
5ffd83dbSDimitry Andricvoid formatted_raw_ostream::UpdatePosition(const char *Ptr, size_t Size) {
0b57cec5SDimitry Andric  unsigned &Column = Position.first;
0b57cec5SDimitry Andric  unsigned &Line = Position.second;
0b57cec5SDimitry Andric
5ffd83dbSDimitry Andric  auto ProcessUTF8CodePoint = [&Line, &Column](StringRef CP) {
5ffd83dbSDimitry Andric    int Width = sys::unicode::columnWidthUTF8(CP);
5ffd83dbSDimitry Andric    if (Width != sys::unicode::ErrorNonPrintableCharacter)
5ffd83dbSDimitry Andric      Column += Width;
5ffd83dbSDimitry Andric
5ffd83dbSDimitry Andric    // The only special whitespace characters we care about are single-byte.
5ffd83dbSDimitry Andric    if (CP.size() > 1)
5ffd83dbSDimitry Andric      return;
5ffd83dbSDimitry Andric
5ffd83dbSDimitry Andric    switch (CP[0]) {
0b57cec5SDimitry Andric    case '\n':
0b57cec5SDimitry Andric      Line += 1;
bdd1243dSDimitry Andric      [[fallthrough]];
0b57cec5SDimitry Andric    case '\r':
0b57cec5SDimitry Andric      Column = 0;
0b57cec5SDimitry Andric      break;
0b57cec5SDimitry Andric    case '\t':
0b57cec5SDimitry Andric      // Assumes tab stop = 8 characters.
0b57cec5SDimitry Andric      Column += (8 - (Column & 0x7)) & 0x7;
0b57cec5SDimitry Andric      break;
0b57cec5SDimitry Andric    }
5ffd83dbSDimitry Andric  };
5ffd83dbSDimitry Andric
5ffd83dbSDimitry Andric  // If we have a partial UTF-8 sequence from the previous buffer, check that
5ffd83dbSDimitry Andric  // first.
5ffd83dbSDimitry Andric  if (PartialUTF8Char.size()) {
5ffd83dbSDimitry Andric    size_t BytesFromBuffer =
5ffd83dbSDimitry Andric        getNumBytesForUTF8(PartialUTF8Char[0]) - PartialUTF8Char.size();
5ffd83dbSDimitry Andric    if (Size < BytesFromBuffer) {
5ffd83dbSDimitry Andric      // If we still don't have enough bytes for a complete code point, just
5ffd83dbSDimitry Andric      // append what we have.
5ffd83dbSDimitry Andric      PartialUTF8Char.append(StringRef(Ptr, Size));
5ffd83dbSDimitry Andric      return;
5ffd83dbSDimitry Andric    } else {
5ffd83dbSDimitry Andric      // The first few bytes from the buffer will complete the code point.
5ffd83dbSDimitry Andric      // Concatenate them and process their effect on the line and column
5ffd83dbSDimitry Andric      // numbers.
5ffd83dbSDimitry Andric      PartialUTF8Char.append(StringRef(Ptr, BytesFromBuffer));
5ffd83dbSDimitry Andric      ProcessUTF8CodePoint(PartialUTF8Char);
5ffd83dbSDimitry Andric      PartialUTF8Char.clear();
5ffd83dbSDimitry Andric      Ptr += BytesFromBuffer;
5ffd83dbSDimitry Andric      Size -= BytesFromBuffer;
5ffd83dbSDimitry Andric    }
5ffd83dbSDimitry Andric  }
5ffd83dbSDimitry Andric
5ffd83dbSDimitry Andric  // Now scan the rest of the buffer.
5ffd83dbSDimitry Andric  unsigned NumBytes;
5ffd83dbSDimitry Andric  for (const char *End = Ptr + Size; Ptr < End; Ptr += NumBytes) {
5ffd83dbSDimitry Andric    NumBytes = getNumBytesForUTF8(*Ptr);
5ffd83dbSDimitry Andric
5ffd83dbSDimitry Andric    // The buffer might end part way through a UTF-8 code unit sequence for a
5ffd83dbSDimitry Andric    // Unicode scalar value if it got flushed. If this happens, we can't know
5ffd83dbSDimitry Andric    // the display width until we see the rest of the code point. Stash the
5ffd83dbSDimitry Andric    // bytes we do have, so that we can reconstruct the whole code point later,
5ffd83dbSDimitry Andric    // even if the buffer is being flushed.
5ffd83dbSDimitry Andric    if ((unsigned)(End - Ptr) < NumBytes) {
5ffd83dbSDimitry Andric      PartialUTF8Char = StringRef(Ptr, End - Ptr);
5ffd83dbSDimitry Andric      return;
5ffd83dbSDimitry Andric    }
5ffd83dbSDimitry Andric
5ffd83dbSDimitry Andric    ProcessUTF8CodePoint(StringRef(Ptr, NumBytes));
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric/// ComputePosition - Examine the current output and update line and column
0b57cec5SDimitry Andric/// counts.
0b57cec5SDimitry Andricvoid formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) {
*439352acSDimitry Andric  if (DisableScan)
*439352acSDimitry Andric    return;
*439352acSDimitry Andric
0b57cec5SDimitry Andric  // If our previous scan pointer is inside the buffer, assume we already
0b57cec5SDimitry Andric  // scanned those bytes. This depends on raw_ostream to not change our buffer
0b57cec5SDimitry Andric  // in unexpected ways.
0b57cec5SDimitry Andric  if (Ptr <= Scanned && Scanned <= Ptr + Size)
0b57cec5SDimitry Andric    // Scan all characters added since our last scan to determine the new
0b57cec5SDimitry Andric    // column.
5ffd83dbSDimitry Andric    UpdatePosition(Scanned, Size - (Scanned - Ptr));
0b57cec5SDimitry Andric  else
5ffd83dbSDimitry Andric    UpdatePosition(Ptr, Size);
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  // Update the scanning pointer.
0b57cec5SDimitry Andric  Scanned = Ptr + Size;
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric/// PadToColumn - Align the output to some column number.
0b57cec5SDimitry Andric///
0b57cec5SDimitry Andric/// \param NewCol - The column to move to.
0b57cec5SDimitry Andric///
0b57cec5SDimitry Andricformatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) {
0b57cec5SDimitry Andric  // Figure out what's in the buffer and add it to the column count.
0b57cec5SDimitry Andric  ComputePosition(getBufferStart(), GetNumBytesInBuffer());
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  // Output spaces until we reach the desired column.
0b57cec5SDimitry Andric  indent(std::max(int(NewCol - getColumn()), 1));
0b57cec5SDimitry Andric  return *this;
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricvoid formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) {
0b57cec5SDimitry Andric  // Figure out what's in the buffer and add it to the column count.
0b57cec5SDimitry Andric  ComputePosition(Ptr, Size);
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  // Write the data to the underlying stream (which is unbuffered, so
0b57cec5SDimitry Andric  // the data will be immediately written out).
0b57cec5SDimitry Andric  TheStream->write(Ptr, Size);
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  // Reset the scanning pointer.
0b57cec5SDimitry Andric  Scanned = nullptr;
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric/// fouts() - This returns a reference to a formatted_raw_ostream for
0b57cec5SDimitry Andric/// standard output.  Use it like: fouts() << "foo" << "bar";
0b57cec5SDimitry Andricformatted_raw_ostream &llvm::fouts() {
0b57cec5SDimitry Andric  static formatted_raw_ostream S(outs());
0b57cec5SDimitry Andric  return S;
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric/// ferrs() - This returns a reference to a formatted_raw_ostream for
0b57cec5SDimitry Andric/// standard error.  Use it like: ferrs() << "foo" << "bar";
0b57cec5SDimitry Andricformatted_raw_ostream &llvm::ferrs() {
0b57cec5SDimitry Andric  static formatted_raw_ostream S(errs());
0b57cec5SDimitry Andric  return S;
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric/// fdbgs() - This returns a reference to a formatted_raw_ostream for
0b57cec5SDimitry Andric/// the debug stream.  Use it like: fdbgs() << "foo" << "bar";
0b57cec5SDimitry Andricformatted_raw_ostream &llvm::fdbgs() {
0b57cec5SDimitry Andric  static formatted_raw_ostream S(dbgs());
0b57cec5SDimitry Andric  return S;
0b57cec5SDimitry Andric}