//===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of formatted_raw_ostream.
//
//===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric
130b57cec5SDimitry Andric #include "llvm/Support/FormattedStream.h"
145ffd83dbSDimitry Andric #include "llvm/Support/ConvertUTF.h"
150b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
165ffd83dbSDimitry Andric #include "llvm/Support/Unicode.h"
170b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
180b57cec5SDimitry Andric #include <algorithm>
190b57cec5SDimitry Andric
200b57cec5SDimitry Andric using namespace llvm;
210b57cec5SDimitry Andric
220b57cec5SDimitry Andric /// UpdatePosition - Examine the given char sequence and figure out which
230b57cec5SDimitry Andric /// column we end up in after output, and how many line breaks are contained.
245ffd83dbSDimitry Andric /// This assumes that the input string is well-formed UTF-8, and takes into
255ffd83dbSDimitry Andric /// account Unicode characters which render as multiple columns wide.
UpdatePosition(const char * Ptr,size_t Size)265ffd83dbSDimitry Andric void formatted_raw_ostream::UpdatePosition(const char *Ptr, size_t Size) {
270b57cec5SDimitry Andric unsigned &Column = Position.first;
280b57cec5SDimitry Andric unsigned &Line = Position.second;
290b57cec5SDimitry Andric
305ffd83dbSDimitry Andric auto ProcessUTF8CodePoint = [&Line, &Column](StringRef CP) {
315ffd83dbSDimitry Andric int Width = sys::unicode::columnWidthUTF8(CP);
325ffd83dbSDimitry Andric if (Width != sys::unicode::ErrorNonPrintableCharacter)
335ffd83dbSDimitry Andric Column += Width;
345ffd83dbSDimitry Andric
355ffd83dbSDimitry Andric // The only special whitespace characters we care about are single-byte.
365ffd83dbSDimitry Andric if (CP.size() > 1)
375ffd83dbSDimitry Andric return;
385ffd83dbSDimitry Andric
395ffd83dbSDimitry Andric switch (CP[0]) {
400b57cec5SDimitry Andric case '\n':
410b57cec5SDimitry Andric Line += 1;
42bdd1243dSDimitry Andric [[fallthrough]];
430b57cec5SDimitry Andric case '\r':
440b57cec5SDimitry Andric Column = 0;
450b57cec5SDimitry Andric break;
460b57cec5SDimitry Andric case '\t':
470b57cec5SDimitry Andric // Assumes tab stop = 8 characters.
480b57cec5SDimitry Andric Column += (8 - (Column & 0x7)) & 0x7;
490b57cec5SDimitry Andric break;
500b57cec5SDimitry Andric }
515ffd83dbSDimitry Andric };
525ffd83dbSDimitry Andric
535ffd83dbSDimitry Andric // If we have a partial UTF-8 sequence from the previous buffer, check that
545ffd83dbSDimitry Andric // first.
555ffd83dbSDimitry Andric if (PartialUTF8Char.size()) {
565ffd83dbSDimitry Andric size_t BytesFromBuffer =
575ffd83dbSDimitry Andric getNumBytesForUTF8(PartialUTF8Char[0]) - PartialUTF8Char.size();
585ffd83dbSDimitry Andric if (Size < BytesFromBuffer) {
595ffd83dbSDimitry Andric // If we still don't have enough bytes for a complete code point, just
605ffd83dbSDimitry Andric // append what we have.
615ffd83dbSDimitry Andric PartialUTF8Char.append(StringRef(Ptr, Size));
625ffd83dbSDimitry Andric return;
635ffd83dbSDimitry Andric } else {
645ffd83dbSDimitry Andric // The first few bytes from the buffer will complete the code point.
655ffd83dbSDimitry Andric // Concatenate them and process their effect on the line and column
665ffd83dbSDimitry Andric // numbers.
675ffd83dbSDimitry Andric PartialUTF8Char.append(StringRef(Ptr, BytesFromBuffer));
685ffd83dbSDimitry Andric ProcessUTF8CodePoint(PartialUTF8Char);
695ffd83dbSDimitry Andric PartialUTF8Char.clear();
705ffd83dbSDimitry Andric Ptr += BytesFromBuffer;
715ffd83dbSDimitry Andric Size -= BytesFromBuffer;
725ffd83dbSDimitry Andric }
735ffd83dbSDimitry Andric }
745ffd83dbSDimitry Andric
755ffd83dbSDimitry Andric // Now scan the rest of the buffer.
765ffd83dbSDimitry Andric unsigned NumBytes;
775ffd83dbSDimitry Andric for (const char *End = Ptr + Size; Ptr < End; Ptr += NumBytes) {
785ffd83dbSDimitry Andric NumBytes = getNumBytesForUTF8(*Ptr);
795ffd83dbSDimitry Andric
805ffd83dbSDimitry Andric // The buffer might end part way through a UTF-8 code unit sequence for a
815ffd83dbSDimitry Andric // Unicode scalar value if it got flushed. If this happens, we can't know
825ffd83dbSDimitry Andric // the display width until we see the rest of the code point. Stash the
835ffd83dbSDimitry Andric // bytes we do have, so that we can reconstruct the whole code point later,
845ffd83dbSDimitry Andric // even if the buffer is being flushed.
855ffd83dbSDimitry Andric if ((unsigned)(End - Ptr) < NumBytes) {
865ffd83dbSDimitry Andric PartialUTF8Char = StringRef(Ptr, End - Ptr);
875ffd83dbSDimitry Andric return;
885ffd83dbSDimitry Andric }
895ffd83dbSDimitry Andric
905ffd83dbSDimitry Andric ProcessUTF8CodePoint(StringRef(Ptr, NumBytes));
910b57cec5SDimitry Andric }
920b57cec5SDimitry Andric }
930b57cec5SDimitry Andric
940b57cec5SDimitry Andric /// ComputePosition - Examine the current output and update line and column
950b57cec5SDimitry Andric /// counts.
ComputePosition(const char * Ptr,size_t Size)960b57cec5SDimitry Andric void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) {
97*439352acSDimitry Andric if (DisableScan)
98*439352acSDimitry Andric return;
99*439352acSDimitry Andric
1000b57cec5SDimitry Andric // If our previous scan pointer is inside the buffer, assume we already
1010b57cec5SDimitry Andric // scanned those bytes. This depends on raw_ostream to not change our buffer
1020b57cec5SDimitry Andric // in unexpected ways.
1030b57cec5SDimitry Andric if (Ptr <= Scanned && Scanned <= Ptr + Size)
1040b57cec5SDimitry Andric // Scan all characters added since our last scan to determine the new
1050b57cec5SDimitry Andric // column.
1065ffd83dbSDimitry Andric UpdatePosition(Scanned, Size - (Scanned - Ptr));
1070b57cec5SDimitry Andric else
1085ffd83dbSDimitry Andric UpdatePosition(Ptr, Size);
1090b57cec5SDimitry Andric
1100b57cec5SDimitry Andric // Update the scanning pointer.
1110b57cec5SDimitry Andric Scanned = Ptr + Size;
1120b57cec5SDimitry Andric }
1130b57cec5SDimitry Andric
1140b57cec5SDimitry Andric /// PadToColumn - Align the output to some column number.
1150b57cec5SDimitry Andric ///
1160b57cec5SDimitry Andric /// \param NewCol - The column to move to.
1170b57cec5SDimitry Andric ///
PadToColumn(unsigned NewCol)1180b57cec5SDimitry Andric formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) {
1190b57cec5SDimitry Andric // Figure out what's in the buffer and add it to the column count.
1200b57cec5SDimitry Andric ComputePosition(getBufferStart(), GetNumBytesInBuffer());
1210b57cec5SDimitry Andric
1220b57cec5SDimitry Andric // Output spaces until we reach the desired column.
1230b57cec5SDimitry Andric indent(std::max(int(NewCol - getColumn()), 1));
1240b57cec5SDimitry Andric return *this;
1250b57cec5SDimitry Andric }
1260b57cec5SDimitry Andric
write_impl(const char * Ptr,size_t Size)1270b57cec5SDimitry Andric void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) {
1280b57cec5SDimitry Andric // Figure out what's in the buffer and add it to the column count.
1290b57cec5SDimitry Andric ComputePosition(Ptr, Size);
1300b57cec5SDimitry Andric
1310b57cec5SDimitry Andric // Write the data to the underlying stream (which is unbuffered, so
1320b57cec5SDimitry Andric // the data will be immediately written out).
1330b57cec5SDimitry Andric TheStream->write(Ptr, Size);
1340b57cec5SDimitry Andric
1350b57cec5SDimitry Andric // Reset the scanning pointer.
1360b57cec5SDimitry Andric Scanned = nullptr;
1370b57cec5SDimitry Andric }
1380b57cec5SDimitry Andric
1390b57cec5SDimitry Andric /// fouts() - This returns a reference to a formatted_raw_ostream for
1400b57cec5SDimitry Andric /// standard output. Use it like: fouts() << "foo" << "bar";
fouts()1410b57cec5SDimitry Andric formatted_raw_ostream &llvm::fouts() {
1420b57cec5SDimitry Andric static formatted_raw_ostream S(outs());
1430b57cec5SDimitry Andric return S;
1440b57cec5SDimitry Andric }
1450b57cec5SDimitry Andric
1460b57cec5SDimitry Andric /// ferrs() - This returns a reference to a formatted_raw_ostream for
1470b57cec5SDimitry Andric /// standard error. Use it like: ferrs() << "foo" << "bar";
ferrs()1480b57cec5SDimitry Andric formatted_raw_ostream &llvm::ferrs() {
1490b57cec5SDimitry Andric static formatted_raw_ostream S(errs());
1500b57cec5SDimitry Andric return S;
1510b57cec5SDimitry Andric }
1520b57cec5SDimitry Andric
1530b57cec5SDimitry Andric /// fdbgs() - This returns a reference to a formatted_raw_ostream for
1540b57cec5SDimitry Andric /// the debug stream. Use it like: fdbgs() << "foo" << "bar";
fdbgs()1550b57cec5SDimitry Andric formatted_raw_ostream &llvm::fdbgs() {
1560b57cec5SDimitry Andric static formatted_raw_ostream S(dbgs());
1570b57cec5SDimitry Andric return S;
1580b57cec5SDimitry Andric }
159