xref: /freebsd-src/contrib/llvm-project/llvm/lib/Support/FormattedStream.cpp (revision 439352ac8257c8419cb4a662abb7f260f31f9932)
10b57cec5SDimitry Andric //===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file contains the implementation of formatted_raw_ostream.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "llvm/Support/FormattedStream.h"
145ffd83dbSDimitry Andric #include "llvm/Support/ConvertUTF.h"
150b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
165ffd83dbSDimitry Andric #include "llvm/Support/Unicode.h"
170b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
180b57cec5SDimitry Andric #include <algorithm>
190b57cec5SDimitry Andric 
200b57cec5SDimitry Andric using namespace llvm;
210b57cec5SDimitry Andric 
220b57cec5SDimitry Andric /// UpdatePosition - Examine the given char sequence and figure out which
230b57cec5SDimitry Andric /// column we end up in after output, and how many line breaks are contained.
245ffd83dbSDimitry Andric /// This assumes that the input string is well-formed UTF-8, and takes into
255ffd83dbSDimitry Andric /// account Unicode characters which render as multiple columns wide.
UpdatePosition(const char * Ptr,size_t Size)265ffd83dbSDimitry Andric void formatted_raw_ostream::UpdatePosition(const char *Ptr, size_t Size) {
270b57cec5SDimitry Andric   unsigned &Column = Position.first;
280b57cec5SDimitry Andric   unsigned &Line = Position.second;
290b57cec5SDimitry Andric 
305ffd83dbSDimitry Andric   auto ProcessUTF8CodePoint = [&Line, &Column](StringRef CP) {
315ffd83dbSDimitry Andric     int Width = sys::unicode::columnWidthUTF8(CP);
325ffd83dbSDimitry Andric     if (Width != sys::unicode::ErrorNonPrintableCharacter)
335ffd83dbSDimitry Andric       Column += Width;
345ffd83dbSDimitry Andric 
355ffd83dbSDimitry Andric     // The only special whitespace characters we care about are single-byte.
365ffd83dbSDimitry Andric     if (CP.size() > 1)
375ffd83dbSDimitry Andric       return;
385ffd83dbSDimitry Andric 
395ffd83dbSDimitry Andric     switch (CP[0]) {
400b57cec5SDimitry Andric     case '\n':
410b57cec5SDimitry Andric       Line += 1;
42bdd1243dSDimitry Andric       [[fallthrough]];
430b57cec5SDimitry Andric     case '\r':
440b57cec5SDimitry Andric       Column = 0;
450b57cec5SDimitry Andric       break;
460b57cec5SDimitry Andric     case '\t':
470b57cec5SDimitry Andric       // Assumes tab stop = 8 characters.
480b57cec5SDimitry Andric       Column += (8 - (Column & 0x7)) & 0x7;
490b57cec5SDimitry Andric       break;
500b57cec5SDimitry Andric     }
515ffd83dbSDimitry Andric   };
525ffd83dbSDimitry Andric 
535ffd83dbSDimitry Andric   // If we have a partial UTF-8 sequence from the previous buffer, check that
545ffd83dbSDimitry Andric   // first.
555ffd83dbSDimitry Andric   if (PartialUTF8Char.size()) {
565ffd83dbSDimitry Andric     size_t BytesFromBuffer =
575ffd83dbSDimitry Andric         getNumBytesForUTF8(PartialUTF8Char[0]) - PartialUTF8Char.size();
585ffd83dbSDimitry Andric     if (Size < BytesFromBuffer) {
595ffd83dbSDimitry Andric       // If we still don't have enough bytes for a complete code point, just
605ffd83dbSDimitry Andric       // append what we have.
615ffd83dbSDimitry Andric       PartialUTF8Char.append(StringRef(Ptr, Size));
625ffd83dbSDimitry Andric       return;
635ffd83dbSDimitry Andric     } else {
645ffd83dbSDimitry Andric       // The first few bytes from the buffer will complete the code point.
655ffd83dbSDimitry Andric       // Concatenate them and process their effect on the line and column
665ffd83dbSDimitry Andric       // numbers.
675ffd83dbSDimitry Andric       PartialUTF8Char.append(StringRef(Ptr, BytesFromBuffer));
685ffd83dbSDimitry Andric       ProcessUTF8CodePoint(PartialUTF8Char);
695ffd83dbSDimitry Andric       PartialUTF8Char.clear();
705ffd83dbSDimitry Andric       Ptr += BytesFromBuffer;
715ffd83dbSDimitry Andric       Size -= BytesFromBuffer;
725ffd83dbSDimitry Andric     }
735ffd83dbSDimitry Andric   }
745ffd83dbSDimitry Andric 
755ffd83dbSDimitry Andric   // Now scan the rest of the buffer.
765ffd83dbSDimitry Andric   unsigned NumBytes;
775ffd83dbSDimitry Andric   for (const char *End = Ptr + Size; Ptr < End; Ptr += NumBytes) {
785ffd83dbSDimitry Andric     NumBytes = getNumBytesForUTF8(*Ptr);
795ffd83dbSDimitry Andric 
805ffd83dbSDimitry Andric     // The buffer might end part way through a UTF-8 code unit sequence for a
815ffd83dbSDimitry Andric     // Unicode scalar value if it got flushed. If this happens, we can't know
825ffd83dbSDimitry Andric     // the display width until we see the rest of the code point. Stash the
835ffd83dbSDimitry Andric     // bytes we do have, so that we can reconstruct the whole code point later,
845ffd83dbSDimitry Andric     // even if the buffer is being flushed.
855ffd83dbSDimitry Andric     if ((unsigned)(End - Ptr) < NumBytes) {
865ffd83dbSDimitry Andric       PartialUTF8Char = StringRef(Ptr, End - Ptr);
875ffd83dbSDimitry Andric       return;
885ffd83dbSDimitry Andric     }
895ffd83dbSDimitry Andric 
905ffd83dbSDimitry Andric     ProcessUTF8CodePoint(StringRef(Ptr, NumBytes));
910b57cec5SDimitry Andric   }
920b57cec5SDimitry Andric }
930b57cec5SDimitry Andric 
940b57cec5SDimitry Andric /// ComputePosition - Examine the current output and update line and column
950b57cec5SDimitry Andric /// counts.
ComputePosition(const char * Ptr,size_t Size)960b57cec5SDimitry Andric void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) {
97*439352acSDimitry Andric   if (DisableScan)
98*439352acSDimitry Andric     return;
99*439352acSDimitry Andric 
1000b57cec5SDimitry Andric   // If our previous scan pointer is inside the buffer, assume we already
1010b57cec5SDimitry Andric   // scanned those bytes. This depends on raw_ostream to not change our buffer
1020b57cec5SDimitry Andric   // in unexpected ways.
1030b57cec5SDimitry Andric   if (Ptr <= Scanned && Scanned <= Ptr + Size)
1040b57cec5SDimitry Andric     // Scan all characters added since our last scan to determine the new
1050b57cec5SDimitry Andric     // column.
1065ffd83dbSDimitry Andric     UpdatePosition(Scanned, Size - (Scanned - Ptr));
1070b57cec5SDimitry Andric   else
1085ffd83dbSDimitry Andric     UpdatePosition(Ptr, Size);
1090b57cec5SDimitry Andric 
1100b57cec5SDimitry Andric   // Update the scanning pointer.
1110b57cec5SDimitry Andric   Scanned = Ptr + Size;
1120b57cec5SDimitry Andric }
1130b57cec5SDimitry Andric 
1140b57cec5SDimitry Andric /// PadToColumn - Align the output to some column number.
1150b57cec5SDimitry Andric ///
1160b57cec5SDimitry Andric /// \param NewCol - The column to move to.
1170b57cec5SDimitry Andric ///
PadToColumn(unsigned NewCol)1180b57cec5SDimitry Andric formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) {
1190b57cec5SDimitry Andric   // Figure out what's in the buffer and add it to the column count.
1200b57cec5SDimitry Andric   ComputePosition(getBufferStart(), GetNumBytesInBuffer());
1210b57cec5SDimitry Andric 
1220b57cec5SDimitry Andric   // Output spaces until we reach the desired column.
1230b57cec5SDimitry Andric   indent(std::max(int(NewCol - getColumn()), 1));
1240b57cec5SDimitry Andric   return *this;
1250b57cec5SDimitry Andric }
1260b57cec5SDimitry Andric 
write_impl(const char * Ptr,size_t Size)1270b57cec5SDimitry Andric void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) {
1280b57cec5SDimitry Andric   // Figure out what's in the buffer and add it to the column count.
1290b57cec5SDimitry Andric   ComputePosition(Ptr, Size);
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric   // Write the data to the underlying stream (which is unbuffered, so
1320b57cec5SDimitry Andric   // the data will be immediately written out).
1330b57cec5SDimitry Andric   TheStream->write(Ptr, Size);
1340b57cec5SDimitry Andric 
1350b57cec5SDimitry Andric   // Reset the scanning pointer.
1360b57cec5SDimitry Andric   Scanned = nullptr;
1370b57cec5SDimitry Andric }
1380b57cec5SDimitry Andric 
1390b57cec5SDimitry Andric /// fouts() - This returns a reference to a formatted_raw_ostream for
1400b57cec5SDimitry Andric /// standard output.  Use it like: fouts() << "foo" << "bar";
fouts()1410b57cec5SDimitry Andric formatted_raw_ostream &llvm::fouts() {
1420b57cec5SDimitry Andric   static formatted_raw_ostream S(outs());
1430b57cec5SDimitry Andric   return S;
1440b57cec5SDimitry Andric }
1450b57cec5SDimitry Andric 
1460b57cec5SDimitry Andric /// ferrs() - This returns a reference to a formatted_raw_ostream for
1470b57cec5SDimitry Andric /// standard error.  Use it like: ferrs() << "foo" << "bar";
ferrs()1480b57cec5SDimitry Andric formatted_raw_ostream &llvm::ferrs() {
1490b57cec5SDimitry Andric   static formatted_raw_ostream S(errs());
1500b57cec5SDimitry Andric   return S;
1510b57cec5SDimitry Andric }
1520b57cec5SDimitry Andric 
1530b57cec5SDimitry Andric /// fdbgs() - This returns a reference to a formatted_raw_ostream for
1540b57cec5SDimitry Andric /// the debug stream.  Use it like: fdbgs() << "foo" << "bar";
fdbgs()1550b57cec5SDimitry Andric formatted_raw_ostream &llvm::fdbgs() {
1560b57cec5SDimitry Andric   static formatted_raw_ostream S(dbgs());
1570b57cec5SDimitry Andric   return S;
1580b57cec5SDimitry Andric }
159