1 //===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the implementation of formatted_raw_ostream. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/FormattedStream.h" 14 #include "llvm/Support/ConvertUTF.h" 15 #include "llvm/Support/Debug.h" 16 #include "llvm/Support/Unicode.h" 17 #include "llvm/Support/raw_ostream.h" 18 #include <algorithm> 19 20 using namespace llvm; 21 22 /// UpdatePosition - Examine the given char sequence and figure out which 23 /// column we end up in after output, and how many line breaks are contained. 24 /// This assumes that the input string is well-formed UTF-8, and takes into 25 /// account Unicode characters which render as multiple columns wide. 26 void formatted_raw_ostream::UpdatePosition(const char *Ptr, size_t Size) { 27 unsigned &Column = Position.first; 28 unsigned &Line = Position.second; 29 30 auto ProcessUTF8CodePoint = [&Line, &Column](StringRef CP) { 31 int Width = sys::unicode::columnWidthUTF8(CP); 32 if (Width != sys::unicode::ErrorNonPrintableCharacter) 33 Column += Width; 34 35 // The only special whitespace characters we care about are single-byte. 36 if (CP.size() > 1) 37 return; 38 39 switch (CP[0]) { 40 case '\n': 41 Line += 1; 42 [[fallthrough]]; 43 case '\r': 44 Column = 0; 45 break; 46 case '\t': 47 // Assumes tab stop = 8 characters. 48 Column += (8 - (Column & 0x7)) & 0x7; 49 break; 50 } 51 }; 52 53 // If we have a partial UTF-8 sequence from the previous buffer, check that 54 // first. 55 if (PartialUTF8Char.size()) { 56 size_t BytesFromBuffer = 57 getNumBytesForUTF8(PartialUTF8Char[0]) - PartialUTF8Char.size(); 58 if (Size < BytesFromBuffer) { 59 // If we still don't have enough bytes for a complete code point, just 60 // append what we have. 61 PartialUTF8Char.append(StringRef(Ptr, Size)); 62 return; 63 } else { 64 // The first few bytes from the buffer will complete the code point. 65 // Concatenate them and process their effect on the line and column 66 // numbers. 67 PartialUTF8Char.append(StringRef(Ptr, BytesFromBuffer)); 68 ProcessUTF8CodePoint(PartialUTF8Char); 69 PartialUTF8Char.clear(); 70 Ptr += BytesFromBuffer; 71 Size -= BytesFromBuffer; 72 } 73 } 74 75 // Now scan the rest of the buffer. 76 unsigned NumBytes; 77 for (const char *End = Ptr + Size; Ptr < End; Ptr += NumBytes) { 78 // Fast path for printable ASCII characters without special handling. 79 if (*Ptr >= 0x20 && *Ptr <= 0x7e) { 80 NumBytes = 1; 81 ++Column; 82 continue; 83 } 84 85 NumBytes = getNumBytesForUTF8(*Ptr); 86 87 // The buffer might end part way through a UTF-8 code unit sequence for a 88 // Unicode scalar value if it got flushed. If this happens, we can't know 89 // the display width until we see the rest of the code point. Stash the 90 // bytes we do have, so that we can reconstruct the whole code point later, 91 // even if the buffer is being flushed. 92 if ((unsigned)(End - Ptr) < NumBytes) { 93 PartialUTF8Char = StringRef(Ptr, End - Ptr); 94 return; 95 } 96 97 ProcessUTF8CodePoint(StringRef(Ptr, NumBytes)); 98 } 99 } 100 101 /// ComputePosition - Examine the current output and update line and column 102 /// counts. 103 void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) { 104 if (DisableScan) 105 return; 106 107 // If our previous scan pointer is inside the buffer, assume we already 108 // scanned those bytes. This depends on raw_ostream to not change our buffer 109 // in unexpected ways. 110 if (Ptr <= Scanned && Scanned <= Ptr + Size) 111 // Scan all characters added since our last scan to determine the new 112 // column. 113 UpdatePosition(Scanned, Size - (Scanned - Ptr)); 114 else 115 UpdatePosition(Ptr, Size); 116 117 // Update the scanning pointer. 118 Scanned = Ptr + Size; 119 } 120 121 /// PadToColumn - Align the output to some column number. 122 /// 123 /// \param NewCol - The column to move to. 124 /// 125 formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) { 126 // Figure out what's in the buffer and add it to the column count. 127 ComputePosition(getBufferStart(), GetNumBytesInBuffer()); 128 129 // Output spaces until we reach the desired column. 130 indent(std::max(int(NewCol - getColumn()), 1)); 131 return *this; 132 } 133 134 void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) { 135 // Figure out what's in the buffer and add it to the column count. 136 ComputePosition(Ptr, Size); 137 138 // Write the data to the underlying stream (which is unbuffered, so 139 // the data will be immediately written out). 140 TheStream->write(Ptr, Size); 141 142 // Reset the scanning pointer. 143 Scanned = nullptr; 144 } 145 146 /// fouts() - This returns a reference to a formatted_raw_ostream for 147 /// standard output. Use it like: fouts() << "foo" << "bar"; 148 formatted_raw_ostream &llvm::fouts() { 149 static formatted_raw_ostream S(outs()); 150 return S; 151 } 152 153 /// ferrs() - This returns a reference to a formatted_raw_ostream for 154 /// standard error. Use it like: ferrs() << "foo" << "bar"; 155 formatted_raw_ostream &llvm::ferrs() { 156 static formatted_raw_ostream S(errs()); 157 return S; 158 } 159 160 /// fdbgs() - This returns a reference to a formatted_raw_ostream for 161 /// the debug stream. Use it like: fdbgs() << "foo" << "bar"; 162 formatted_raw_ostream &llvm::fdbgs() { 163 static formatted_raw_ostream S(dbgs()); 164 return S; 165 } 166