1 //===- FormatVariadic.cpp - Format string parsing and analysis ----*-C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 //===----------------------------------------------------------------------===// 7 8 #include "llvm/Support/FormatVariadic.h" 9 #include <cassert> 10 #include <optional> 11 12 using namespace llvm; 13 14 static std::optional<AlignStyle> translateLocChar(char C) { 15 switch (C) { 16 case '-': 17 return AlignStyle::Left; 18 case '=': 19 return AlignStyle::Center; 20 case '+': 21 return AlignStyle::Right; 22 default: 23 return std::nullopt; 24 } 25 LLVM_BUILTIN_UNREACHABLE; 26 } 27 28 static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where, 29 unsigned &Align, char &Pad) { 30 Where = AlignStyle::Right; 31 Align = 0; 32 Pad = ' '; 33 if (Spec.empty()) 34 return true; 35 36 if (Spec.size() > 1) { 37 // A maximum of 2 characters at the beginning can be used for something 38 // other than the width. 39 // If Spec[1] is a loc char, then Spec[0] is a pad char and Spec[2:...] 40 // contains the width. 41 // Otherwise, if Spec[0] is a loc char, then Spec[1:...] contains the width. 42 // Otherwise, Spec[0:...] contains the width. 43 if (auto Loc = translateLocChar(Spec[1])) { 44 Pad = Spec[0]; 45 Where = *Loc; 46 Spec = Spec.drop_front(2); 47 } else if (auto Loc = translateLocChar(Spec[0])) { 48 Where = *Loc; 49 Spec = Spec.drop_front(1); 50 } 51 } 52 53 bool Failed = Spec.consumeInteger(0, Align); 54 return !Failed; 55 } 56 57 static std::optional<ReplacementItem> parseReplacementItem(StringRef Spec) { 58 StringRef RepString = Spec.trim("{}"); 59 60 // If the replacement sequence does not start with a non-negative integer, 61 // this is an error. 62 char Pad = ' '; 63 unsigned Align = 0; 64 AlignStyle Where = AlignStyle::Right; 65 StringRef Options; 66 unsigned Index = ~0U; 67 RepString = RepString.ltrim(); 68 69 // If index is not specified, keep it ~0U to indicate unresolved index. 70 RepString.consumeInteger(0, Index); 71 72 if (RepString.consume_front(",")) { 73 if (!consumeFieldLayout(RepString, Where, Align, Pad)) { 74 assert(false && "Invalid replacement field layout specification!"); 75 return std::nullopt; 76 } 77 } 78 RepString = RepString.ltrim(); 79 if (RepString.consume_front(":")) { 80 Options = RepString; 81 RepString = StringRef(); 82 } 83 RepString = RepString.trim(); 84 if (!RepString.empty()) { 85 assert(0 && "Unexpected characters found in replacement string!"); 86 return std::nullopt; 87 } 88 89 return ReplacementItem(Spec, Index, Align, Where, Pad, Options); 90 } 91 92 static std::pair<std::optional<ReplacementItem>, StringRef> 93 splitLiteralAndReplacement(StringRef Fmt) { 94 assert(!Fmt.empty()); 95 // Everything up until the first brace is a literal. 96 if (Fmt.front() != '{') { 97 size_t BO = Fmt.find_first_of('{'); 98 return {ReplacementItem{Fmt.substr(0, BO)}, Fmt.substr(BO)}; 99 } 100 101 StringRef Braces = Fmt.take_while([](char C) { return C == '{'; }); 102 // If there is more than one brace, then some of them are escaped. Treat 103 // these as replacements. 104 if (Braces.size() > 1) { 105 size_t NumEscapedBraces = Braces.size() / 2; 106 StringRef Middle = Fmt.take_front(NumEscapedBraces); 107 StringRef Right = Fmt.drop_front(NumEscapedBraces * 2); 108 return {ReplacementItem(Middle), Right}; 109 } 110 // An unterminated open brace is undefined. Assert to indicate that this is 111 // undefined and that we consider it an error. When asserts are disabled, 112 // build a replacement item with an error message. 113 size_t BC = Fmt.find_first_of('}'); 114 if (BC == StringRef::npos) { 115 assert(false && 116 "Unterminated brace sequence. Escape with {{ for a literal brace."); 117 return {ReplacementItem("Unterminated brace sequence. Escape with {{ for a " 118 "literal brace."), 119 StringRef()}; 120 } 121 122 // Even if there is a closing brace, if there is another open brace before 123 // this closing brace, treat this portion as literal, and try again with the 124 // next one. 125 size_t BO2 = Fmt.find_first_of('{', 1); 126 if (BO2 < BC) 127 return {ReplacementItem(Fmt.substr(0, BO2)), Fmt.substr(BO2)}; 128 129 StringRef Spec = Fmt.slice(1, BC); 130 StringRef Right = Fmt.substr(BC + 1); 131 132 return {parseReplacementItem(Spec), Right}; 133 } 134 135 #ifndef NDEBUG 136 #define ENABLE_VALIDATION 1 137 #else 138 #define ENABLE_VALIDATION 0 // Conveniently enable validation in release mode. 139 #endif 140 141 SmallVector<ReplacementItem, 2> 142 formatv_object_base::parseFormatString(StringRef Fmt, size_t NumArgs, 143 bool Validate) { 144 SmallVector<ReplacementItem, 2> Replacements; 145 unsigned NextAutomaticIndex = 0; 146 147 #if ENABLE_VALIDATION 148 const StringRef SavedFmtStr = Fmt; 149 unsigned NumExpectedArgs = 0; 150 bool HasExplicitIndex = false; 151 #endif 152 153 while (!Fmt.empty()) { 154 std::optional<ReplacementItem> I; 155 std::tie(I, Fmt) = splitLiteralAndReplacement(Fmt); 156 if (!I) 157 continue; 158 if (I->Type == ReplacementType::Format) { 159 if (I->Index == ~0U) 160 I->Index = NextAutomaticIndex++; 161 #if ENABLE_VALIDATION 162 else 163 HasExplicitIndex = true; 164 NumExpectedArgs = std::max(NumExpectedArgs, I->Index + 1); 165 #endif 166 } 167 168 Replacements.emplace_back(*I); 169 } 170 171 #if ENABLE_VALIDATION 172 if (!Validate) 173 return Replacements; 174 175 // Perform additional validation. Verify that the number of arguments matches 176 // the number of replacement indices and that there are no holes in the 177 // replacement indices. 178 179 // When validation fails, return an array of replacement items that 180 // will print an error message as the outout of this formatv() (used when 181 // validation is enabled in release mode). 182 auto getErrorReplacements = [SavedFmtStr](StringLiteral ErrorMsg) { 183 return SmallVector<ReplacementItem, 2>{ 184 ReplacementItem("Invalid formatv() call: "), ReplacementItem(ErrorMsg), 185 ReplacementItem(" for format string: "), ReplacementItem(SavedFmtStr)}; 186 }; 187 188 if (NumExpectedArgs != NumArgs) { 189 errs() << formatv("Expected {} Args, but got {} for format string '{}'\n", 190 NumExpectedArgs, NumArgs, SavedFmtStr); 191 assert(0 && "Invalid formatv() call"); 192 return getErrorReplacements("Unexpected number of arguments"); 193 } 194 195 // Find the number of unique indices seen. All replacement indices 196 // are < NumExpectedArgs. 197 SmallVector<bool> Indices(NumExpectedArgs); 198 unsigned Count = 0; 199 for (const ReplacementItem &I : Replacements) { 200 if (I.Type != ReplacementType::Format || Indices[I.Index]) 201 continue; 202 Indices[I.Index] = true; 203 ++Count; 204 } 205 206 if (Count != NumExpectedArgs) { 207 errs() << formatv( 208 "Replacement field indices cannot have holes for format string '{}'\n", 209 SavedFmtStr); 210 assert(0 && "Invalid format string"); 211 return getErrorReplacements("Replacement indices have holes"); 212 } 213 214 // Fail validation if we see both automatic index and explicit index. 215 if (NextAutomaticIndex != 0 && HasExplicitIndex) { 216 errs() << formatv( 217 "Cannot mix automatic and explicit indices for format string '{}'\n", 218 SavedFmtStr); 219 assert(0 && "Invalid format string"); 220 return getErrorReplacements("Cannot mix automatic and explicit indices"); 221 } 222 #endif // ENABLE_VALIDATION 223 return Replacements; 224 } 225 226 void support::detail::format_adapter::anchor() {} 227