xref: /llvm-project/llvm/lib/Support/FormatVariadic.cpp (revision f796a0c7c9299ec16d459de70a92d8a675f47a42)
1 //===- FormatVariadic.cpp - Format string parsing and analysis ----*-C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
7 
8 #include "llvm/Support/FormatVariadic.h"
9 #include <cassert>
10 #include <optional>
11 
12 using namespace llvm;
13 
14 static std::optional<AlignStyle> translateLocChar(char C) {
15   switch (C) {
16   case '-':
17     return AlignStyle::Left;
18   case '=':
19     return AlignStyle::Center;
20   case '+':
21     return AlignStyle::Right;
22   default:
23     return std::nullopt;
24   }
25   LLVM_BUILTIN_UNREACHABLE;
26 }
27 
28 static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
29                                unsigned &Align, char &Pad) {
30   Where = AlignStyle::Right;
31   Align = 0;
32   Pad = ' ';
33   if (Spec.empty())
34     return true;
35 
36   if (Spec.size() > 1) {
37     // A maximum of 2 characters at the beginning can be used for something
38     // other than the width.
39     // If Spec[1] is a loc char, then Spec[0] is a pad char and Spec[2:...]
40     // contains the width.
41     // Otherwise, if Spec[0] is a loc char, then Spec[1:...] contains the width.
42     // Otherwise, Spec[0:...] contains the width.
43     if (auto Loc = translateLocChar(Spec[1])) {
44       Pad = Spec[0];
45       Where = *Loc;
46       Spec = Spec.drop_front(2);
47     } else if (auto Loc = translateLocChar(Spec[0])) {
48       Where = *Loc;
49       Spec = Spec.drop_front(1);
50     }
51   }
52 
53   bool Failed = Spec.consumeInteger(0, Align);
54   return !Failed;
55 }
56 
57 static std::optional<ReplacementItem> parseReplacementItem(StringRef Spec) {
58   StringRef RepString = Spec.trim("{}");
59 
60   // If the replacement sequence does not start with a non-negative integer,
61   // this is an error.
62   char Pad = ' ';
63   unsigned Align = 0;
64   AlignStyle Where = AlignStyle::Right;
65   StringRef Options;
66   unsigned Index = ~0U;
67   RepString = RepString.ltrim();
68 
69   // If index is not specified, keep it ~0U to indicate unresolved index.
70   RepString.consumeInteger(0, Index);
71 
72   if (RepString.consume_front(",")) {
73     if (!consumeFieldLayout(RepString, Where, Align, Pad)) {
74       assert(false && "Invalid replacement field layout specification!");
75       return std::nullopt;
76     }
77   }
78   RepString = RepString.ltrim();
79   if (RepString.consume_front(":")) {
80     Options = RepString;
81     RepString = StringRef();
82   }
83   RepString = RepString.trim();
84   if (!RepString.empty()) {
85     assert(0 && "Unexpected characters found in replacement string!");
86     return std::nullopt;
87   }
88 
89   return ReplacementItem(Spec, Index, Align, Where, Pad, Options);
90 }
91 
92 static std::pair<std::optional<ReplacementItem>, StringRef>
93 splitLiteralAndReplacement(StringRef Fmt) {
94   assert(!Fmt.empty());
95   // Everything up until the first brace is a literal.
96   if (Fmt.front() != '{') {
97     size_t BO = Fmt.find_first_of('{');
98     return {ReplacementItem{Fmt.substr(0, BO)}, Fmt.substr(BO)};
99   }
100 
101   StringRef Braces = Fmt.take_while([](char C) { return C == '{'; });
102   // If there is more than one brace, then some of them are escaped.  Treat
103   // these as replacements.
104   if (Braces.size() > 1) {
105     size_t NumEscapedBraces = Braces.size() / 2;
106     StringRef Middle = Fmt.take_front(NumEscapedBraces);
107     StringRef Right = Fmt.drop_front(NumEscapedBraces * 2);
108     return {ReplacementItem(Middle), Right};
109   }
110   // An unterminated open brace is undefined. Assert to indicate that this is
111   // undefined and that we consider it an error. When asserts are disabled,
112   // build a replacement item with an error message.
113   size_t BC = Fmt.find_first_of('}');
114   if (BC == StringRef::npos) {
115     assert(false &&
116            "Unterminated brace sequence. Escape with {{ for a literal brace.");
117     return {ReplacementItem("Unterminated brace sequence. Escape with {{ for a "
118                             "literal brace."),
119             StringRef()};
120   }
121 
122   // Even if there is a closing brace, if there is another open brace before
123   // this closing brace, treat this portion as literal, and try again with the
124   // next one.
125   size_t BO2 = Fmt.find_first_of('{', 1);
126   if (BO2 < BC)
127     return {ReplacementItem(Fmt.substr(0, BO2)), Fmt.substr(BO2)};
128 
129   StringRef Spec = Fmt.slice(1, BC);
130   StringRef Right = Fmt.substr(BC + 1);
131 
132   return {parseReplacementItem(Spec), Right};
133 }
134 
// Format string validation is compiled in only when NDEBUG is not defined.
// To conveniently enable validation in release mode as well, change the 0 in
// the NDEBUG branch below to 1.
#ifdef NDEBUG
#define ENABLE_VALIDATION 0
#else
#define ENABLE_VALIDATION 1
#endif
140 
141 SmallVector<ReplacementItem, 2>
142 formatv_object_base::parseFormatString(StringRef Fmt, size_t NumArgs,
143                                        bool Validate) {
144   SmallVector<ReplacementItem, 2> Replacements;
145   unsigned NextAutomaticIndex = 0;
146 
147 #if ENABLE_VALIDATION
148   const StringRef SavedFmtStr = Fmt;
149   unsigned NumExpectedArgs = 0;
150   bool HasExplicitIndex = false;
151 #endif
152 
153   while (!Fmt.empty()) {
154     std::optional<ReplacementItem> I;
155     std::tie(I, Fmt) = splitLiteralAndReplacement(Fmt);
156     if (!I)
157       continue;
158     if (I->Type == ReplacementType::Format) {
159       if (I->Index == ~0U)
160         I->Index = NextAutomaticIndex++;
161 #if ENABLE_VALIDATION
162       else
163         HasExplicitIndex = true;
164       NumExpectedArgs = std::max(NumExpectedArgs, I->Index + 1);
165 #endif
166     }
167 
168     Replacements.emplace_back(*I);
169   }
170 
171 #if ENABLE_VALIDATION
172   if (!Validate)
173     return Replacements;
174 
175   // Perform additional validation. Verify that the number of arguments matches
176   // the number of replacement indices and that there are no holes in the
177   // replacement indices.
178 
179   // When validation fails, return an array of replacement items that
180   // will print an error message as the outout of this formatv() (used when
181   // validation is enabled in release mode).
182   auto getErrorReplacements = [SavedFmtStr](StringLiteral ErrorMsg) {
183     return SmallVector<ReplacementItem, 2>{
184         ReplacementItem("Invalid formatv() call: "), ReplacementItem(ErrorMsg),
185         ReplacementItem(" for format string: "), ReplacementItem(SavedFmtStr)};
186   };
187 
188   if (NumExpectedArgs != NumArgs) {
189     errs() << formatv("Expected {} Args, but got {} for format string '{}'\n",
190                       NumExpectedArgs, NumArgs, SavedFmtStr);
191     assert(0 && "Invalid formatv() call");
192     return getErrorReplacements("Unexpected number of arguments");
193   }
194 
195   // Find the number of unique indices seen. All replacement indices
196   // are < NumExpectedArgs.
197   SmallVector<bool> Indices(NumExpectedArgs);
198   unsigned Count = 0;
199   for (const ReplacementItem &I : Replacements) {
200     if (I.Type != ReplacementType::Format || Indices[I.Index])
201       continue;
202     Indices[I.Index] = true;
203     ++Count;
204   }
205 
206   if (Count != NumExpectedArgs) {
207     errs() << formatv(
208         "Replacement field indices cannot have holes for format string '{}'\n",
209         SavedFmtStr);
210     assert(0 && "Invalid format string");
211     return getErrorReplacements("Replacement indices have holes");
212   }
213 
214   // Fail validation if we see both automatic index and explicit index.
215   if (NextAutomaticIndex != 0 && HasExplicitIndex) {
216     errs() << formatv(
217         "Cannot mix automatic and explicit indices for format string '{}'\n",
218         SavedFmtStr);
219     assert(0 && "Invalid format string");
220     return getErrorReplacements("Cannot mix automatic and explicit indices");
221   }
222 #endif // ENABLE_VALIDATION
223   return Replacements;
224 }
225 
226 void support::detail::format_adapter::anchor() {}
227