xref: /llvm-project/llvm/lib/Support/FormatVariadic.cpp (revision 2f7ffbaad3e7bd14b7a82d4887ef1640272ba1be)
1 //===- FormatVariadic.cpp - Format string parsing and analysis ----*-C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
7 
8 #include "llvm/Support/FormatVariadic.h"
9 #include <cassert>
10 #include <optional>
11 
12 using namespace llvm;
13 
14 static std::optional<AlignStyle> translateLocChar(char C) {
15   switch (C) {
16   case '-':
17     return AlignStyle::Left;
18   case '=':
19     return AlignStyle::Center;
20   case '+':
21     return AlignStyle::Right;
22   default:
23     return std::nullopt;
24   }
25   LLVM_BUILTIN_UNREACHABLE;
26 }
27 
28 static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
29                                unsigned &Align, char &Pad) {
30   Where = AlignStyle::Right;
31   Align = 0;
32   Pad = ' ';
33   if (Spec.empty())
34     return true;
35 
36   if (Spec.size() > 1) {
37     // A maximum of 2 characters at the beginning can be used for something
38     // other than the width.
39     // If Spec[1] is a loc char, then Spec[0] is a pad char and Spec[2:...]
40     // contains the width.
41     // Otherwise, if Spec[0] is a loc char, then Spec[1:...] contains the width.
42     // Otherwise, Spec[0:...] contains the width.
43     if (auto Loc = translateLocChar(Spec[1])) {
44       Pad = Spec[0];
45       Where = *Loc;
46       Spec = Spec.drop_front(2);
47     } else if (auto Loc = translateLocChar(Spec[0])) {
48       Where = *Loc;
49       Spec = Spec.drop_front(1);
50     }
51   }
52 
53   bool Failed = Spec.consumeInteger(0, Align);
54   return !Failed;
55 }
56 
57 static std::optional<ReplacementItem> parseReplacementItem(StringRef Spec) {
58   StringRef RepString = Spec.trim("{}");
59 
60   // If the replacement sequence does not start with a non-negative integer,
61   // this is an error.
62   char Pad = ' ';
63   unsigned Align = 0;
64   AlignStyle Where = AlignStyle::Right;
65   StringRef Options;
66   unsigned Index = ~0U;
67   RepString = RepString.trim();
68 
69   // If index is not specified, keep it ~0U to indicate unresolved index.
70   RepString.consumeInteger(0, Index);
71   RepString = RepString.trim();
72 
73   if (RepString.consume_front(",")) {
74     if (!consumeFieldLayout(RepString, Where, Align, Pad)) {
75       assert(false && "Invalid replacement field layout specification!");
76       return std::nullopt;
77     }
78   }
79   RepString = RepString.trim();
80   if (RepString.consume_front(":")) {
81     Options = RepString.trim();
82     RepString = StringRef();
83   }
84   RepString = RepString.trim();
85   if (!RepString.empty()) {
86     assert(0 && "Unexpected characters found in replacement string!");
87     return std::nullopt;
88   }
89 
90   return ReplacementItem(Spec, Index, Align, Where, Pad, Options);
91 }
92 
93 static std::pair<std::optional<ReplacementItem>, StringRef>
94 splitLiteralAndReplacement(StringRef Fmt) {
95   assert(!Fmt.empty());
96   // Everything up until the first brace is a literal.
97   if (Fmt.front() != '{') {
98     size_t BO = Fmt.find_first_of('{');
99     return {ReplacementItem{Fmt.substr(0, BO)}, Fmt.substr(BO)};
100   }
101 
102   StringRef Braces = Fmt.take_while([](char C) { return C == '{'; });
103   // If there is more than one brace, then some of them are escaped.  Treat
104   // these as replacements.
105   if (Braces.size() > 1) {
106     size_t NumEscapedBraces = Braces.size() / 2;
107     StringRef Middle = Fmt.take_front(NumEscapedBraces);
108     StringRef Right = Fmt.drop_front(NumEscapedBraces * 2);
109     return {ReplacementItem(Middle), Right};
110   }
111   // An unterminated open brace is undefined. Assert to indicate that this is
112   // undefined and that we consider it an error. When asserts are disabled,
113   // build a replacement item with an error message.
114   size_t BC = Fmt.find_first_of('}');
115   if (BC == StringRef::npos) {
116     assert(false &&
117            "Unterminated brace sequence. Escape with {{ for a literal brace.");
118     return {ReplacementItem("Unterminated brace sequence. Escape with {{ for a "
119                             "literal brace."),
120             StringRef()};
121   }
122 
123   // Even if there is a closing brace, if there is another open brace before
124   // this closing brace, treat this portion as literal, and try again with the
125   // next one.
126   size_t BO2 = Fmt.find_first_of('{', 1);
127   if (BO2 < BC)
128     return {ReplacementItem(Fmt.substr(0, BO2)), Fmt.substr(BO2)};
129 
130   StringRef Spec = Fmt.slice(1, BC);
131   StringRef Right = Fmt.substr(BC + 1);
132 
133   return {parseReplacementItem(Spec), Right};
134 }
135 
// ENABLE_VALIDATION selects whether the validation code guarded by
// "#if ENABLE_VALIDATION" in this file is compiled in. It is 1 when NDEBUG is
// not defined and 0 otherwise.
#ifdef NDEBUG
// Change this to 1 to conveniently enable validation in release mode.
#define ENABLE_VALIDATION 0
#else
#define ENABLE_VALIDATION 1
#endif
141 
142 SmallVector<ReplacementItem, 2>
143 formatv_object_base::parseFormatString(StringRef Fmt, size_t NumArgs,
144                                        bool Validate) {
145   SmallVector<ReplacementItem, 2> Replacements;
146   unsigned NextAutomaticIndex = 0;
147 
148 #if ENABLE_VALIDATION
149   const StringRef SavedFmtStr = Fmt;
150   unsigned NumExpectedArgs = 0;
151   bool HasExplicitIndex = false;
152 #endif
153 
154   while (!Fmt.empty()) {
155     std::optional<ReplacementItem> I;
156     std::tie(I, Fmt) = splitLiteralAndReplacement(Fmt);
157     if (!I)
158       continue;
159     if (I->Type == ReplacementType::Format) {
160       if (I->Index == ~0U)
161         I->Index = NextAutomaticIndex++;
162 #if ENABLE_VALIDATION
163       else
164         HasExplicitIndex = true;
165       NumExpectedArgs = std::max(NumExpectedArgs, I->Index + 1);
166 #endif
167     }
168 
169     Replacements.emplace_back(*I);
170   }
171 
172 #if ENABLE_VALIDATION
173   if (!Validate)
174     return Replacements;
175 
176   // Perform additional validation. Verify that the number of arguments matches
177   // the number of replacement indices and that there are no holes in the
178   // replacement indices.
179 
180   // When validation fails, return an array of replacement items that
181   // will print an error message as the outout of this formatv() (used when
182   // validation is enabled in release mode).
183   auto getErrorReplacements = [SavedFmtStr](StringLiteral ErrorMsg) {
184     return SmallVector<ReplacementItem, 2>{
185         ReplacementItem("Invalid formatv() call: "), ReplacementItem(ErrorMsg),
186         ReplacementItem(" for format string: "), ReplacementItem(SavedFmtStr)};
187   };
188 
189   if (NumExpectedArgs != NumArgs) {
190     errs() << formatv("Expected {} Args, but got {} for format string '{}'\n",
191                       NumExpectedArgs, NumArgs, SavedFmtStr);
192     assert(0 && "Invalid formatv() call");
193     return getErrorReplacements("Unexpected number of arguments");
194   }
195 
196   // Find the number of unique indices seen. All replacement indices
197   // are < NumExpectedArgs.
198   SmallVector<bool> Indices(NumExpectedArgs);
199   unsigned Count = 0;
200   for (const ReplacementItem &I : Replacements) {
201     if (I.Type != ReplacementType::Format || Indices[I.Index])
202       continue;
203     Indices[I.Index] = true;
204     ++Count;
205   }
206 
207   if (Count != NumExpectedArgs) {
208     errs() << formatv(
209         "Replacement field indices cannot have holes for format string '{}'\n",
210         SavedFmtStr);
211     assert(0 && "Invalid format string");
212     return getErrorReplacements("Replacement indices have holes");
213   }
214 
215   // Fail validation if we see both automatic index and explicit index.
216   if (NextAutomaticIndex != 0 && HasExplicitIndex) {
217     errs() << formatv(
218         "Cannot mix automatic and explicit indices for format string '{}'\n",
219         SavedFmtStr);
220     assert(0 && "Invalid format string");
221     return getErrorReplacements("Cannot mix automatic and explicit indices");
222   }
223 #endif // ENABLE_VALIDATION
224   return Replacements;
225 }
226 
227 void support::detail::format_adapter::anchor() {}
228