xref: /llvm-project/libc/src/stdio/printf_core/parser.h (revision eab63b5a8cf2214ddfee566a87deb3013ffcc362)
14f4752eeSMichael Jones //===-- Format string parser for printf -------------------------*- C++ -*-===//
24f4752eeSMichael Jones //
34f4752eeSMichael Jones // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44f4752eeSMichael Jones // See https://llvm.org/LICENSE.txt for license information.
54f4752eeSMichael Jones // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
64f4752eeSMichael Jones //
74f4752eeSMichael Jones //===----------------------------------------------------------------------===//
84f4752eeSMichael Jones 
94f4752eeSMichael Jones #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H
104f4752eeSMichael Jones #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H
114f4752eeSMichael Jones 
1273aab2f6Slntue #include "include/llvm-libc-macros/stdfix-macros.h"
13d83271b0SCaslyn Tonelli #include "src/__support/CPP/algorithm.h" // max
142f6a8797SRoland McGrath #include "src/__support/CPP/limits.h"
15bf279f90SMichael Jones #include "src/__support/CPP/optional.h"
168e3b6054SMichael Jones #include "src/__support/CPP/type_traits.h"
175ff3ff33SPetr Hosek #include "src/__support/macros/config.h"
18e0be78beSJoseph Huber #include "src/__support/str_to_integer.h"
194f4752eeSMichael Jones #include "src/stdio/printf_core/core_structs.h"
20ecca8952SMichael Jones #include "src/stdio/printf_core/printf_config.h"
214f4752eeSMichael Jones 
224f4752eeSMichael Jones #include <stddef.h>
234f4752eeSMichael Jones 
248e3b6054SMichael Jones #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
258e3b6054SMichael Jones #include "src/__support/fixed_point/fx_rep.h"
268e3b6054SMichael Jones #endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
27f009f72dSMichael Jones #ifndef LIBC_COPT_PRINTF_DISABLE_STRERROR
28f009f72dSMichael Jones #include "src/errno/libc_errno.h"
29f009f72dSMichael Jones #endif // LIBC_COPT_PRINTF_DISABLE_STRERROR
308e3b6054SMichael Jones 
315ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL {
324f4752eeSMichael Jones namespace printf_core {
334f4752eeSMichael Jones 
34e0be78beSJoseph Huber template <typename T> struct int_type_of {
35e0be78beSJoseph Huber   using type = T;
36e0be78beSJoseph Huber };
37e0be78beSJoseph Huber template <> struct int_type_of<double> {
383546f4daSGuillaume Chatelet   using type = fputil::FPBits<double>::StorageType;
39e0be78beSJoseph Huber };
40e0be78beSJoseph Huber template <> struct int_type_of<long double> {
413546f4daSGuillaume Chatelet   using type = fputil::FPBits<long double>::StorageType;
42e0be78beSJoseph Huber };
438e3b6054SMichael Jones 
448e3b6054SMichael Jones #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
458e3b6054SMichael Jones template <typename T>
468e3b6054SMichael Jones struct int_type_of<cpp::enable_if<cpp::is_fixed_point_v<T>, T>> {
478e3b6054SMichael Jones   using type = typename fixed_point::FXRep<T>::StorageType;
488e3b6054SMichael Jones };
498e3b6054SMichael Jones #endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
508e3b6054SMichael Jones 
51e0be78beSJoseph Huber template <typename T> using int_type_of_v = typename int_type_of<T>::type;
5247fb6d1cSMichael Jones 
// WRITE_ARG_VAL_SIMPLEST(dst, arg_type, index) fetches one argument of type
// arg_type and stores it, bit_cast to int_type_of_v<arg_type>, into dst. It
// must be expanded inside a Parser member function.
//
// In index mode it also needs a local FormatSection named `section` in scope:
// the argument is looked up by index (0 means "the next argument"), and if it
// cannot be retrieved, dst is left untouched and section.has_conv is cleared.
// With index mode disabled the third macro argument is discarded unevaluated
// and the next argument is always read.
#ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
#define WRITE_ARG_VAL_SIMPLEST(dst, arg_type, index)                           \
  {                                                                            \
    auto maybe_arg = get_arg_value<arg_type>(index);                           \
    if (maybe_arg.has_value()) {                                               \
      dst = cpp::bit_cast<int_type_of_v<arg_type>>(maybe_arg.value());         \
    } else {                                                                   \
      section.has_conv = false;                                                \
    }                                                                          \
  }
#else
#define WRITE_ARG_VAL_SIMPLEST(dst, arg_type, _)                               \
  dst = cpp::bit_cast<int_type_of_v<arg_type>>(get_next_arg_value<arg_type>())
#endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
67e0be78beSJoseph Huber 
68e0be78beSJoseph Huber template <typename ArgProvider> class Parser {
694f4752eeSMichael Jones   const char *__restrict str;
704f4752eeSMichael Jones 
714f4752eeSMichael Jones   size_t cur_pos = 0;
7247fb6d1cSMichael Jones   ArgProvider args_cur;
73945fa672SMichael Jones 
74c3228714SGuillaume Chatelet #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
  // args_start stores the start of the va_args, which allows getting the
  // value of arguments that have already been passed. args_index is tracked so
  // that we know which argument args_cur is on.
7847fb6d1cSMichael Jones   ArgProvider args_start;
794f4752eeSMichael Jones   size_t args_index = 1;
804f4752eeSMichael Jones 
81ecca8952SMichael Jones   // Defined in printf_config.h
82c3228714SGuillaume Chatelet   static constexpr size_t DESC_ARR_LEN = LIBC_COPT_PRINTF_INDEX_ARR_LEN;
83ecca8952SMichael Jones 
8447fb6d1cSMichael Jones   // desc_arr stores the sizes of the variables in the ArgProvider. This is used
8547fb6d1cSMichael Jones   // in index mode to reduce repeated string parsing. The sizes are stored as
86945fa672SMichael Jones   // TypeDesc objects, which store the size as well as minimal type information.
87945fa672SMichael Jones   // This is necessary because some systems separate the floating point and
88945fa672SMichael Jones   // integer values in va_args.
89bf279f90SMichael Jones   TypeDesc desc_arr[DESC_ARR_LEN] = {type_desc_from_type<void>()};
90945fa672SMichael Jones 
914f4752eeSMichael Jones   // TODO: Look into object stores for optimization.
924f4752eeSMichael Jones 
93c3228714SGuillaume Chatelet #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
94945fa672SMichael Jones 
954f4752eeSMichael Jones public:
96c3228714SGuillaume Chatelet #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
9747fb6d1cSMichael Jones   LIBC_INLINE Parser(const char *__restrict new_str, ArgProvider &args)
98096463d0SMichael Jones       : str(new_str), args_cur(args), args_start(args) {}
99945fa672SMichael Jones #else
10047fb6d1cSMichael Jones   LIBC_INLINE Parser(const char *__restrict new_str, ArgProvider &args)
101945fa672SMichael Jones       : str(new_str), args_cur(args) {}
102c3228714SGuillaume Chatelet #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
1034f4752eeSMichael Jones 
1044f4752eeSMichael Jones   // get_next_section will parse the format string until it has a fully
1054f4752eeSMichael Jones   // specified format section. This can either be a raw format section with no
1064f4752eeSMichael Jones   // conversion, or a format section with a conversion that has all of its
1074f4752eeSMichael Jones   // variables stored in the format section.
108e0be78beSJoseph Huber   LIBC_INLINE FormatSection get_next_section() {
109e0be78beSJoseph Huber     FormatSection section;
110e0be78beSJoseph Huber     size_t starting_pos = cur_pos;
111e0be78beSJoseph Huber     if (str[cur_pos] == '%') {
112e0be78beSJoseph Huber       // format section
113e0be78beSJoseph Huber       section.has_conv = true;
114e0be78beSJoseph Huber 
115e0be78beSJoseph Huber       ++cur_pos;
116e0be78beSJoseph Huber       [[maybe_unused]] size_t conv_index = 0;
117e0be78beSJoseph Huber 
118e0be78beSJoseph Huber #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
119e0be78beSJoseph Huber       conv_index = parse_index(&cur_pos);
120e0be78beSJoseph Huber #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
121e0be78beSJoseph Huber 
122e0be78beSJoseph Huber       section.flags = parse_flags(&cur_pos);
123e0be78beSJoseph Huber 
124e0be78beSJoseph Huber       // handle width
125e0be78beSJoseph Huber       section.min_width = 0;
126e0be78beSJoseph Huber       if (str[cur_pos] == '*') {
127e0be78beSJoseph Huber         ++cur_pos;
128e0be78beSJoseph Huber 
129e0be78beSJoseph Huber         WRITE_ARG_VAL_SIMPLEST(section.min_width, int, parse_index(&cur_pos));
130e0be78beSJoseph Huber       } else if (internal::isdigit(str[cur_pos])) {
131e0be78beSJoseph Huber         auto result = internal::strtointeger<int>(str + cur_pos, 10);
132e0be78beSJoseph Huber         section.min_width = result.value;
133e0be78beSJoseph Huber         cur_pos = cur_pos + result.parsed_len;
134e0be78beSJoseph Huber       }
135e0be78beSJoseph Huber       if (section.min_width < 0) {
136a21fc4c0SMichael Jones         section.min_width =
137a21fc4c0SMichael Jones             (section.min_width == INT_MIN) ? INT_MAX : -section.min_width;
138e0be78beSJoseph Huber         section.flags = static_cast<FormatFlags>(section.flags |
139e0be78beSJoseph Huber                                                  FormatFlags::LEFT_JUSTIFIED);
140e0be78beSJoseph Huber       }
141e0be78beSJoseph Huber 
142e0be78beSJoseph Huber       // handle precision
143e0be78beSJoseph Huber       section.precision = -1; // negative precisions are ignored.
144e0be78beSJoseph Huber       if (str[cur_pos] == '.') {
145e0be78beSJoseph Huber         ++cur_pos;
146e0be78beSJoseph Huber         section.precision = 0; // if there's a . but no specified precision, the
147e0be78beSJoseph Huber                                // precision is implicitly 0.
148e0be78beSJoseph Huber         if (str[cur_pos] == '*') {
149e0be78beSJoseph Huber           ++cur_pos;
150e0be78beSJoseph Huber 
151e0be78beSJoseph Huber           WRITE_ARG_VAL_SIMPLEST(section.precision, int, parse_index(&cur_pos));
152e0be78beSJoseph Huber 
153e0be78beSJoseph Huber         } else if (internal::isdigit(str[cur_pos])) {
154e0be78beSJoseph Huber           auto result = internal::strtointeger<int>(str + cur_pos, 10);
155e0be78beSJoseph Huber           section.precision = result.value;
156e0be78beSJoseph Huber           cur_pos = cur_pos + result.parsed_len;
157e0be78beSJoseph Huber         }
158e0be78beSJoseph Huber       }
159e0be78beSJoseph Huber 
160fdef5c4fSOm Prakaash       auto [lm, bw] = parse_length_modifier(&cur_pos);
161e0be78beSJoseph Huber       section.length_modifier = lm;
162e0be78beSJoseph Huber       section.conv_name = str[cur_pos];
163fdef5c4fSOm Prakaash       section.bit_width = bw;
164e0be78beSJoseph Huber       switch (str[cur_pos]) {
165e0be78beSJoseph Huber       case ('%'):
166e0be78beSJoseph Huber         // Regardless of options, a % conversion is always safe. The standard
167e0be78beSJoseph Huber         // says that "The complete conversion specification shall be %%" but it
168e0be78beSJoseph Huber         // also says that "If a conversion specification is invalid, the
169e0be78beSJoseph Huber         // behavior is undefined." Based on that we define that any conversion
170e0be78beSJoseph Huber         // specification ending in '%' shall display as '%' regardless of any
171e0be78beSJoseph Huber         // valid or invalid options.
172e0be78beSJoseph Huber         section.has_conv = true;
173e0be78beSJoseph Huber         break;
174e0be78beSJoseph Huber       case ('c'):
175e0be78beSJoseph Huber         WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
176e0be78beSJoseph Huber         break;
177e0be78beSJoseph Huber       case ('d'):
178e0be78beSJoseph Huber       case ('i'):
179e0be78beSJoseph Huber       case ('o'):
180e0be78beSJoseph Huber       case ('x'):
181e0be78beSJoseph Huber       case ('X'):
182e0be78beSJoseph Huber       case ('u'):
183e28ca2ddSArtem Tyurin       case ('b'):
184e28ca2ddSArtem Tyurin       case ('B'):
185e0be78beSJoseph Huber         switch (lm) {
186e0be78beSJoseph Huber         case (LengthModifier::hh):
187e0be78beSJoseph Huber         case (LengthModifier::h):
188e0be78beSJoseph Huber         case (LengthModifier::none):
189e0be78beSJoseph Huber           WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
190e0be78beSJoseph Huber           break;
191e0be78beSJoseph Huber         case (LengthModifier::l):
192e0be78beSJoseph Huber           WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long, conv_index);
193e0be78beSJoseph Huber           break;
194e0be78beSJoseph Huber         case (LengthModifier::ll):
195e0be78beSJoseph Huber         case (LengthModifier::L): // This isn't in the standard, but is in other
196e0be78beSJoseph Huber                                   // libc implementations.
197e0be78beSJoseph Huber 
198e0be78beSJoseph Huber           WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index);
199e0be78beSJoseph Huber           break;
200e0be78beSJoseph Huber         case (LengthModifier::j):
201e0be78beSJoseph Huber 
202e0be78beSJoseph Huber           WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index);
203e0be78beSJoseph Huber           break;
204e0be78beSJoseph Huber         case (LengthModifier::z):
205e0be78beSJoseph Huber 
206e0be78beSJoseph Huber           WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, size_t, conv_index);
207e0be78beSJoseph Huber           break;
208e0be78beSJoseph Huber         case (LengthModifier::t):
209e0be78beSJoseph Huber 
210e0be78beSJoseph Huber           WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, ptrdiff_t, conv_index);
211e0be78beSJoseph Huber           break;
212fdef5c4fSOm Prakaash 
213fdef5c4fSOm Prakaash         case (LengthModifier::w):
214fdef5c4fSOm Prakaash         case (LengthModifier::wf):
215fdef5c4fSOm Prakaash           if (bw == 0) {
216fdef5c4fSOm Prakaash             section.has_conv = false;
217a21cf566SRoland McGrath           } else if (bw <= cpp::numeric_limits<unsigned int>::digits) {
218fdef5c4fSOm Prakaash             WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index);
219a21cf566SRoland McGrath           } else if (bw <= cpp::numeric_limits<unsigned long>::digits) {
220fdef5c4fSOm Prakaash             WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long, conv_index);
221a21cf566SRoland McGrath           } else if (bw <= cpp::numeric_limits<unsigned long long>::digits) {
222fdef5c4fSOm Prakaash             WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index);
223fdef5c4fSOm Prakaash           } else {
224fdef5c4fSOm Prakaash             WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index);
225fdef5c4fSOm Prakaash           }
226fdef5c4fSOm Prakaash           break;
227e0be78beSJoseph Huber         }
228e0be78beSJoseph Huber         break;
229e0be78beSJoseph Huber #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
230e0be78beSJoseph Huber       case ('f'):
231e0be78beSJoseph Huber       case ('F'):
232e0be78beSJoseph Huber       case ('e'):
233e0be78beSJoseph Huber       case ('E'):
234e0be78beSJoseph Huber       case ('a'):
235e0be78beSJoseph Huber       case ('A'):
236e0be78beSJoseph Huber       case ('g'):
237e0be78beSJoseph Huber       case ('G'):
238e0be78beSJoseph Huber         if (lm != LengthModifier::L) {
239e0be78beSJoseph Huber           WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, double, conv_index);
240e0be78beSJoseph Huber         } else {
241e0be78beSJoseph Huber           WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long double, conv_index);
242e0be78beSJoseph Huber         }
243e0be78beSJoseph Huber         break;
244e0be78beSJoseph Huber #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
2458e3b6054SMichael Jones #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
2468e3b6054SMichael Jones       // Capitalization represents sign, but we only need to get the right
2478e3b6054SMichael Jones       // bitwidth here so we ignore that.
2488e3b6054SMichael Jones       case ('r'):
2498e3b6054SMichael Jones       case ('R'):
2508e3b6054SMichael Jones         // all fract sizes we support are less than 32 bits, and currently doing
2518e3b6054SMichael Jones         // va_args with fixed point types just doesn't work.
2528e3b6054SMichael Jones         // TODO: Move to fixed point types once va_args supports it.
2538e3b6054SMichael Jones         WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, uint32_t, conv_index);
2548e3b6054SMichael Jones         break;
2558e3b6054SMichael Jones       case ('k'):
2568e3b6054SMichael Jones       case ('K'):
2578e3b6054SMichael Jones         if (lm == LengthModifier::l) {
2588e3b6054SMichael Jones           WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, uint64_t, conv_index);
2598e3b6054SMichael Jones         } else {
2608e3b6054SMichael Jones           WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, uint32_t, conv_index);
2618e3b6054SMichael Jones         }
2628e3b6054SMichael Jones         break;
2638e3b6054SMichael Jones #endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
264f009f72dSMichael Jones #ifndef LIBC_COPT_PRINTF_DISABLE_STRERROR
265f009f72dSMichael Jones       case ('m'):
266f009f72dSMichael Jones         // %m is an odd conversion in that it doesn't consume an argument, it
267f009f72dSMichael Jones         // just takes the current value of errno as its argument.
268*eab63b5aSMichael Jones         section.conv_val_raw = static_cast<int>(libc_errno);
269f009f72dSMichael Jones         break;
270f009f72dSMichael Jones #endif // LIBC_COPT_PRINTF_DISABLE_STRERROR
271e0be78beSJoseph Huber #ifndef LIBC_COPT_PRINTF_DISABLE_WRITE_INT
272f009f72dSMichael Jones       case ('n'): // Intentional fallthrough
273e0be78beSJoseph Huber #endif            // LIBC_COPT_PRINTF_DISABLE_WRITE_INT
274e0be78beSJoseph Huber       case ('p'):
275e0be78beSJoseph Huber         WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, void *, conv_index);
276e0be78beSJoseph Huber         break;
277e0be78beSJoseph Huber       case ('s'):
278e0be78beSJoseph Huber         WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, char *, conv_index);
279e0be78beSJoseph Huber         break;
280e0be78beSJoseph Huber       default:
281e0be78beSJoseph Huber         // if the conversion is undefined, change this to a raw section.
282e0be78beSJoseph Huber         section.has_conv = false;
283e0be78beSJoseph Huber         break;
284e0be78beSJoseph Huber       }
285e0be78beSJoseph Huber       // If the end of the format section is on the '\0'. This means we need to
286e0be78beSJoseph Huber       // not advance the cur_pos.
287e0be78beSJoseph Huber       if (str[cur_pos] != '\0')
288e0be78beSJoseph Huber         ++cur_pos;
289e0be78beSJoseph Huber 
290e0be78beSJoseph Huber     } else {
291e0be78beSJoseph Huber       // raw section
292e0be78beSJoseph Huber       section.has_conv = false;
293e0be78beSJoseph Huber       while (str[cur_pos] != '%' && str[cur_pos] != '\0')
294e0be78beSJoseph Huber         ++cur_pos;
295e0be78beSJoseph Huber     }
296e0be78beSJoseph Huber     section.raw_string = {str + starting_pos, cur_pos - starting_pos};
297e0be78beSJoseph Huber     return section;
298e0be78beSJoseph Huber   }
2994f4752eeSMichael Jones 
3004f4752eeSMichael Jones private:
3014f4752eeSMichael Jones   // parse_flags parses the flags inside a format string. It assumes that
3024f4752eeSMichael Jones   // str[*local_pos] is inside a format specifier, and parses any flags it
3034f4752eeSMichael Jones   // finds. It returns a FormatFlags object containing the set of found flags
3044f4752eeSMichael Jones   // arithmetically or'd together. local_pos will be moved past any flags found.
305e0be78beSJoseph Huber   LIBC_INLINE FormatFlags parse_flags(size_t *local_pos) {
306e0be78beSJoseph Huber     bool found_flag = true;
307e0be78beSJoseph Huber     FormatFlags flags = FormatFlags(0);
308e0be78beSJoseph Huber     while (found_flag) {
309e0be78beSJoseph Huber       switch (str[*local_pos]) {
310e0be78beSJoseph Huber       case '-':
311e0be78beSJoseph Huber         flags = static_cast<FormatFlags>(flags | FormatFlags::LEFT_JUSTIFIED);
312e0be78beSJoseph Huber         break;
313e0be78beSJoseph Huber       case '+':
314e0be78beSJoseph Huber         flags = static_cast<FormatFlags>(flags | FormatFlags::FORCE_SIGN);
315e0be78beSJoseph Huber         break;
316e0be78beSJoseph Huber       case ' ':
317e0be78beSJoseph Huber         flags = static_cast<FormatFlags>(flags | FormatFlags::SPACE_PREFIX);
318e0be78beSJoseph Huber         break;
319e0be78beSJoseph Huber       case '#':
320e0be78beSJoseph Huber         flags = static_cast<FormatFlags>(flags | FormatFlags::ALTERNATE_FORM);
321e0be78beSJoseph Huber         break;
322e0be78beSJoseph Huber       case '0':
323e0be78beSJoseph Huber         flags = static_cast<FormatFlags>(flags | FormatFlags::LEADING_ZEROES);
324e0be78beSJoseph Huber         break;
325e0be78beSJoseph Huber       default:
326e0be78beSJoseph Huber         found_flag = false;
327e0be78beSJoseph Huber       }
328e0be78beSJoseph Huber       if (found_flag)
329e0be78beSJoseph Huber         ++*local_pos;
330e0be78beSJoseph Huber     }
331e0be78beSJoseph Huber     return flags;
332e0be78beSJoseph Huber   }
3334f4752eeSMichael Jones 
3344f4752eeSMichael Jones   // parse_length_modifier parses the length modifier inside a format string. It
3354f4752eeSMichael Jones   // assumes that str[*local_pos] is inside a format specifier. It returns a
3364f4752eeSMichael Jones   // LengthModifier with the length modifier it found. It will advance local_pos
3374f4752eeSMichael Jones   // after the format specifier if one is found.
338fdef5c4fSOm Prakaash   LIBC_INLINE LengthSpec parse_length_modifier(size_t *local_pos) {
339e0be78beSJoseph Huber     switch (str[*local_pos]) {
340e0be78beSJoseph Huber     case ('l'):
341e0be78beSJoseph Huber       if (str[*local_pos + 1] == 'l') {
342e0be78beSJoseph Huber         *local_pos += 2;
343fdef5c4fSOm Prakaash         return {LengthModifier::ll, 0};
344e0be78beSJoseph Huber       } else {
345e0be78beSJoseph Huber         ++*local_pos;
346fdef5c4fSOm Prakaash         return {LengthModifier::l, 0};
347fdef5c4fSOm Prakaash       }
348fdef5c4fSOm Prakaash     case ('w'): {
349fdef5c4fSOm Prakaash       LengthModifier lm;
350fdef5c4fSOm Prakaash       if (str[*local_pos + 1] == 'f') {
351fdef5c4fSOm Prakaash         *local_pos += 2;
352fdef5c4fSOm Prakaash         lm = LengthModifier::wf;
353fdef5c4fSOm Prakaash       } else {
354fdef5c4fSOm Prakaash         ++*local_pos;
355fdef5c4fSOm Prakaash         lm = LengthModifier::w;
356fdef5c4fSOm Prakaash       }
357fdef5c4fSOm Prakaash       if (internal::isdigit(str[*local_pos])) {
358fdef5c4fSOm Prakaash         const auto result = internal::strtointeger<int>(str + *local_pos, 10);
359fdef5c4fSOm Prakaash         *local_pos += result.parsed_len;
360fdef5c4fSOm Prakaash         return {lm, static_cast<size_t>(cpp::max(0, result.value))};
361fdef5c4fSOm Prakaash       }
362fdef5c4fSOm Prakaash       return {lm, 0};
363e0be78beSJoseph Huber     }
364e0be78beSJoseph Huber     case ('h'):
365e0be78beSJoseph Huber       if (str[*local_pos + 1] == 'h') {
366e0be78beSJoseph Huber         *local_pos += 2;
367fdef5c4fSOm Prakaash         return {LengthModifier::hh, 0};
368e0be78beSJoseph Huber       } else {
369e0be78beSJoseph Huber         ++*local_pos;
370fdef5c4fSOm Prakaash         return {LengthModifier::h, 0};
371e0be78beSJoseph Huber       }
372e0be78beSJoseph Huber     case ('L'):
373e0be78beSJoseph Huber       ++*local_pos;
374fdef5c4fSOm Prakaash       return {LengthModifier::L, 0};
375e0be78beSJoseph Huber     case ('j'):
376e0be78beSJoseph Huber       ++*local_pos;
377fdef5c4fSOm Prakaash       return {LengthModifier::j, 0};
378e0be78beSJoseph Huber     case ('z'):
379e0be78beSJoseph Huber       ++*local_pos;
380fdef5c4fSOm Prakaash       return {LengthModifier::z, 0};
381e0be78beSJoseph Huber     case ('t'):
382e0be78beSJoseph Huber       ++*local_pos;
383fdef5c4fSOm Prakaash       return {LengthModifier::t, 0};
384e0be78beSJoseph Huber     default:
385fdef5c4fSOm Prakaash       return {LengthModifier::none, 0};
386e0be78beSJoseph Huber     }
387e0be78beSJoseph Huber   }
3884f4752eeSMichael Jones 
3894f4752eeSMichael Jones   // get_next_arg_value gets the next value from the arg list as type T.
390494734b0SSiva Chandra Reddy   template <class T> LIBC_INLINE T get_next_arg_value() {
391e0be78beSJoseph Huber     return args_cur.template next_var<T>();
3924f4752eeSMichael Jones   }
393945fa672SMichael Jones 
394945fa672SMichael Jones   //----------------------------------------------------
395945fa672SMichael Jones   // INDEX MODE ONLY FUNCTIONS AFTER HERE:
396945fa672SMichael Jones   //----------------------------------------------------
397945fa672SMichael Jones 
398c3228714SGuillaume Chatelet #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
399945fa672SMichael Jones 
400945fa672SMichael Jones   // parse_index parses the index of a value inside a format string. It
401945fa672SMichael Jones   // assumes that str[*local_pos] points to character after a '%' or '*', and
402945fa672SMichael Jones   // returns 0 if there is no closing $, or if it finds no number. If it finds a
403945fa672SMichael Jones   // number, it will move local_pos past the end of the $, else it will not move
404945fa672SMichael Jones   // local_pos.
405e0be78beSJoseph Huber   LIBC_INLINE size_t parse_index(size_t *local_pos) {
406e0be78beSJoseph Huber     if (internal::isdigit(str[*local_pos])) {
407e0be78beSJoseph Huber       auto result = internal::strtointeger<int>(str + *local_pos, 10);
408e0be78beSJoseph Huber       size_t index = result.value;
409e0be78beSJoseph Huber       if (str[*local_pos + result.parsed_len] != '$')
410e0be78beSJoseph Huber         return 0;
411e0be78beSJoseph Huber       *local_pos = 1 + result.parsed_len + *local_pos;
412e0be78beSJoseph Huber       return index;
413e0be78beSJoseph Huber     }
414e0be78beSJoseph Huber     return 0;
415e0be78beSJoseph Huber   }
416945fa672SMichael Jones 
417494734b0SSiva Chandra Reddy   LIBC_INLINE void set_type_desc(size_t index, TypeDesc value) {
418945fa672SMichael Jones     if (index != 0 && index <= DESC_ARR_LEN)
419945fa672SMichael Jones       desc_arr[index - 1] = value;
420945fa672SMichael Jones   }
421945fa672SMichael Jones 
422945fa672SMichael Jones   // get_arg_value gets the value from the arg list at index (starting at 1).
423945fa672SMichael Jones   // This may require parsing the format string. An index of 0 is interpreted as
424bf279f90SMichael Jones   // the next value. If the format string is not valid, it may have gaps in its
425bf279f90SMichael Jones   // indexes. Requesting the value for any index after a gap will fail, since
426bf279f90SMichael Jones   // the arg list must be read in order and with the correct types.
427bf279f90SMichael Jones   template <class T> LIBC_INLINE cpp::optional<T> get_arg_value(size_t index) {
428bf279f90SMichael Jones     if (!(index == 0 || index == args_index)) {
429bf279f90SMichael Jones       bool success = args_to_index(index);
430bf279f90SMichael Jones       if (!success) {
431bf279f90SMichael Jones         // If we can't get to this index, then the value of the arg can't be
432bf279f90SMichael Jones         // found.
433bf279f90SMichael Jones         return cpp::optional<T>();
434bf279f90SMichael Jones       }
435bf279f90SMichael Jones     }
436945fa672SMichael Jones 
437848c700bSMichael Jones     set_type_desc(index, type_desc_from_type<T>());
438945fa672SMichael Jones 
439945fa672SMichael Jones     ++args_index;
440945fa672SMichael Jones     return get_next_arg_value<T>();
441945fa672SMichael Jones   }
442945fa672SMichael Jones 
44347fb6d1cSMichael Jones   // the ArgProvider can only return the next item in the list. This function is
444945fa672SMichael Jones   // used in index mode when the item that needs to be read is not the next one.
445678e3ee1SFangrui Song   // It moves cur_args to the index requested so the appropriate value may
446945fa672SMichael Jones   // be read. This may involve parsing the format string, and is in the worst
447945fa672SMichael Jones   // case an O(n^2) operation.
448e0be78beSJoseph Huber   LIBC_INLINE bool args_to_index(size_t index) {
449e0be78beSJoseph Huber     if (args_index > index) {
450e0be78beSJoseph Huber       args_index = 1;
451e0be78beSJoseph Huber       args_cur = args_start;
452e0be78beSJoseph Huber     }
453e0be78beSJoseph Huber 
454e0be78beSJoseph Huber     while (args_index < index) {
455e0be78beSJoseph Huber       TypeDesc cur_type_desc = type_desc_from_type<void>();
456e0be78beSJoseph Huber       if (args_index <= DESC_ARR_LEN)
457e0be78beSJoseph Huber         cur_type_desc = desc_arr[args_index - 1];
458e0be78beSJoseph Huber 
459e0be78beSJoseph Huber       if (cur_type_desc == type_desc_from_type<void>())
460e0be78beSJoseph Huber         cur_type_desc = get_type_desc(args_index);
461e0be78beSJoseph Huber 
462e0be78beSJoseph Huber       // A type of void represents the type being unknown. If the type for the
463e0be78beSJoseph Huber       // requested index isn't in the desc_arr and isn't found by parsing the
464e0be78beSJoseph Huber       // string, then then advancing to the requested index is impossible. In
465e0be78beSJoseph Huber       // that case the function returns false.
466e0be78beSJoseph Huber       if (cur_type_desc == type_desc_from_type<void>())
467e0be78beSJoseph Huber         return false;
468e0be78beSJoseph Huber 
469e0be78beSJoseph Huber       if (cur_type_desc == type_desc_from_type<uint32_t>())
470e0be78beSJoseph Huber         args_cur.template next_var<uint32_t>();
471e0be78beSJoseph Huber       else if (cur_type_desc == type_desc_from_type<uint64_t>())
472e0be78beSJoseph Huber         args_cur.template next_var<uint64_t>();
473e0be78beSJoseph Huber #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
474e0be78beSJoseph Huber       // Floating point numbers are stored separately from the other arguments.
475e0be78beSJoseph Huber       else if (cur_type_desc == type_desc_from_type<double>())
476e0be78beSJoseph Huber         args_cur.template next_var<double>();
477e0be78beSJoseph Huber       else if (cur_type_desc == type_desc_from_type<long double>())
478e0be78beSJoseph Huber         args_cur.template next_var<long double>();
479e0be78beSJoseph Huber #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
4808e3b6054SMichael Jones #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
4818e3b6054SMichael Jones       // Floating point numbers may be stored separately from the other
4828e3b6054SMichael Jones       // arguments.
4838e3b6054SMichael Jones       else if (cur_type_desc == type_desc_from_type<short fract>())
4848e3b6054SMichael Jones         args_cur.template next_var<short fract>();
4858e3b6054SMichael Jones       else if (cur_type_desc == type_desc_from_type<fract>())
4868e3b6054SMichael Jones         args_cur.template next_var<fract>();
4878e3b6054SMichael Jones       else if (cur_type_desc == type_desc_from_type<long fract>())
4888e3b6054SMichael Jones         args_cur.template next_var<long fract>();
4898e3b6054SMichael Jones       else if (cur_type_desc == type_desc_from_type<short accum>())
4908e3b6054SMichael Jones         args_cur.template next_var<short accum>();
4918e3b6054SMichael Jones       else if (cur_type_desc == type_desc_from_type<accum>())
4928e3b6054SMichael Jones         args_cur.template next_var<accum>();
4938e3b6054SMichael Jones       else if (cur_type_desc == type_desc_from_type<long accum>())
4948e3b6054SMichael Jones         args_cur.template next_var<long accum>();
4958e3b6054SMichael Jones #endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
496e0be78beSJoseph Huber       // pointers may be stored separately from normal values.
497e0be78beSJoseph Huber       else if (cur_type_desc == type_desc_from_type<void *>())
498e0be78beSJoseph Huber         args_cur.template next_var<void *>();
499e0be78beSJoseph Huber       else
500e0be78beSJoseph Huber         args_cur.template next_var<uint32_t>();
501e0be78beSJoseph Huber 
502e0be78beSJoseph Huber       ++args_index;
503e0be78beSJoseph Huber     }
504e0be78beSJoseph Huber     return true;
505e0be78beSJoseph Huber   }
506945fa672SMichael Jones 
  // get_type_desc assumes that this format string uses index mode. It iterates
  // through the format string until it finds a format specifier that defines
  // the type of the argument at the given index, and returns a TypeDesc
  // describing that type. If no specifier in the format string defines that
  // argument, it returns the TypeDesc for void. It does not modify cur_pos.
511e0be78beSJoseph Huber   LIBC_INLINE TypeDesc get_type_desc(size_t index) {
5121650f1b3SJay Foad     // index mode is assumed, and the indices start at 1, so an index
513e0be78beSJoseph Huber     // of 0 is invalid.
514e0be78beSJoseph Huber     size_t local_pos = 0;
515e0be78beSJoseph Huber 
516e0be78beSJoseph Huber     while (str[local_pos]) {
517e0be78beSJoseph Huber       if (str[local_pos] == '%') {
518e0be78beSJoseph Huber         ++local_pos;
519e0be78beSJoseph Huber 
520e0be78beSJoseph Huber         size_t conv_index = parse_index(&local_pos);
521e0be78beSJoseph Huber 
522e0be78beSJoseph Huber         // the flags aren't relevant for this situation, but I need to skip past
523e0be78beSJoseph Huber         // them so they're parsed but the result is discarded.
524e0be78beSJoseph Huber         parse_flags(&local_pos);
525e0be78beSJoseph Huber 
526e0be78beSJoseph Huber         // handle width
527e0be78beSJoseph Huber         if (str[local_pos] == '*') {
528e0be78beSJoseph Huber           ++local_pos;
529e0be78beSJoseph Huber 
530e0be78beSJoseph Huber           size_t width_index = parse_index(&local_pos);
531e0be78beSJoseph Huber           set_type_desc(width_index, type_desc_from_type<int>());
532e0be78beSJoseph Huber           if (width_index == index)
533e0be78beSJoseph Huber             return type_desc_from_type<int>();
534e0be78beSJoseph Huber 
535e0be78beSJoseph Huber         } else if (internal::isdigit(str[local_pos])) {
536e0be78beSJoseph Huber           while (internal::isdigit(str[local_pos]))
537e0be78beSJoseph Huber             ++local_pos;
538e0be78beSJoseph Huber         }
539e0be78beSJoseph Huber 
540e0be78beSJoseph Huber         // handle precision
541e0be78beSJoseph Huber         if (str[local_pos] == '.') {
542e0be78beSJoseph Huber           ++local_pos;
543e0be78beSJoseph Huber           if (str[local_pos] == '*') {
544e0be78beSJoseph Huber             ++local_pos;
545e0be78beSJoseph Huber 
546e0be78beSJoseph Huber             size_t precision_index = parse_index(&local_pos);
547e0be78beSJoseph Huber             set_type_desc(precision_index, type_desc_from_type<int>());
548e0be78beSJoseph Huber             if (precision_index == index)
549e0be78beSJoseph Huber               return type_desc_from_type<int>();
550e0be78beSJoseph Huber 
551e0be78beSJoseph Huber           } else if (internal::isdigit(str[local_pos])) {
552e0be78beSJoseph Huber             while (internal::isdigit(str[local_pos]))
553e0be78beSJoseph Huber               ++local_pos;
554e0be78beSJoseph Huber           }
555e0be78beSJoseph Huber         }
556e0be78beSJoseph Huber 
557fdef5c4fSOm Prakaash         auto [lm, bw] = parse_length_modifier(&local_pos);
558e0be78beSJoseph Huber 
559e0be78beSJoseph Huber         // if we don't have an index for this conversion, then its position is
560e0be78beSJoseph Huber         // unknown and all this information is irrelevant. The rest of this
561e0be78beSJoseph Huber         // logic has been for skipping past this conversion properly to avoid
562e0be78beSJoseph Huber         // weirdness with %%.
563e0be78beSJoseph Huber         if (conv_index == 0) {
564e0be78beSJoseph Huber           if (str[local_pos] != '\0')
565e0be78beSJoseph Huber             ++local_pos;
566e0be78beSJoseph Huber           continue;
567e0be78beSJoseph Huber         }
568e0be78beSJoseph Huber 
569e0be78beSJoseph Huber         TypeDesc conv_size = type_desc_from_type<void>();
570e0be78beSJoseph Huber         switch (str[local_pos]) {
571e0be78beSJoseph Huber         case ('%'):
572e0be78beSJoseph Huber           conv_size = type_desc_from_type<void>();
573e0be78beSJoseph Huber           break;
574e0be78beSJoseph Huber         case ('c'):
575e0be78beSJoseph Huber           conv_size = type_desc_from_type<int>();
576e0be78beSJoseph Huber           break;
577e0be78beSJoseph Huber         case ('d'):
578e0be78beSJoseph Huber         case ('i'):
579e0be78beSJoseph Huber         case ('o'):
580e0be78beSJoseph Huber         case ('x'):
581e0be78beSJoseph Huber         case ('X'):
582e0be78beSJoseph Huber         case ('u'):
583e28ca2ddSArtem Tyurin         case ('b'):
584e28ca2ddSArtem Tyurin         case ('B'):
585e0be78beSJoseph Huber           switch (lm) {
586e0be78beSJoseph Huber           case (LengthModifier::hh):
587e0be78beSJoseph Huber           case (LengthModifier::h):
588e0be78beSJoseph Huber           case (LengthModifier::none):
589e0be78beSJoseph Huber             conv_size = type_desc_from_type<int>();
590e0be78beSJoseph Huber             break;
591e0be78beSJoseph Huber           case (LengthModifier::l):
592e0be78beSJoseph Huber             conv_size = type_desc_from_type<long>();
593e0be78beSJoseph Huber             break;
594e0be78beSJoseph Huber           case (LengthModifier::ll):
595e0be78beSJoseph Huber           case (LengthModifier::L): // This isn't in the standard, but is in
596e0be78beSJoseph Huber                                     // other libc implementations.
597e0be78beSJoseph Huber             conv_size = type_desc_from_type<long long>();
598e0be78beSJoseph Huber             break;
599e0be78beSJoseph Huber           case (LengthModifier::j):
600e0be78beSJoseph Huber             conv_size = type_desc_from_type<intmax_t>();
601e0be78beSJoseph Huber             break;
602e0be78beSJoseph Huber           case (LengthModifier::z):
603e0be78beSJoseph Huber             conv_size = type_desc_from_type<size_t>();
604e0be78beSJoseph Huber             break;
605e0be78beSJoseph Huber           case (LengthModifier::t):
606e0be78beSJoseph Huber             conv_size = type_desc_from_type<ptrdiff_t>();
607e0be78beSJoseph Huber             break;
608fdef5c4fSOm Prakaash           case (LengthModifier::w):
609fdef5c4fSOm Prakaash           case (LengthModifier::wf):
6106e8a751eSRoland McGrath             if (bw <= cpp::numeric_limits<unsigned int>::digits) {
611fdef5c4fSOm Prakaash               conv_size = type_desc_from_type<int>();
6126e8a751eSRoland McGrath             } else if (bw <= cpp::numeric_limits<unsigned long>::digits) {
613fdef5c4fSOm Prakaash               conv_size = type_desc_from_type<long>();
6146e8a751eSRoland McGrath             } else if (bw <= cpp::numeric_limits<unsigned long long>::digits) {
615fdef5c4fSOm Prakaash               conv_size = type_desc_from_type<long long>();
616fdef5c4fSOm Prakaash             } else {
617fdef5c4fSOm Prakaash               conv_size = type_desc_from_type<intmax_t>();
618fdef5c4fSOm Prakaash             }
619fdef5c4fSOm Prakaash             break;
620e0be78beSJoseph Huber           }
621e0be78beSJoseph Huber           break;
622e0be78beSJoseph Huber #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT
623e0be78beSJoseph Huber         case ('f'):
624e0be78beSJoseph Huber         case ('F'):
625e0be78beSJoseph Huber         case ('e'):
626e0be78beSJoseph Huber         case ('E'):
627e0be78beSJoseph Huber         case ('a'):
628e0be78beSJoseph Huber         case ('A'):
629e0be78beSJoseph Huber         case ('g'):
630e0be78beSJoseph Huber         case ('G'):
631e0be78beSJoseph Huber           if (lm != LengthModifier::L)
632e0be78beSJoseph Huber             conv_size = type_desc_from_type<double>();
633e0be78beSJoseph Huber           else
634e0be78beSJoseph Huber             conv_size = type_desc_from_type<long double>();
635e0be78beSJoseph Huber           break;
636e0be78beSJoseph Huber #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
6378e3b6054SMichael Jones #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
6388e3b6054SMichael Jones         // Capitalization represents sign, but we only need to get the right
6398e3b6054SMichael Jones         // bitwidth here so we ignore that.
6408e3b6054SMichael Jones         case ('r'):
6418e3b6054SMichael Jones         case ('R'):
6428e3b6054SMichael Jones           conv_size = type_desc_from_type<uint32_t>();
6438e3b6054SMichael Jones           break;
6448e3b6054SMichael Jones         case ('k'):
6458e3b6054SMichael Jones         case ('K'):
6468e3b6054SMichael Jones           if (lm == LengthModifier::l) {
6478e3b6054SMichael Jones             conv_size = type_desc_from_type<uint64_t>();
6488e3b6054SMichael Jones           } else {
6498e3b6054SMichael Jones             conv_size = type_desc_from_type<uint32_t>();
6508e3b6054SMichael Jones           }
6518e3b6054SMichael Jones           break;
6528e3b6054SMichael Jones #endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
653e0be78beSJoseph Huber #ifndef LIBC_COPT_PRINTF_DISABLE_WRITE_INT
654e0be78beSJoseph Huber         case ('n'):
655e0be78beSJoseph Huber #endif // LIBC_COPT_PRINTF_DISABLE_WRITE_INT
656e0be78beSJoseph Huber         case ('p'):
657e0be78beSJoseph Huber         case ('s'):
658e0be78beSJoseph Huber           conv_size = type_desc_from_type<void *>();
659e0be78beSJoseph Huber           break;
660e0be78beSJoseph Huber         default:
661e0be78beSJoseph Huber           conv_size = type_desc_from_type<int>();
662e0be78beSJoseph Huber           break;
663e0be78beSJoseph Huber         }
664e0be78beSJoseph Huber 
665e0be78beSJoseph Huber         set_type_desc(conv_index, conv_size);
666e0be78beSJoseph Huber         if (conv_index == index)
667e0be78beSJoseph Huber           return conv_size;
668e0be78beSJoseph Huber       }
669e0be78beSJoseph Huber       // If the end of the format section is on the '\0'. This means we need to
670e0be78beSJoseph Huber       // not advance the local_pos.
671e0be78beSJoseph Huber       if (str[local_pos] != '\0')
672e0be78beSJoseph Huber         ++local_pos;
673e0be78beSJoseph Huber     }
674e0be78beSJoseph Huber 
675e0be78beSJoseph Huber     // If there is no size for the requested index, then it's unknown. Return
676e0be78beSJoseph Huber     // void.
677e0be78beSJoseph Huber     return type_desc_from_type<void>();
678e0be78beSJoseph Huber   }
679945fa672SMichael Jones 
680c3228714SGuillaume Chatelet #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE
6814f4752eeSMichael Jones };
6824f4752eeSMichael Jones 
6834f4752eeSMichael Jones } // namespace printf_core
6845ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL
6854f4752eeSMichael Jones 
6864f4752eeSMichael Jones #endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H
687