14f4752eeSMichael Jones //===-- Format string parser for printf -------------------------*- C++ -*-===// 24f4752eeSMichael Jones // 34f4752eeSMichael Jones // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 44f4752eeSMichael Jones // See https://llvm.org/LICENSE.txt for license information. 54f4752eeSMichael Jones // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 64f4752eeSMichael Jones // 74f4752eeSMichael Jones //===----------------------------------------------------------------------===// 84f4752eeSMichael Jones 94f4752eeSMichael Jones #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H 104f4752eeSMichael Jones #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H 114f4752eeSMichael Jones 1273aab2f6Slntue #include "include/llvm-libc-macros/stdfix-macros.h" 13d83271b0SCaslyn Tonelli #include "src/__support/CPP/algorithm.h" // max 142f6a8797SRoland McGrath #include "src/__support/CPP/limits.h" 15bf279f90SMichael Jones #include "src/__support/CPP/optional.h" 168e3b6054SMichael Jones #include "src/__support/CPP/type_traits.h" 175ff3ff33SPetr Hosek #include "src/__support/macros/config.h" 18e0be78beSJoseph Huber #include "src/__support/str_to_integer.h" 194f4752eeSMichael Jones #include "src/stdio/printf_core/core_structs.h" 20ecca8952SMichael Jones #include "src/stdio/printf_core/printf_config.h" 214f4752eeSMichael Jones 224f4752eeSMichael Jones #include <stddef.h> 234f4752eeSMichael Jones 248e3b6054SMichael Jones #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT 258e3b6054SMichael Jones #include "src/__support/fixed_point/fx_rep.h" 268e3b6054SMichael Jones #endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT 27f009f72dSMichael Jones #ifndef LIBC_COPT_PRINTF_DISABLE_STRERROR 28f009f72dSMichael Jones #include "src/errno/libc_errno.h" 29f009f72dSMichael Jones #endif // LIBC_COPT_PRINTF_DISABLE_STRERROR 308e3b6054SMichael Jones 315ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL { 324f4752eeSMichael Jones namespace printf_core { 334f4752eeSMichael Jones 34e0be78beSJoseph Huber template <typename T> struct int_type_of { 35e0be78beSJoseph Huber using type = T; 36e0be78beSJoseph Huber }; 37e0be78beSJoseph Huber template <> struct int_type_of<double> { 383546f4daSGuillaume Chatelet using type = fputil::FPBits<double>::StorageType; 39e0be78beSJoseph Huber }; 40e0be78beSJoseph Huber template <> struct int_type_of<long double> { 413546f4daSGuillaume Chatelet using type = fputil::FPBits<long double>::StorageType; 42e0be78beSJoseph Huber }; 438e3b6054SMichael Jones 448e3b6054SMichael Jones #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT 458e3b6054SMichael Jones template <typename T> 468e3b6054SMichael Jones struct int_type_of<cpp::enable_if<cpp::is_fixed_point_v<T>, T>> { 478e3b6054SMichael Jones using type = typename fixed_point::FXRep<T>::StorageType; 488e3b6054SMichael Jones }; 498e3b6054SMichael Jones #endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT 508e3b6054SMichael Jones 51e0be78beSJoseph Huber template <typename T> using int_type_of_v = typename int_type_of<T>::type; 5247fb6d1cSMichael Jones 53e0be78beSJoseph Huber #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE 54e0be78beSJoseph Huber #define WRITE_ARG_VAL_SIMPLEST(dst, arg_type, index) \ 55e0be78beSJoseph Huber { \ 56e0be78beSJoseph Huber auto temp = get_arg_value<arg_type>(index); \ 57e0be78beSJoseph Huber if (!temp.has_value()) { \ 58e0be78beSJoseph Huber section.has_conv = false; \ 59e0be78beSJoseph Huber } else { \ 60e0be78beSJoseph Huber dst = cpp::bit_cast<int_type_of_v<arg_type>>(temp.value()); \ 61e0be78beSJoseph Huber } \ 62e0be78beSJoseph Huber } 63e0be78beSJoseph Huber #else 64e0be78beSJoseph Huber #define WRITE_ARG_VAL_SIMPLEST(dst, arg_type, _) \ 65e0be78beSJoseph Huber dst = cpp::bit_cast<int_type_of_v<arg_type>>(get_next_arg_value<arg_type>()) 66e0be78beSJoseph Huber #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE 67e0be78beSJoseph Huber 68e0be78beSJoseph Huber template <typename ArgProvider> class Parser { 694f4752eeSMichael Jones const char *__restrict str; 704f4752eeSMichael Jones 714f4752eeSMichael Jones size_t cur_pos = 0; 7247fb6d1cSMichael Jones ArgProvider args_cur; 73945fa672SMichael Jones 74c3228714SGuillaume Chatelet #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE 75945fa672SMichael Jones // args_start stores the start of the va_args, which is allows getting the 76945fa672SMichael Jones // value of arguments that have already been passed. args_index is tracked so 77945fa672SMichael Jones // that we know which argument args_cur is on. 7847fb6d1cSMichael Jones ArgProvider args_start; 794f4752eeSMichael Jones size_t args_index = 1; 804f4752eeSMichael Jones 81ecca8952SMichael Jones // Defined in printf_config.h 82c3228714SGuillaume Chatelet static constexpr size_t DESC_ARR_LEN = LIBC_COPT_PRINTF_INDEX_ARR_LEN; 83ecca8952SMichael Jones 8447fb6d1cSMichael Jones // desc_arr stores the sizes of the variables in the ArgProvider. This is used 8547fb6d1cSMichael Jones // in index mode to reduce repeated string parsing. The sizes are stored as 86945fa672SMichael Jones // TypeDesc objects, which store the size as well as minimal type information. 87945fa672SMichael Jones // This is necessary because some systems separate the floating point and 88945fa672SMichael Jones // integer values in va_args. 89bf279f90SMichael Jones TypeDesc desc_arr[DESC_ARR_LEN] = {type_desc_from_type<void>()}; 90945fa672SMichael Jones 914f4752eeSMichael Jones // TODO: Look into object stores for optimization. 924f4752eeSMichael Jones 93c3228714SGuillaume Chatelet #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE 94945fa672SMichael Jones 954f4752eeSMichael Jones public: 96c3228714SGuillaume Chatelet #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE 9747fb6d1cSMichael Jones LIBC_INLINE Parser(const char *__restrict new_str, ArgProvider &args) 98096463d0SMichael Jones : str(new_str), args_cur(args), args_start(args) {} 99945fa672SMichael Jones #else 10047fb6d1cSMichael Jones LIBC_INLINE Parser(const char *__restrict new_str, ArgProvider &args) 101945fa672SMichael Jones : str(new_str), args_cur(args) {} 102c3228714SGuillaume Chatelet #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE 1034f4752eeSMichael Jones 1044f4752eeSMichael Jones // get_next_section will parse the format string until it has a fully 1054f4752eeSMichael Jones // specified format section. This can either be a raw format section with no 1064f4752eeSMichael Jones // conversion, or a format section with a conversion that has all of its 1074f4752eeSMichael Jones // variables stored in the format section. 108e0be78beSJoseph Huber LIBC_INLINE FormatSection get_next_section() { 109e0be78beSJoseph Huber FormatSection section; 110e0be78beSJoseph Huber size_t starting_pos = cur_pos; 111e0be78beSJoseph Huber if (str[cur_pos] == '%') { 112e0be78beSJoseph Huber // format section 113e0be78beSJoseph Huber section.has_conv = true; 114e0be78beSJoseph Huber 115e0be78beSJoseph Huber ++cur_pos; 116e0be78beSJoseph Huber [[maybe_unused]] size_t conv_index = 0; 117e0be78beSJoseph Huber 118e0be78beSJoseph Huber #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE 119e0be78beSJoseph Huber conv_index = parse_index(&cur_pos); 120e0be78beSJoseph Huber #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE 121e0be78beSJoseph Huber 122e0be78beSJoseph Huber section.flags = parse_flags(&cur_pos); 123e0be78beSJoseph Huber 124e0be78beSJoseph Huber // handle width 125e0be78beSJoseph Huber section.min_width = 0; 126e0be78beSJoseph Huber if (str[cur_pos] == '*') { 127e0be78beSJoseph Huber ++cur_pos; 128e0be78beSJoseph Huber 129e0be78beSJoseph Huber WRITE_ARG_VAL_SIMPLEST(section.min_width, int, parse_index(&cur_pos)); 130e0be78beSJoseph Huber } else if (internal::isdigit(str[cur_pos])) { 131e0be78beSJoseph Huber auto result = internal::strtointeger<int>(str + cur_pos, 10); 132e0be78beSJoseph Huber section.min_width = result.value; 133e0be78beSJoseph Huber cur_pos = cur_pos + result.parsed_len; 134e0be78beSJoseph Huber } 135e0be78beSJoseph Huber if (section.min_width < 0) { 136a21fc4c0SMichael Jones section.min_width = 137a21fc4c0SMichael Jones (section.min_width == INT_MIN) ? INT_MAX : -section.min_width; 138e0be78beSJoseph Huber section.flags = static_cast<FormatFlags>(section.flags | 139e0be78beSJoseph Huber FormatFlags::LEFT_JUSTIFIED); 140e0be78beSJoseph Huber } 141e0be78beSJoseph Huber 142e0be78beSJoseph Huber // handle precision 143e0be78beSJoseph Huber section.precision = -1; // negative precisions are ignored. 144e0be78beSJoseph Huber if (str[cur_pos] == '.') { 145e0be78beSJoseph Huber ++cur_pos; 146e0be78beSJoseph Huber section.precision = 0; // if there's a . but no specified precision, the 147e0be78beSJoseph Huber // precision is implicitly 0. 148e0be78beSJoseph Huber if (str[cur_pos] == '*') { 149e0be78beSJoseph Huber ++cur_pos; 150e0be78beSJoseph Huber 151e0be78beSJoseph Huber WRITE_ARG_VAL_SIMPLEST(section.precision, int, parse_index(&cur_pos)); 152e0be78beSJoseph Huber 153e0be78beSJoseph Huber } else if (internal::isdigit(str[cur_pos])) { 154e0be78beSJoseph Huber auto result = internal::strtointeger<int>(str + cur_pos, 10); 155e0be78beSJoseph Huber section.precision = result.value; 156e0be78beSJoseph Huber cur_pos = cur_pos + result.parsed_len; 157e0be78beSJoseph Huber } 158e0be78beSJoseph Huber } 159e0be78beSJoseph Huber 160fdef5c4fSOm Prakaash auto [lm, bw] = parse_length_modifier(&cur_pos); 161e0be78beSJoseph Huber section.length_modifier = lm; 162e0be78beSJoseph Huber section.conv_name = str[cur_pos]; 163fdef5c4fSOm Prakaash section.bit_width = bw; 164e0be78beSJoseph Huber switch (str[cur_pos]) { 165e0be78beSJoseph Huber case ('%'): 166e0be78beSJoseph Huber // Regardless of options, a % conversion is always safe. The standard 167e0be78beSJoseph Huber // says that "The complete conversion specification shall be %%" but it 168e0be78beSJoseph Huber // also says that "If a conversion specification is invalid, the 169e0be78beSJoseph Huber // behavior is undefined." Based on that we define that any conversion 170e0be78beSJoseph Huber // specification ending in '%' shall display as '%' regardless of any 171e0be78beSJoseph Huber // valid or invalid options. 172e0be78beSJoseph Huber section.has_conv = true; 173e0be78beSJoseph Huber break; 174e0be78beSJoseph Huber case ('c'): 175e0be78beSJoseph Huber WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index); 176e0be78beSJoseph Huber break; 177e0be78beSJoseph Huber case ('d'): 178e0be78beSJoseph Huber case ('i'): 179e0be78beSJoseph Huber case ('o'): 180e0be78beSJoseph Huber case ('x'): 181e0be78beSJoseph Huber case ('X'): 182e0be78beSJoseph Huber case ('u'): 183e28ca2ddSArtem Tyurin case ('b'): 184e28ca2ddSArtem Tyurin case ('B'): 185e0be78beSJoseph Huber switch (lm) { 186e0be78beSJoseph Huber case (LengthModifier::hh): 187e0be78beSJoseph Huber case (LengthModifier::h): 188e0be78beSJoseph Huber case (LengthModifier::none): 189e0be78beSJoseph Huber WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index); 190e0be78beSJoseph Huber break; 191e0be78beSJoseph Huber case (LengthModifier::l): 192e0be78beSJoseph Huber WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long, conv_index); 193e0be78beSJoseph Huber break; 194e0be78beSJoseph Huber case (LengthModifier::ll): 195e0be78beSJoseph Huber case (LengthModifier::L): // This isn't in the standard, but is in other 196e0be78beSJoseph Huber // libc implementations. 197e0be78beSJoseph Huber 198e0be78beSJoseph Huber WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index); 199e0be78beSJoseph Huber break; 200e0be78beSJoseph Huber case (LengthModifier::j): 201e0be78beSJoseph Huber 202e0be78beSJoseph Huber WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index); 203e0be78beSJoseph Huber break; 204e0be78beSJoseph Huber case (LengthModifier::z): 205e0be78beSJoseph Huber 206e0be78beSJoseph Huber WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, size_t, conv_index); 207e0be78beSJoseph Huber break; 208e0be78beSJoseph Huber case (LengthModifier::t): 209e0be78beSJoseph Huber 210e0be78beSJoseph Huber WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, ptrdiff_t, conv_index); 211e0be78beSJoseph Huber break; 212fdef5c4fSOm Prakaash 213fdef5c4fSOm Prakaash case (LengthModifier::w): 214fdef5c4fSOm Prakaash case (LengthModifier::wf): 215fdef5c4fSOm Prakaash if (bw == 0) { 216fdef5c4fSOm Prakaash section.has_conv = false; 217a21cf566SRoland McGrath } else if (bw <= cpp::numeric_limits<unsigned int>::digits) { 218fdef5c4fSOm Prakaash WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index); 219a21cf566SRoland McGrath } else if (bw <= cpp::numeric_limits<unsigned long>::digits) { 220fdef5c4fSOm Prakaash WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long, conv_index); 221a21cf566SRoland McGrath } else if (bw <= cpp::numeric_limits<unsigned long long>::digits) { 222fdef5c4fSOm Prakaash WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index); 223fdef5c4fSOm Prakaash } else { 224fdef5c4fSOm Prakaash WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index); 225fdef5c4fSOm Prakaash } 226fdef5c4fSOm Prakaash break; 227e0be78beSJoseph Huber } 228e0be78beSJoseph Huber break; 229e0be78beSJoseph Huber #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT 230e0be78beSJoseph Huber case ('f'): 231e0be78beSJoseph Huber case ('F'): 232e0be78beSJoseph Huber case ('e'): 233e0be78beSJoseph Huber case ('E'): 234e0be78beSJoseph Huber case ('a'): 235e0be78beSJoseph Huber case ('A'): 236e0be78beSJoseph Huber case ('g'): 237e0be78beSJoseph Huber case ('G'): 238e0be78beSJoseph Huber if (lm != LengthModifier::L) { 239e0be78beSJoseph Huber WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, double, conv_index); 240e0be78beSJoseph Huber } else { 241e0be78beSJoseph Huber WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long double, conv_index); 242e0be78beSJoseph Huber } 243e0be78beSJoseph Huber break; 244e0be78beSJoseph Huber #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT 2458e3b6054SMichael Jones #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT 2468e3b6054SMichael Jones // Capitalization represents sign, but we only need to get the right 2478e3b6054SMichael Jones // bitwidth here so we ignore that. 2488e3b6054SMichael Jones case ('r'): 2498e3b6054SMichael Jones case ('R'): 2508e3b6054SMichael Jones // all fract sizes we support are less than 32 bits, and currently doing 2518e3b6054SMichael Jones // va_args with fixed point types just doesn't work. 2528e3b6054SMichael Jones // TODO: Move to fixed point types once va_args supports it. 2538e3b6054SMichael Jones WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, uint32_t, conv_index); 2548e3b6054SMichael Jones break; 2558e3b6054SMichael Jones case ('k'): 2568e3b6054SMichael Jones case ('K'): 2578e3b6054SMichael Jones if (lm == LengthModifier::l) { 2588e3b6054SMichael Jones WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, uint64_t, conv_index); 2598e3b6054SMichael Jones } else { 2608e3b6054SMichael Jones WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, uint32_t, conv_index); 2618e3b6054SMichael Jones } 2628e3b6054SMichael Jones break; 2638e3b6054SMichael Jones #endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT 264f009f72dSMichael Jones #ifndef LIBC_COPT_PRINTF_DISABLE_STRERROR 265f009f72dSMichael Jones case ('m'): 266f009f72dSMichael Jones // %m is an odd conversion in that it doesn't consume an argument, it 267f009f72dSMichael Jones // just takes the current value of errno as its argument. 268*eab63b5aSMichael Jones section.conv_val_raw = static_cast<int>(libc_errno); 269f009f72dSMichael Jones break; 270f009f72dSMichael Jones #endif // LIBC_COPT_PRINTF_DISABLE_STRERROR 271e0be78beSJoseph Huber #ifndef LIBC_COPT_PRINTF_DISABLE_WRITE_INT 272f009f72dSMichael Jones case ('n'): // Intentional fallthrough 273e0be78beSJoseph Huber #endif // LIBC_COPT_PRINTF_DISABLE_WRITE_INT 274e0be78beSJoseph Huber case ('p'): 275e0be78beSJoseph Huber WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, void *, conv_index); 276e0be78beSJoseph Huber break; 277e0be78beSJoseph Huber case ('s'): 278e0be78beSJoseph Huber WRITE_ARG_VAL_SIMPLEST(section.conv_val_ptr, char *, conv_index); 279e0be78beSJoseph Huber break; 280e0be78beSJoseph Huber default: 281e0be78beSJoseph Huber // if the conversion is undefined, change this to a raw section. 282e0be78beSJoseph Huber section.has_conv = false; 283e0be78beSJoseph Huber break; 284e0be78beSJoseph Huber } 285e0be78beSJoseph Huber // If the end of the format section is on the '\0'. This means we need to 286e0be78beSJoseph Huber // not advance the cur_pos. 287e0be78beSJoseph Huber if (str[cur_pos] != '\0') 288e0be78beSJoseph Huber ++cur_pos; 289e0be78beSJoseph Huber 290e0be78beSJoseph Huber } else { 291e0be78beSJoseph Huber // raw section 292e0be78beSJoseph Huber section.has_conv = false; 293e0be78beSJoseph Huber while (str[cur_pos] != '%' && str[cur_pos] != '\0') 294e0be78beSJoseph Huber ++cur_pos; 295e0be78beSJoseph Huber } 296e0be78beSJoseph Huber section.raw_string = {str + starting_pos, cur_pos - starting_pos}; 297e0be78beSJoseph Huber return section; 298e0be78beSJoseph Huber } 2994f4752eeSMichael Jones 3004f4752eeSMichael Jones private: 3014f4752eeSMichael Jones // parse_flags parses the flags inside a format string. It assumes that 3024f4752eeSMichael Jones // str[*local_pos] is inside a format specifier, and parses any flags it 3034f4752eeSMichael Jones // finds. It returns a FormatFlags object containing the set of found flags 3044f4752eeSMichael Jones // arithmetically or'd together. local_pos will be moved past any flags found. 305e0be78beSJoseph Huber LIBC_INLINE FormatFlags parse_flags(size_t *local_pos) { 306e0be78beSJoseph Huber bool found_flag = true; 307e0be78beSJoseph Huber FormatFlags flags = FormatFlags(0); 308e0be78beSJoseph Huber while (found_flag) { 309e0be78beSJoseph Huber switch (str[*local_pos]) { 310e0be78beSJoseph Huber case '-': 311e0be78beSJoseph Huber flags = static_cast<FormatFlags>(flags | FormatFlags::LEFT_JUSTIFIED); 312e0be78beSJoseph Huber break; 313e0be78beSJoseph Huber case '+': 314e0be78beSJoseph Huber flags = static_cast<FormatFlags>(flags | FormatFlags::FORCE_SIGN); 315e0be78beSJoseph Huber break; 316e0be78beSJoseph Huber case ' ': 317e0be78beSJoseph Huber flags = static_cast<FormatFlags>(flags | FormatFlags::SPACE_PREFIX); 318e0be78beSJoseph Huber break; 319e0be78beSJoseph Huber case '#': 320e0be78beSJoseph Huber flags = static_cast<FormatFlags>(flags | FormatFlags::ALTERNATE_FORM); 321e0be78beSJoseph Huber break; 322e0be78beSJoseph Huber case '0': 323e0be78beSJoseph Huber flags = static_cast<FormatFlags>(flags | FormatFlags::LEADING_ZEROES); 324e0be78beSJoseph Huber break; 325e0be78beSJoseph Huber default: 326e0be78beSJoseph Huber found_flag = false; 327e0be78beSJoseph Huber } 328e0be78beSJoseph Huber if (found_flag) 329e0be78beSJoseph Huber ++*local_pos; 330e0be78beSJoseph Huber } 331e0be78beSJoseph Huber return flags; 332e0be78beSJoseph Huber } 3334f4752eeSMichael Jones 3344f4752eeSMichael Jones // parse_length_modifier parses the length modifier inside a format string. It 3354f4752eeSMichael Jones // assumes that str[*local_pos] is inside a format specifier. It returns a 3364f4752eeSMichael Jones // LengthModifier with the length modifier it found. It will advance local_pos 3374f4752eeSMichael Jones // after the format specifier if one is found. 338fdef5c4fSOm Prakaash LIBC_INLINE LengthSpec parse_length_modifier(size_t *local_pos) { 339e0be78beSJoseph Huber switch (str[*local_pos]) { 340e0be78beSJoseph Huber case ('l'): 341e0be78beSJoseph Huber if (str[*local_pos + 1] == 'l') { 342e0be78beSJoseph Huber *local_pos += 2; 343fdef5c4fSOm Prakaash return {LengthModifier::ll, 0}; 344e0be78beSJoseph Huber } else { 345e0be78beSJoseph Huber ++*local_pos; 346fdef5c4fSOm Prakaash return {LengthModifier::l, 0}; 347fdef5c4fSOm Prakaash } 348fdef5c4fSOm Prakaash case ('w'): { 349fdef5c4fSOm Prakaash LengthModifier lm; 350fdef5c4fSOm Prakaash if (str[*local_pos + 1] == 'f') { 351fdef5c4fSOm Prakaash *local_pos += 2; 352fdef5c4fSOm Prakaash lm = LengthModifier::wf; 353fdef5c4fSOm Prakaash } else { 354fdef5c4fSOm Prakaash ++*local_pos; 355fdef5c4fSOm Prakaash lm = LengthModifier::w; 356fdef5c4fSOm Prakaash } 357fdef5c4fSOm Prakaash if (internal::isdigit(str[*local_pos])) { 358fdef5c4fSOm Prakaash const auto result = internal::strtointeger<int>(str + *local_pos, 10); 359fdef5c4fSOm Prakaash *local_pos += result.parsed_len; 360fdef5c4fSOm Prakaash return {lm, static_cast<size_t>(cpp::max(0, result.value))}; 361fdef5c4fSOm Prakaash } 362fdef5c4fSOm Prakaash return {lm, 0}; 363e0be78beSJoseph Huber } 364e0be78beSJoseph Huber case ('h'): 365e0be78beSJoseph Huber if (str[*local_pos + 1] == 'h') { 366e0be78beSJoseph Huber *local_pos += 2; 367fdef5c4fSOm Prakaash return {LengthModifier::hh, 0}; 368e0be78beSJoseph Huber } else { 369e0be78beSJoseph Huber ++*local_pos; 370fdef5c4fSOm Prakaash return {LengthModifier::h, 0}; 371e0be78beSJoseph Huber } 372e0be78beSJoseph Huber case ('L'): 373e0be78beSJoseph Huber ++*local_pos; 374fdef5c4fSOm Prakaash return {LengthModifier::L, 0}; 375e0be78beSJoseph Huber case ('j'): 376e0be78beSJoseph Huber ++*local_pos; 377fdef5c4fSOm Prakaash return {LengthModifier::j, 0}; 378e0be78beSJoseph Huber case ('z'): 379e0be78beSJoseph Huber ++*local_pos; 380fdef5c4fSOm Prakaash return {LengthModifier::z, 0}; 381e0be78beSJoseph Huber case ('t'): 382e0be78beSJoseph Huber ++*local_pos; 383fdef5c4fSOm Prakaash return {LengthModifier::t, 0}; 384e0be78beSJoseph Huber default: 385fdef5c4fSOm Prakaash return {LengthModifier::none, 0}; 386e0be78beSJoseph Huber } 387e0be78beSJoseph Huber } 3884f4752eeSMichael Jones 3894f4752eeSMichael Jones // get_next_arg_value gets the next value from the arg list as type T. 390494734b0SSiva Chandra Reddy template <class T> LIBC_INLINE T get_next_arg_value() { 391e0be78beSJoseph Huber return args_cur.template next_var<T>(); 3924f4752eeSMichael Jones } 393945fa672SMichael Jones 394945fa672SMichael Jones //---------------------------------------------------- 395945fa672SMichael Jones // INDEX MODE ONLY FUNCTIONS AFTER HERE: 396945fa672SMichael Jones //---------------------------------------------------- 397945fa672SMichael Jones 398c3228714SGuillaume Chatelet #ifndef LIBC_COPT_PRINTF_DISABLE_INDEX_MODE 399945fa672SMichael Jones 400945fa672SMichael Jones // parse_index parses the index of a value inside a format string. It 401945fa672SMichael Jones // assumes that str[*local_pos] points to character after a '%' or '*', and 402945fa672SMichael Jones // returns 0 if there is no closing $, or if it finds no number. If it finds a 403945fa672SMichael Jones // number, it will move local_pos past the end of the $, else it will not move 404945fa672SMichael Jones // local_pos. 405e0be78beSJoseph Huber LIBC_INLINE size_t parse_index(size_t *local_pos) { 406e0be78beSJoseph Huber if (internal::isdigit(str[*local_pos])) { 407e0be78beSJoseph Huber auto result = internal::strtointeger<int>(str + *local_pos, 10); 408e0be78beSJoseph Huber size_t index = result.value; 409e0be78beSJoseph Huber if (str[*local_pos + result.parsed_len] != '$') 410e0be78beSJoseph Huber return 0; 411e0be78beSJoseph Huber *local_pos = 1 + result.parsed_len + *local_pos; 412e0be78beSJoseph Huber return index; 413e0be78beSJoseph Huber } 414e0be78beSJoseph Huber return 0; 415e0be78beSJoseph Huber } 416945fa672SMichael Jones 417494734b0SSiva Chandra Reddy LIBC_INLINE void set_type_desc(size_t index, TypeDesc value) { 418945fa672SMichael Jones if (index != 0 && index <= DESC_ARR_LEN) 419945fa672SMichael Jones desc_arr[index - 1] = value; 420945fa672SMichael Jones } 421945fa672SMichael Jones 422945fa672SMichael Jones // get_arg_value gets the value from the arg list at index (starting at 1). 423945fa672SMichael Jones // This may require parsing the format string. An index of 0 is interpreted as 424bf279f90SMichael Jones // the next value. If the format string is not valid, it may have gaps in its 425bf279f90SMichael Jones // indexes. Requesting the value for any index after a gap will fail, since 426bf279f90SMichael Jones // the arg list must be read in order and with the correct types. 427bf279f90SMichael Jones template <class T> LIBC_INLINE cpp::optional<T> get_arg_value(size_t index) { 428bf279f90SMichael Jones if (!(index == 0 || index == args_index)) { 429bf279f90SMichael Jones bool success = args_to_index(index); 430bf279f90SMichael Jones if (!success) { 431bf279f90SMichael Jones // If we can't get to this index, then the value of the arg can't be 432bf279f90SMichael Jones // found. 433bf279f90SMichael Jones return cpp::optional<T>(); 434bf279f90SMichael Jones } 435bf279f90SMichael Jones } 436945fa672SMichael Jones 437848c700bSMichael Jones set_type_desc(index, type_desc_from_type<T>()); 438945fa672SMichael Jones 439945fa672SMichael Jones ++args_index; 440945fa672SMichael Jones return get_next_arg_value<T>(); 441945fa672SMichael Jones } 442945fa672SMichael Jones 44347fb6d1cSMichael Jones // the ArgProvider can only return the next item in the list. This function is 444945fa672SMichael Jones // used in index mode when the item that needs to be read is not the next one. 445678e3ee1SFangrui Song // It moves cur_args to the index requested so the appropriate value may 446945fa672SMichael Jones // be read. This may involve parsing the format string, and is in the worst 447945fa672SMichael Jones // case an O(n^2) operation. 448e0be78beSJoseph Huber LIBC_INLINE bool args_to_index(size_t index) { 449e0be78beSJoseph Huber if (args_index > index) { 450e0be78beSJoseph Huber args_index = 1; 451e0be78beSJoseph Huber args_cur = args_start; 452e0be78beSJoseph Huber } 453e0be78beSJoseph Huber 454e0be78beSJoseph Huber while (args_index < index) { 455e0be78beSJoseph Huber TypeDesc cur_type_desc = type_desc_from_type<void>(); 456e0be78beSJoseph Huber if (args_index <= DESC_ARR_LEN) 457e0be78beSJoseph Huber cur_type_desc = desc_arr[args_index - 1]; 458e0be78beSJoseph Huber 459e0be78beSJoseph Huber if (cur_type_desc == type_desc_from_type<void>()) 460e0be78beSJoseph Huber cur_type_desc = get_type_desc(args_index); 461e0be78beSJoseph Huber 462e0be78beSJoseph Huber // A type of void represents the type being unknown. If the type for the 463e0be78beSJoseph Huber // requested index isn't in the desc_arr and isn't found by parsing the 464e0be78beSJoseph Huber // string, then then advancing to the requested index is impossible. In 465e0be78beSJoseph Huber // that case the function returns false. 466e0be78beSJoseph Huber if (cur_type_desc == type_desc_from_type<void>()) 467e0be78beSJoseph Huber return false; 468e0be78beSJoseph Huber 469e0be78beSJoseph Huber if (cur_type_desc == type_desc_from_type<uint32_t>()) 470e0be78beSJoseph Huber args_cur.template next_var<uint32_t>(); 471e0be78beSJoseph Huber else if (cur_type_desc == type_desc_from_type<uint64_t>()) 472e0be78beSJoseph Huber args_cur.template next_var<uint64_t>(); 473e0be78beSJoseph Huber #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT 474e0be78beSJoseph Huber // Floating point numbers are stored separately from the other arguments. 475e0be78beSJoseph Huber else if (cur_type_desc == type_desc_from_type<double>()) 476e0be78beSJoseph Huber args_cur.template next_var<double>(); 477e0be78beSJoseph Huber else if (cur_type_desc == type_desc_from_type<long double>()) 478e0be78beSJoseph Huber args_cur.template next_var<long double>(); 479e0be78beSJoseph Huber #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT 4808e3b6054SMichael Jones #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT 4818e3b6054SMichael Jones // Floating point numbers may be stored separately from the other 4828e3b6054SMichael Jones // arguments. 4838e3b6054SMichael Jones else if (cur_type_desc == type_desc_from_type<short fract>()) 4848e3b6054SMichael Jones args_cur.template next_var<short fract>(); 4858e3b6054SMichael Jones else if (cur_type_desc == type_desc_from_type<fract>()) 4868e3b6054SMichael Jones args_cur.template next_var<fract>(); 4878e3b6054SMichael Jones else if (cur_type_desc == type_desc_from_type<long fract>()) 4888e3b6054SMichael Jones args_cur.template next_var<long fract>(); 4898e3b6054SMichael Jones else if (cur_type_desc == type_desc_from_type<short accum>()) 4908e3b6054SMichael Jones args_cur.template next_var<short accum>(); 4918e3b6054SMichael Jones else if (cur_type_desc == type_desc_from_type<accum>()) 4928e3b6054SMichael Jones args_cur.template next_var<accum>(); 4938e3b6054SMichael Jones else if (cur_type_desc == type_desc_from_type<long accum>()) 4948e3b6054SMichael Jones args_cur.template next_var<long accum>(); 4958e3b6054SMichael Jones #endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT 496e0be78beSJoseph Huber // pointers may be stored separately from normal values. 497e0be78beSJoseph Huber else if (cur_type_desc == type_desc_from_type<void *>()) 498e0be78beSJoseph Huber args_cur.template next_var<void *>(); 499e0be78beSJoseph Huber else 500e0be78beSJoseph Huber args_cur.template next_var<uint32_t>(); 501e0be78beSJoseph Huber 502e0be78beSJoseph Huber ++args_index; 503e0be78beSJoseph Huber } 504e0be78beSJoseph Huber return true; 505e0be78beSJoseph Huber } 506945fa672SMichael Jones 507945fa672SMichael Jones // get_type_desc assumes that this format string uses index mode. It iterates 508945fa672SMichael Jones // through the format string until it finds a format specifier that defines 509945fa672SMichael Jones // the type of index, and returns a TypeDesc describing that type. It does not 510945fa672SMichael Jones // modify cur_pos. 511e0be78beSJoseph Huber LIBC_INLINE TypeDesc get_type_desc(size_t index) { 5121650f1b3SJay Foad // index mode is assumed, and the indices start at 1, so an index 513e0be78beSJoseph Huber // of 0 is invalid. 514e0be78beSJoseph Huber size_t local_pos = 0; 515e0be78beSJoseph Huber 516e0be78beSJoseph Huber while (str[local_pos]) { 517e0be78beSJoseph Huber if (str[local_pos] == '%') { 518e0be78beSJoseph Huber ++local_pos; 519e0be78beSJoseph Huber 520e0be78beSJoseph Huber size_t conv_index = parse_index(&local_pos); 521e0be78beSJoseph Huber 522e0be78beSJoseph Huber // the flags aren't relevant for this situation, but I need to skip past 523e0be78beSJoseph Huber // them so they're parsed but the result is discarded. 524e0be78beSJoseph Huber parse_flags(&local_pos); 525e0be78beSJoseph Huber 526e0be78beSJoseph Huber // handle width 527e0be78beSJoseph Huber if (str[local_pos] == '*') { 528e0be78beSJoseph Huber ++local_pos; 529e0be78beSJoseph Huber 530e0be78beSJoseph Huber size_t width_index = parse_index(&local_pos); 531e0be78beSJoseph Huber set_type_desc(width_index, type_desc_from_type<int>()); 532e0be78beSJoseph Huber if (width_index == index) 533e0be78beSJoseph Huber return type_desc_from_type<int>(); 534e0be78beSJoseph Huber 535e0be78beSJoseph Huber } else if (internal::isdigit(str[local_pos])) { 536e0be78beSJoseph Huber while (internal::isdigit(str[local_pos])) 537e0be78beSJoseph Huber ++local_pos; 538e0be78beSJoseph Huber } 539e0be78beSJoseph Huber 540e0be78beSJoseph Huber // handle precision 541e0be78beSJoseph Huber if (str[local_pos] == '.') { 542e0be78beSJoseph Huber ++local_pos; 543e0be78beSJoseph Huber if (str[local_pos] == '*') { 544e0be78beSJoseph Huber ++local_pos; 545e0be78beSJoseph Huber 546e0be78beSJoseph Huber size_t precision_index = parse_index(&local_pos); 547e0be78beSJoseph Huber set_type_desc(precision_index, type_desc_from_type<int>()); 548e0be78beSJoseph Huber if (precision_index == index) 549e0be78beSJoseph Huber return type_desc_from_type<int>(); 550e0be78beSJoseph Huber 551e0be78beSJoseph Huber } else if (internal::isdigit(str[local_pos])) { 552e0be78beSJoseph Huber while (internal::isdigit(str[local_pos])) 553e0be78beSJoseph Huber ++local_pos; 554e0be78beSJoseph Huber } 555e0be78beSJoseph Huber } 556e0be78beSJoseph Huber 557fdef5c4fSOm Prakaash auto [lm, bw] = parse_length_modifier(&local_pos); 558e0be78beSJoseph Huber 559e0be78beSJoseph Huber // if we don't have an index for this conversion, then its position is 560e0be78beSJoseph Huber // unknown and all this information is irrelevant. The rest of this 561e0be78beSJoseph Huber // logic has been for skipping past this conversion properly to avoid 562e0be78beSJoseph Huber // weirdness with %%. 563e0be78beSJoseph Huber if (conv_index == 0) { 564e0be78beSJoseph Huber if (str[local_pos] != '\0') 565e0be78beSJoseph Huber ++local_pos; 566e0be78beSJoseph Huber continue; 567e0be78beSJoseph Huber } 568e0be78beSJoseph Huber 569e0be78beSJoseph Huber TypeDesc conv_size = type_desc_from_type<void>(); 570e0be78beSJoseph Huber switch (str[local_pos]) { 571e0be78beSJoseph Huber case ('%'): 572e0be78beSJoseph Huber conv_size = type_desc_from_type<void>(); 573e0be78beSJoseph Huber break; 574e0be78beSJoseph Huber case ('c'): 575e0be78beSJoseph Huber conv_size = type_desc_from_type<int>(); 576e0be78beSJoseph Huber break; 577e0be78beSJoseph Huber case ('d'): 578e0be78beSJoseph Huber case ('i'): 579e0be78beSJoseph Huber case ('o'): 580e0be78beSJoseph Huber case ('x'): 581e0be78beSJoseph Huber case ('X'): 582e0be78beSJoseph Huber case ('u'): 583e28ca2ddSArtem Tyurin case ('b'): 584e28ca2ddSArtem Tyurin case ('B'): 585e0be78beSJoseph Huber switch (lm) { 586e0be78beSJoseph Huber case (LengthModifier::hh): 587e0be78beSJoseph Huber case (LengthModifier::h): 588e0be78beSJoseph Huber case (LengthModifier::none): 589e0be78beSJoseph Huber conv_size = type_desc_from_type<int>(); 590e0be78beSJoseph Huber break; 591e0be78beSJoseph Huber case (LengthModifier::l): 592e0be78beSJoseph Huber conv_size = type_desc_from_type<long>(); 593e0be78beSJoseph Huber break; 594e0be78beSJoseph Huber case (LengthModifier::ll): 595e0be78beSJoseph Huber case (LengthModifier::L): // This isn't in the standard, but is in 596e0be78beSJoseph Huber // other libc implementations. 597e0be78beSJoseph Huber conv_size = type_desc_from_type<long long>(); 598e0be78beSJoseph Huber break; 599e0be78beSJoseph Huber case (LengthModifier::j): 600e0be78beSJoseph Huber conv_size = type_desc_from_type<intmax_t>(); 601e0be78beSJoseph Huber break; 602e0be78beSJoseph Huber case (LengthModifier::z): 603e0be78beSJoseph Huber conv_size = type_desc_from_type<size_t>(); 604e0be78beSJoseph Huber break; 605e0be78beSJoseph Huber case (LengthModifier::t): 606e0be78beSJoseph Huber conv_size = type_desc_from_type<ptrdiff_t>(); 607e0be78beSJoseph Huber break; 608fdef5c4fSOm Prakaash case (LengthModifier::w): 609fdef5c4fSOm Prakaash case (LengthModifier::wf): 6106e8a751eSRoland McGrath if (bw <= cpp::numeric_limits<unsigned int>::digits) { 611fdef5c4fSOm Prakaash conv_size = type_desc_from_type<int>(); 6126e8a751eSRoland McGrath } else if (bw <= cpp::numeric_limits<unsigned long>::digits) { 613fdef5c4fSOm Prakaash conv_size = type_desc_from_type<long>(); 6146e8a751eSRoland McGrath } else if (bw <= cpp::numeric_limits<unsigned long long>::digits) { 615fdef5c4fSOm Prakaash conv_size = type_desc_from_type<long long>(); 616fdef5c4fSOm Prakaash } else { 617fdef5c4fSOm Prakaash conv_size = type_desc_from_type<intmax_t>(); 618fdef5c4fSOm Prakaash } 619fdef5c4fSOm Prakaash break; 620e0be78beSJoseph Huber } 621e0be78beSJoseph Huber break; 622e0be78beSJoseph Huber #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT 623e0be78beSJoseph Huber case ('f'): 624e0be78beSJoseph Huber case ('F'): 625e0be78beSJoseph Huber case ('e'): 626e0be78beSJoseph Huber case ('E'): 627e0be78beSJoseph Huber case ('a'): 628e0be78beSJoseph Huber case ('A'): 629e0be78beSJoseph Huber case ('g'): 630e0be78beSJoseph Huber case ('G'): 631e0be78beSJoseph Huber if (lm != LengthModifier::L) 632e0be78beSJoseph Huber conv_size = type_desc_from_type<double>(); 633e0be78beSJoseph Huber else 634e0be78beSJoseph Huber conv_size = type_desc_from_type<long double>(); 635e0be78beSJoseph Huber break; 636e0be78beSJoseph Huber #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT 6378e3b6054SMichael Jones #ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT 6388e3b6054SMichael Jones // Capitalization represents sign, but we only need to get the right 6398e3b6054SMichael Jones // bitwidth here so we ignore that. 6408e3b6054SMichael Jones case ('r'): 6418e3b6054SMichael Jones case ('R'): 6428e3b6054SMichael Jones conv_size = type_desc_from_type<uint32_t>(); 6438e3b6054SMichael Jones break; 6448e3b6054SMichael Jones case ('k'): 6458e3b6054SMichael Jones case ('K'): 6468e3b6054SMichael Jones if (lm == LengthModifier::l) { 6478e3b6054SMichael Jones conv_size = type_desc_from_type<uint64_t>(); 6488e3b6054SMichael Jones } else { 6498e3b6054SMichael Jones conv_size = type_desc_from_type<uint32_t>(); 6508e3b6054SMichael Jones } 6518e3b6054SMichael Jones break; 6528e3b6054SMichael Jones #endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT 653e0be78beSJoseph Huber #ifndef LIBC_COPT_PRINTF_DISABLE_WRITE_INT 654e0be78beSJoseph Huber case ('n'): 655e0be78beSJoseph Huber #endif // LIBC_COPT_PRINTF_DISABLE_WRITE_INT 656e0be78beSJoseph Huber case ('p'): 657e0be78beSJoseph Huber case ('s'): 658e0be78beSJoseph Huber conv_size = type_desc_from_type<void *>(); 659e0be78beSJoseph Huber break; 660e0be78beSJoseph Huber default: 661e0be78beSJoseph Huber conv_size = type_desc_from_type<int>(); 662e0be78beSJoseph Huber break; 663e0be78beSJoseph Huber } 664e0be78beSJoseph Huber 665e0be78beSJoseph Huber set_type_desc(conv_index, conv_size); 666e0be78beSJoseph Huber if (conv_index == index) 667e0be78beSJoseph Huber return conv_size; 668e0be78beSJoseph Huber } 669e0be78beSJoseph Huber // If the end of the format section is on the '\0'. This means we need to 670e0be78beSJoseph Huber // not advance the local_pos. 671e0be78beSJoseph Huber if (str[local_pos] != '\0') 672e0be78beSJoseph Huber ++local_pos; 673e0be78beSJoseph Huber } 674e0be78beSJoseph Huber 675e0be78beSJoseph Huber // If there is no size for the requested index, then it's unknown. Return 676e0be78beSJoseph Huber // void. 677e0be78beSJoseph Huber return type_desc_from_type<void>(); 678e0be78beSJoseph Huber } 679945fa672SMichael Jones 680c3228714SGuillaume Chatelet #endif // LIBC_COPT_PRINTF_DISABLE_INDEX_MODE 6814f4752eeSMichael Jones }; 6824f4752eeSMichael Jones 6834f4752eeSMichael Jones } // namespace printf_core 6845ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL 6854f4752eeSMichael Jones 6864f4752eeSMichael Jones #endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H 687