1 // -*- C++ -*- 2 //===----------------------------------------------------------------------===// 3 // 4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 // See https://llvm.org/LICENSE.txt for license information. 6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 11 #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 12 13 /// \file Contains the std-format-spec parser. 14 /// 15 /// Most of the code can be reused in the chrono-format-spec. 16 /// This header has some support for the chrono-format-spec since it doesn't 17 /// affect the std-format-spec. 18 19 #include <__algorithm/copy_n.h> 20 #include <__algorithm/min.h> 21 #include <__assert> 22 #include <__concepts/arithmetic.h> 23 #include <__concepts/same_as.h> 24 #include <__config> 25 #include <__format/format_arg.h> 26 #include <__format/format_error.h> 27 #include <__format/format_parse_context.h> 28 #include <__format/format_string.h> 29 #include <__format/unicode.h> 30 #include <__format/width_estimation_table.h> 31 #include <__iterator/concepts.h> 32 #include <__iterator/iterator_traits.h> // iter_value_t 33 #include <__memory/addressof.h> 34 #include <__type_traits/common_type.h> 35 #include <__type_traits/is_constant_evaluated.h> 36 #include <__type_traits/is_trivially_copyable.h> 37 #include <__variant/monostate.h> 38 #include <cstdint> 39 #include <string> 40 #include <string_view> 41 42 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 43 # pragma GCC system_header 44 #endif 45 46 _LIBCPP_PUSH_MACROS 47 #include <__undef_macros> 48 49 _LIBCPP_BEGIN_NAMESPACE_STD 50 51 #if _LIBCPP_STD_VER >= 20 52 53 namespace __format_spec { 54 55 [[noreturn]] _LIBCPP_HIDE_FROM_ABI inline void 56 __throw_invalid_option_format_error(const char* __id, const char* __option) { 57 std::__throw_format_error( 58 (string("The format specifier for ") + __id + " does not allow the " + __option + " option").c_str()); 59 } 60 61 [[noreturn]] _LIBCPP_HIDE_FROM_ABI inline void __throw_invalid_type_format_error(const char* __id) { 62 std::__throw_format_error( 63 (string("The type option contains an invalid value for ") + __id + " formatting argument").c_str()); 64 } 65 66 template <contiguous_iterator _Iterator, class _ParseContext> 67 _LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result<_Iterator> 68 __parse_arg_id(_Iterator __begin, _Iterator __end, _ParseContext& __ctx) { 69 using _CharT = iter_value_t<_Iterator>; 70 // This function is a wrapper to call the real parser. But it does the 71 // validation for the pre-conditions and post-conditions. 72 if (__begin == __end) 73 std::__throw_format_error("End of input while parsing an argument index"); 74 75 __format::__parse_number_result __r = __format::__parse_arg_id(__begin, __end, __ctx); 76 77 if (__r.__last == __end || *__r.__last != _CharT('}')) 78 std::__throw_format_error("The argument index is invalid"); 79 80 ++__r.__last; 81 return __r; 82 } 83 84 template <class _Context> 85 _LIBCPP_HIDE_FROM_ABI constexpr uint32_t __substitute_arg_id(basic_format_arg<_Context> __format_arg) { 86 // [format.string.std]/8 87 // If the corresponding formatting argument is not of integral type... 88 // This wording allows char and bool too. LWG-3720 changes the wording to 89 // If the corresponding formatting argument is not of standard signed or 90 // unsigned integer type, 91 // This means the 128-bit will not be valid anymore. 92 // TODO FMT Verify this resolution is accepted and add a test to verify 93 // 128-bit integrals fail and switch to visit_format_arg. 94 return std::__visit_format_arg( 95 [](auto __arg) -> uint32_t { 96 using _Type = decltype(__arg); 97 if constexpr (same_as<_Type, monostate>) 98 std::__throw_format_error("The argument index value is too large for the number of arguments supplied"); 99 100 // [format.string.std]/8 101 // If { arg-idopt } is used in a width or precision, the value of the 102 // corresponding formatting argument is used in its place. If the 103 // corresponding formatting argument is not of standard signed or unsigned 104 // integer type, or its value is negative for precision or non-positive for 105 // width, an exception of type format_error is thrown. 106 // 107 // When an integral is used in a format function, it is stored as one of 108 // the types checked below. Other integral types are promoted. For example, 109 // a signed char is stored as an int. 110 if constexpr (same_as<_Type, int> || same_as<_Type, unsigned int> || // 111 same_as<_Type, long long> || same_as<_Type, unsigned long long>) { 112 if constexpr (signed_integral<_Type>) { 113 if (__arg < 0) 114 std::__throw_format_error("An argument index may not have a negative value"); 115 } 116 117 using _CT = common_type_t<_Type, decltype(__format::__number_max)>; 118 if (static_cast<_CT>(__arg) > static_cast<_CT>(__format::__number_max)) 119 std::__throw_format_error("The value of the argument index exceeds its maximum value"); 120 121 return __arg; 122 } else 123 std::__throw_format_error("Replacement argument isn't a standard signed or unsigned integer type"); 124 }, 125 __format_arg); 126 } 127 128 /// These fields are a filter for which elements to parse. 129 /// 130 /// They default to false so when a new field is added it needs to be opted in 131 /// explicitly. 132 struct _LIBCPP_HIDE_FROM_ABI __fields { 133 uint16_t __sign_ : 1 {false}; 134 uint16_t __alternate_form_ : 1 {false}; 135 uint16_t __zero_padding_ : 1 {false}; 136 uint16_t __precision_ : 1 {false}; 137 uint16_t __locale_specific_form_ : 1 {false}; 138 uint16_t __type_ : 1 {false}; 139 // Determines the valid values for fill. 140 // 141 // Originally the fill could be any character except { and }. Range-based 142 // formatters use the colon to mark the beginning of the 143 // underlying-format-spec. To avoid parsing ambiguities these formatter 144 // specializations prohibit the use of the colon as a fill character. 145 uint16_t __use_range_fill_ : 1 {false}; 146 uint16_t __clear_brackets_ : 1 {false}; 147 uint16_t __consume_all_ : 1 {false}; 148 }; 149 150 // By not placing this constant in the formatter class it's not duplicated for 151 // char and wchar_t. 152 inline constexpr __fields __fields_bool{.__locale_specific_form_ = true, .__type_ = true, .__consume_all_ = true}; 153 inline constexpr __fields __fields_integral{ 154 .__sign_ = true, 155 .__alternate_form_ = true, 156 .__zero_padding_ = true, 157 .__locale_specific_form_ = true, 158 .__type_ = true, 159 .__consume_all_ = true}; 160 inline constexpr __fields __fields_floating_point{ 161 .__sign_ = true, 162 .__alternate_form_ = true, 163 .__zero_padding_ = true, 164 .__precision_ = true, 165 .__locale_specific_form_ = true, 166 .__type_ = true, 167 .__consume_all_ = true}; 168 inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true, .__consume_all_ = true}; 169 inline constexpr __fields __fields_pointer{.__zero_padding_ = true, .__type_ = true, .__consume_all_ = true}; 170 171 # if _LIBCPP_STD_VER >= 23 172 inline constexpr __fields __fields_tuple{.__use_range_fill_ = true, .__clear_brackets_ = true}; 173 inline constexpr __fields __fields_range{.__use_range_fill_ = true, .__clear_brackets_ = true}; 174 inline constexpr __fields __fields_fill_align_width{}; 175 # endif 176 177 enum class __alignment : uint8_t { 178 /// No alignment is set in the format string. 179 __default, 180 __left, 181 __center, 182 __right, 183 __zero_padding 184 }; 185 186 enum class __sign : uint8_t { 187 /// No sign is set in the format string. 188 /// 189 /// The sign isn't allowed for certain format-types. By using this value 190 /// it's possible to detect whether or not the user explicitly set the sign 191 /// flag. For formatting purposes it behaves the same as \ref __minus. 192 __default, 193 __minus, 194 __plus, 195 __space 196 }; 197 198 enum class __type : uint8_t { 199 __default = 0, 200 __string, 201 __binary_lower_case, 202 __binary_upper_case, 203 __octal, 204 __decimal, 205 __hexadecimal_lower_case, 206 __hexadecimal_upper_case, 207 __pointer_lower_case, 208 __pointer_upper_case, 209 __char, 210 __hexfloat_lower_case, 211 __hexfloat_upper_case, 212 __scientific_lower_case, 213 __scientific_upper_case, 214 __fixed_lower_case, 215 __fixed_upper_case, 216 __general_lower_case, 217 __general_upper_case, 218 __debug 219 }; 220 221 _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __create_type_mask(__type __t) { 222 uint32_t __shift = static_cast<uint32_t>(__t); 223 if (__shift == 0) 224 return 1; 225 226 if (__shift > 31) 227 std::__throw_format_error("The type does not fit in the mask"); 228 229 return 1 << __shift; 230 } 231 232 inline constexpr uint32_t __type_mask_integer = 233 __create_type_mask(__type::__binary_lower_case) | // 234 __create_type_mask(__type::__binary_upper_case) | // 235 __create_type_mask(__type::__decimal) | // 236 __create_type_mask(__type::__octal) | // 237 __create_type_mask(__type::__hexadecimal_lower_case) | // 238 __create_type_mask(__type::__hexadecimal_upper_case); 239 240 struct __std { 241 __alignment __alignment_ : 3; 242 __sign __sign_ : 2; 243 bool __alternate_form_ : 1; 244 bool __locale_specific_form_ : 1; 245 __type __type_; 246 }; 247 248 struct __chrono { 249 __alignment __alignment_ : 3; 250 bool __locale_specific_form_ : 1; 251 bool __hour_ : 1; 252 bool __weekday_name_ : 1; 253 bool __weekday_ : 1; 254 bool __day_of_year_ : 1; 255 bool __week_of_year_ : 1; 256 bool __month_name_ : 1; 257 }; 258 259 // The fill UCS scalar value. 260 // 261 // This is always an array, with 1, 2, or 4 elements. 262 // The size of the data structure is always 32-bits. 263 template <class _CharT> 264 struct __code_point; 265 266 template <> 267 struct __code_point<char> { 268 char __data[4] = {' '}; 269 }; 270 271 # if _LIBCPP_HAS_WIDE_CHARACTERS 272 template <> 273 struct __code_point<wchar_t> { 274 wchar_t __data[4 / sizeof(wchar_t)] = {L' '}; 275 }; 276 # endif 277 278 /// Contains the parsed formatting specifications. 279 /// 280 /// This contains information for both the std-format-spec and the 281 /// chrono-format-spec. This results in some unused members for both 282 /// specifications. However these unused members don't increase the size 283 /// of the structure. 284 /// 285 /// This struct doesn't cross ABI boundaries so its layout doesn't need to be 286 /// kept stable. 287 template <class _CharT> 288 struct __parsed_specifications { 289 union { 290 // The field __alignment_ is the first element in __std_ and __chrono_. 291 // This allows the code to always inspect this value regards which member 292 // of the union is the active member [class.union.general]/2. 293 // 294 // This is needed since the generic output routines handle the alignment of 295 // the output. 296 __alignment __alignment_ : 3; 297 __std __std_; 298 __chrono __chrono_; 299 }; 300 301 /// The requested width. 302 /// 303 /// When the format-spec used an arg-id for this field it has already been 304 /// replaced with the value of that arg-id. 305 int32_t __width_; 306 307 /// The requested precision. 308 /// 309 /// When the format-spec used an arg-id for this field it has already been 310 /// replaced with the value of that arg-id. 311 int32_t __precision_; 312 313 __code_point<_CharT> __fill_; 314 315 _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; } 316 317 _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; } 318 }; 319 320 // Validate the struct is small and cheap to copy since the struct is passed by 321 // value in formatting functions. 322 static_assert(sizeof(__parsed_specifications<char>) == 16); 323 static_assert(is_trivially_copyable_v<__parsed_specifications<char>>); 324 # if _LIBCPP_HAS_WIDE_CHARACTERS 325 static_assert(sizeof(__parsed_specifications<wchar_t>) == 16); 326 static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>); 327 # endif 328 329 /// The parser for the std-format-spec. 330 /// 331 /// Note this class is a member of std::formatter specializations. It's 332 /// expected developers will create their own formatter specializations that 333 /// inherit from the std::formatter specializations. This means this class 334 /// must be ABI stable. To aid the stability the unused bits in the class are 335 /// set to zero. That way they can be repurposed if a future revision of the 336 /// Standards adds new fields to std-format-spec. 337 template <class _CharT> 338 class _LIBCPP_TEMPLATE_VIS __parser { 339 public: 340 // Parses the format specification. 341 // 342 // Depending on whether the parsing is done compile-time or run-time 343 // the method slightly differs. 344 // - Only parses a field when it is in the __fields. Accepting all 345 // fields and then validating the valid ones has a performance impact. 346 // This is faster but gives slighly worse error messages. 347 // - At compile-time when a field is not accepted the parser will still 348 // parse it and give an error when it's present. This gives a more 349 // accurate error. 350 // The idea is that most times the format instead of the vformat 351 // functions are used. In that case the error will be detected during 352 // compilation and there is no need to pay for the run-time overhead. 353 template <class _ParseContext> 354 _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator __parse(_ParseContext& __ctx, __fields __fields) { 355 auto __begin = __ctx.begin(); 356 auto __end = __ctx.end(); 357 if (__begin == __end || *__begin == _CharT('}') || (__fields.__use_range_fill_ && *__begin == _CharT(':'))) 358 return __begin; 359 360 if (__parse_fill_align(__begin, __end) && __begin == __end) 361 return __begin; 362 363 if (__fields.__sign_) { 364 if (__parse_sign(__begin) && __begin == __end) 365 return __begin; 366 } else if (std::is_constant_evaluated() && __parse_sign(__begin)) { 367 std::__throw_format_error("The format specification does not allow the sign option"); 368 } 369 370 if (__fields.__alternate_form_) { 371 if (__parse_alternate_form(__begin) && __begin == __end) 372 return __begin; 373 } else if (std::is_constant_evaluated() && __parse_alternate_form(__begin)) { 374 std::__throw_format_error("The format specifier does not allow the alternate form option"); 375 } 376 377 if (__fields.__zero_padding_) { 378 if (__parse_zero_padding(__begin) && __begin == __end) 379 return __begin; 380 } else if (std::is_constant_evaluated() && __parse_zero_padding(__begin)) { 381 std::__throw_format_error("The format specifier does not allow the zero-padding option"); 382 } 383 384 if (__parse_width(__begin, __end, __ctx) && __begin == __end) 385 return __begin; 386 387 if (__fields.__precision_) { 388 if (__parse_precision(__begin, __end, __ctx) && __begin == __end) 389 return __begin; 390 } else if (std::is_constant_evaluated() && __parse_precision(__begin, __end, __ctx)) { 391 std::__throw_format_error("The format specifier does not allow the precision option"); 392 } 393 394 if (__fields.__locale_specific_form_) { 395 if (__parse_locale_specific_form(__begin) && __begin == __end) 396 return __begin; 397 } else if (std::is_constant_evaluated() && __parse_locale_specific_form(__begin)) { 398 std::__throw_format_error("The format specifier does not allow the locale-specific form option"); 399 } 400 401 if (__fields.__clear_brackets_) { 402 if (__parse_clear_brackets(__begin) && __begin == __end) 403 return __begin; 404 } else if (std::is_constant_evaluated() && __parse_clear_brackets(__begin)) { 405 std::__throw_format_error("The format specifier does not allow the n option"); 406 } 407 408 if (__fields.__type_) 409 __parse_type(__begin); 410 411 if (!__fields.__consume_all_) 412 return __begin; 413 414 if (__begin != __end && *__begin != _CharT('}')) 415 std::__throw_format_error("The format specifier should consume the input or end with a '}'"); 416 417 return __begin; 418 } 419 420 // Validates the selected the parsed data. 421 // 422 // The valid fields in the parser may depend on the display type 423 // selected. But the type is the last optional field, so by the time 424 // it's known an option can't be used, it already has been parsed. 425 // This does the validation again. 426 // 427 // For example an integral may have a sign, zero-padding, or alternate 428 // form when the type option is not 'c'. So the generic approach is: 429 // 430 // typename _ParseContext::iterator __result = __parser_.__parse(__ctx, __format_spec::__fields_integral); 431 // if (__parser.__type_ == __format_spec::__type::__char) { 432 // __parser.__validate((__format_spec::__fields_bool, "an integer"); 433 // ... // more char adjustments 434 // } else { 435 // ... // validate an integral type. 436 // } 437 // 438 // For some types all valid options need a second validation run, like 439 // boolean types. 440 // 441 // Depending on whether the validation is done at compile-time or 442 // run-time the error differs 443 // - run-time the exception is thrown and contains the type of field 444 // being validated. 445 // - at compile-time the line with `std::__throw_format_error` is shown 446 // in the output. In that case it's important for the error to be on one 447 // line. 448 // Note future versions of C++ may allow better compile-time error 449 // reporting. 450 _LIBCPP_HIDE_FROM_ABI constexpr void 451 __validate(__fields __fields, const char* __id, uint32_t __type_mask = -1) const { 452 if (!__fields.__sign_ && __sign_ != __sign::__default) { 453 if (std::is_constant_evaluated()) 454 std::__throw_format_error("The format specifier does not allow the sign option"); 455 else 456 __format_spec::__throw_invalid_option_format_error(__id, "sign"); 457 } 458 459 if (!__fields.__alternate_form_ && __alternate_form_) { 460 if (std::is_constant_evaluated()) 461 std::__throw_format_error("The format specifier does not allow the alternate form option"); 462 else 463 __format_spec::__throw_invalid_option_format_error(__id, "alternate form"); 464 } 465 466 if (!__fields.__zero_padding_ && __alignment_ == __alignment::__zero_padding) { 467 if (std::is_constant_evaluated()) 468 std::__throw_format_error("The format specifier does not allow the zero-padding option"); 469 else 470 __format_spec::__throw_invalid_option_format_error(__id, "zero-padding"); 471 } 472 473 if (!__fields.__precision_ && __precision_ != -1) { // Works both when the precision has a value or an arg-id. 474 if (std::is_constant_evaluated()) 475 std::__throw_format_error("The format specifier does not allow the precision option"); 476 else 477 __format_spec::__throw_invalid_option_format_error(__id, "precision"); 478 } 479 480 if (!__fields.__locale_specific_form_ && __locale_specific_form_) { 481 if (std::is_constant_evaluated()) 482 std::__throw_format_error("The format specifier does not allow the locale-specific form option"); 483 else 484 __format_spec::__throw_invalid_option_format_error(__id, "locale-specific form"); 485 } 486 487 if ((__create_type_mask(__type_) & __type_mask) == 0) { 488 if (std::is_constant_evaluated()) 489 std::__throw_format_error("The format specifier uses an invalid value for the type option"); 490 else 491 __format_spec::__throw_invalid_type_format_error(__id); 492 } 493 } 494 495 /// \returns the `__parsed_specifications` with the resolved dynamic sizes.. 496 _LIBCPP_HIDE_FROM_ABI __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const { 497 return __parsed_specifications<_CharT>{ 498 .__std_ = __std{.__alignment_ = __alignment_, 499 .__sign_ = __sign_, 500 .__alternate_form_ = __alternate_form_, 501 .__locale_specific_form_ = __locale_specific_form_, 502 .__type_ = __type_}, 503 .__width_{__get_width(__ctx)}, 504 .__precision_{__get_precision(__ctx)}, 505 .__fill_{__fill_}}; 506 } 507 508 _LIBCPP_HIDE_FROM_ABI __parsed_specifications<_CharT> __get_parsed_chrono_specifications(auto& __ctx) const { 509 return __parsed_specifications<_CharT>{ 510 .__chrono_ = 511 __chrono{.__alignment_ = __alignment_, 512 .__locale_specific_form_ = __locale_specific_form_, 513 .__hour_ = __hour_, 514 .__weekday_name_ = __weekday_name_, 515 .__weekday_ = __weekday_, 516 .__day_of_year_ = __day_of_year_, 517 .__week_of_year_ = __week_of_year_, 518 .__month_name_ = __month_name_}, 519 .__width_{__get_width(__ctx)}, 520 .__precision_{__get_precision(__ctx)}, 521 .__fill_{__fill_}}; 522 } 523 524 __alignment __alignment_ : 3 {__alignment::__default}; 525 __sign __sign_ : 2 {__sign::__default}; 526 bool __alternate_form_ : 1 {false}; 527 bool __locale_specific_form_ : 1 {false}; 528 bool __clear_brackets_ : 1 {false}; 529 __type __type_{__type::__default}; 530 531 // These flags are only used for formatting chrono. Since the struct has 532 // padding space left it's added to this structure. 533 bool __hour_ : 1 {false}; 534 535 bool __weekday_name_ : 1 {false}; 536 bool __weekday_ : 1 {false}; 537 538 bool __day_of_year_ : 1 {false}; 539 bool __week_of_year_ : 1 {false}; 540 541 bool __month_name_ : 1 {false}; 542 543 uint8_t __reserved_0_ : 2 {0}; 544 uint8_t __reserved_1_ : 6 {0}; 545 // These two flags are only used internally and not part of the 546 // __parsed_specifications. Therefore put them at the end. 547 bool __width_as_arg_ : 1 {false}; 548 bool __precision_as_arg_ : 1 {false}; 549 550 /// The requested width, either the value or the arg-id. 551 int32_t __width_{0}; 552 553 /// The requested precision, either the value or the arg-id. 554 int32_t __precision_{-1}; 555 556 __code_point<_CharT> __fill_{}; 557 558 private: 559 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) { 560 switch (__c) { 561 case _CharT('<'): 562 __alignment_ = __alignment::__left; 563 return true; 564 565 case _CharT('^'): 566 __alignment_ = __alignment::__center; 567 return true; 568 569 case _CharT('>'): 570 __alignment_ = __alignment::__right; 571 return true; 572 } 573 return false; 574 } 575 576 _LIBCPP_HIDE_FROM_ABI constexpr void __validate_fill_character(_CharT __fill) { 577 // The forbidden fill characters all code points formed from a single code unit, thus the 578 // check can be omitted when more code units are used. 579 if (__fill == _CharT('{')) 580 std::__throw_format_error("The fill option contains an invalid value"); 581 } 582 583 # if _LIBCPP_HAS_UNICODE 584 // range-fill and tuple-fill are identical 585 template <contiguous_iterator _Iterator> 586 requires same_as<_CharT, char> 587 # if _LIBCPP_HAS_WIDE_CHARACTERS 588 || (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) 589 # endif 590 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) { 591 _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( 592 __begin != __end, 593 "when called with an empty input the function will cause " 594 "undefined behavior by evaluating data not in the input"); 595 __unicode::__code_point_view<_CharT> __view{__begin, __end}; 596 __unicode::__consume_result __consumed = __view.__consume(); 597 if (__consumed.__status != __unicode::__consume_result::__ok) 598 std::__throw_format_error("The format specifier contains malformed Unicode characters"); 599 600 if (__view.__position() < __end && __parse_alignment(*__view.__position())) { 601 ptrdiff_t __code_units = __view.__position() - __begin; 602 if (__code_units == 1) 603 // The forbidden fill characters all are code points encoded 604 // in one code unit, thus the check can be omitted when more 605 // code units are used. 606 __validate_fill_character(*__begin); 607 608 std::copy_n(__begin, __code_units, std::addressof(__fill_.__data[0])); 609 __begin += __code_units + 1; 610 return true; 611 } 612 613 if (!__parse_alignment(*__begin)) 614 return false; 615 616 ++__begin; 617 return true; 618 } 619 620 # if _LIBCPP_HAS_WIDE_CHARACTERS 621 template <contiguous_iterator _Iterator> 622 requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) 623 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) { 624 _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( 625 __begin != __end, 626 "when called with an empty input the function will cause " 627 "undefined behavior by evaluating data not in the input"); 628 if (__begin + 1 != __end && __parse_alignment(*(__begin + 1))) { 629 if (!__unicode::__is_scalar_value(*__begin)) 630 std::__throw_format_error("The fill option contains an invalid value"); 631 632 __validate_fill_character(*__begin); 633 634 __fill_.__data[0] = *__begin; 635 __begin += 2; 636 return true; 637 } 638 639 if (!__parse_alignment(*__begin)) 640 return false; 641 642 ++__begin; 643 return true; 644 } 645 646 # endif // _LIBCPP_HAS_WIDE_CHARACTERS 647 648 # else // _LIBCPP_HAS_UNICODE 649 // range-fill and tuple-fill are identical 650 template <contiguous_iterator _Iterator> 651 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) { 652 _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( 653 __begin != __end, 654 "when called with an empty input the function will cause " 655 "undefined behavior by evaluating data not in the input"); 656 if (__begin + 1 != __end) { 657 if (__parse_alignment(*(__begin + 1))) { 658 __validate_fill_character(*__begin); 659 660 __fill_.__data[0] = *__begin; 661 __begin += 2; 662 return true; 663 } 664 } 665 666 if (!__parse_alignment(*__begin)) 667 return false; 668 669 ++__begin; 670 return true; 671 } 672 673 # endif // _LIBCPP_HAS_UNICODE 674 675 template <contiguous_iterator _Iterator> 676 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(_Iterator& __begin) { 677 switch (*__begin) { 678 case _CharT('-'): 679 __sign_ = __sign::__minus; 680 break; 681 case _CharT('+'): 682 __sign_ = __sign::__plus; 683 break; 684 case _CharT(' '): 685 __sign_ = __sign::__space; 686 break; 687 default: 688 return false; 689 } 690 ++__begin; 691 return true; 692 } 693 694 template <contiguous_iterator _Iterator> 695 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(_Iterator& __begin) { 696 if (*__begin != _CharT('#')) 697 return false; 698 699 __alternate_form_ = true; 700 ++__begin; 701 return true; 702 } 703 704 template <contiguous_iterator _Iterator> 705 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(_Iterator& __begin) { 706 if (*__begin != _CharT('0')) 707 return false; 708 709 if (__alignment_ == __alignment::__default) 710 __alignment_ = __alignment::__zero_padding; 711 ++__begin; 712 return true; 713 } 714 715 template <contiguous_iterator _Iterator> 716 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(_Iterator& __begin, _Iterator __end, auto& __ctx) { 717 if (*__begin == _CharT('0')) 718 std::__throw_format_error("The width option should not have a leading zero"); 719 720 if (*__begin == _CharT('{')) { 721 __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __ctx); 722 __width_as_arg_ = true; 723 __width_ = __r.__value; 724 __begin = __r.__last; 725 return true; 726 } 727 728 if (*__begin < _CharT('0') || *__begin > _CharT('9')) 729 return false; 730 731 __format::__parse_number_result __r = __format::__parse_number(__begin, __end); 732 __width_ = __r.__value; 733 _LIBCPP_ASSERT_INTERNAL(__width_ != 0, 734 "A zero value isn't allowed and should be impossible, " 735 "due to validations in this function"); 736 __begin = __r.__last; 737 return true; 738 } 739 740 template <contiguous_iterator _Iterator> 741 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(_Iterator& __begin, _Iterator __end, auto& __ctx) { 742 if (*__begin != _CharT('.')) 743 return false; 744 745 ++__begin; 746 if (__begin == __end) 747 std::__throw_format_error("End of input while parsing format specifier precision"); 748 749 if (*__begin == _CharT('{')) { 750 __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __ctx); 751 __precision_as_arg_ = true; 752 __precision_ = __arg_id.__value; 753 __begin = __arg_id.__last; 754 return true; 755 } 756 757 if (*__begin < _CharT('0') || *__begin > _CharT('9')) 758 std::__throw_format_error("The precision option does not contain a value or an argument index"); 759 760 __format::__parse_number_result __r = __format::__parse_number(__begin, __end); 761 __precision_ = __r.__value; 762 __precision_as_arg_ = false; 763 __begin = __r.__last; 764 return true; 765 } 766 767 template <contiguous_iterator _Iterator> 768 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(_Iterator& __begin) { 769 if (*__begin != _CharT('L')) 770 return false; 771 772 __locale_specific_form_ = true; 773 ++__begin; 774 return true; 775 } 776 777 template <contiguous_iterator _Iterator> 778 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_clear_brackets(_Iterator& __begin) { 779 if (*__begin != _CharT('n')) 780 return false; 781 782 __clear_brackets_ = true; 783 ++__begin; 784 return true; 785 } 786 787 template <contiguous_iterator _Iterator> 788 _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(_Iterator& __begin) { 789 // Determines the type. It does not validate whether the selected type is 790 // valid. Most formatters have optional fields that are only allowed for 791 // certain types. These parsers need to do validation after the type has 792 // been parsed. So its easier to implement the validation for all types in 793 // the specific parse function. 794 switch (*__begin) { 795 case 'A': 796 __type_ = __type::__hexfloat_upper_case; 797 break; 798 case 'B': 799 __type_ = __type::__binary_upper_case; 800 break; 801 case 'E': 802 __type_ = __type::__scientific_upper_case; 803 break; 804 case 'F': 805 __type_ = __type::__fixed_upper_case; 806 break; 807 case 'G': 808 __type_ = __type::__general_upper_case; 809 break; 810 case 'X': 811 __type_ = __type::__hexadecimal_upper_case; 812 break; 813 case 'a': 814 __type_ = __type::__hexfloat_lower_case; 815 break; 816 case 'b': 817 __type_ = __type::__binary_lower_case; 818 break; 819 case 'c': 820 __type_ = __type::__char; 821 break; 822 case 'd': 823 __type_ = __type::__decimal; 824 break; 825 case 'e': 826 __type_ = __type::__scientific_lower_case; 827 break; 828 case 'f': 829 __type_ = __type::__fixed_lower_case; 830 break; 831 case 'g': 832 __type_ = __type::__general_lower_case; 833 break; 834 case 'o': 835 __type_ = __type::__octal; 836 break; 837 case 'p': 838 __type_ = __type::__pointer_lower_case; 839 break; 840 case 'P': 841 __type_ = __type::__pointer_upper_case; 842 break; 843 case 's': 844 __type_ = __type::__string; 845 break; 846 case 'x': 847 __type_ = __type::__hexadecimal_lower_case; 848 break; 849 # if _LIBCPP_STD_VER >= 23 850 case '?': 851 __type_ = __type::__debug; 852 break; 853 # endif 854 default: 855 return; 856 } 857 ++__begin; 858 } 859 860 _LIBCPP_HIDE_FROM_ABI int32_t __get_width(auto& __ctx) const { 861 if (!__width_as_arg_) 862 return __width_; 863 864 return __format_spec::__substitute_arg_id(__ctx.arg(__width_)); 865 } 866 867 _LIBCPP_HIDE_FROM_ABI int32_t __get_precision(auto& __ctx) const { 868 if (!__precision_as_arg_) 869 return __precision_; 870 871 return __format_spec::__substitute_arg_id(__ctx.arg(__precision_)); 872 } 873 }; 874 875 // Validates whether the reserved bitfields don't change the size. 876 static_assert(sizeof(__parser<char>) == 16); 877 # if _LIBCPP_HAS_WIDE_CHARACTERS 878 static_assert(sizeof(__parser<wchar_t>) == 16); 879 # endif 880 881 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) { 882 switch (__type) { 883 case __format_spec::__type::__default: 884 case __format_spec::__type::__string: 885 case __format_spec::__type::__debug: 886 break; 887 888 default: 889 std::__throw_format_error("The type option contains an invalid value for a string formatting argument"); 890 } 891 } 892 893 template <class _CharT> 894 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_bool_string(__parser<_CharT>& __parser, const char* __id) { 895 __parser.__validate(__format_spec::__fields_bool, __id); 896 if (__parser.__alignment_ == __alignment::__default) 897 __parser.__alignment_ = __alignment::__left; 898 } 899 900 template <class _CharT> 901 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT>& __parser, const char* __id) { 902 __format_spec::__process_display_type_bool_string(__parser, __id); 903 } 904 905 template <class _CharT> 906 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser, const char* __id) { 907 switch (__parser.__type_) { 908 case __format_spec::__type::__default: 909 case __format_spec::__type::__string: 910 __format_spec::__process_display_type_bool_string(__parser, __id); 911 break; 912 913 case __format_spec::__type::__binary_lower_case: 914 case __format_spec::__type::__binary_upper_case: 915 case __format_spec::__type::__octal: 916 case __format_spec::__type::__decimal: 917 case __format_spec::__type::__hexadecimal_lower_case: 918 case __format_spec::__type::__hexadecimal_upper_case: 919 break; 920 921 default: 922 __format_spec::__throw_invalid_type_format_error(__id); 923 } 924 } 925 926 template <class _CharT> 927 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser, const char* __id) { 928 switch (__parser.__type_) { 929 case __format_spec::__type::__default: 930 case __format_spec::__type::__char: 931 case __format_spec::__type::__debug: 932 __format_spec::__process_display_type_char(__parser, __id); 933 break; 934 935 case __format_spec::__type::__binary_lower_case: 936 case __format_spec::__type::__binary_upper_case: 937 case __format_spec::__type::__octal: 938 case __format_spec::__type::__decimal: 939 case __format_spec::__type::__hexadecimal_lower_case: 940 case __format_spec::__type::__hexadecimal_upper_case: 941 break; 942 943 default: 944 __format_spec::__throw_invalid_type_format_error(__id); 945 } 946 } 947 948 template <class _CharT> 949 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser, const char* __id) { 950 switch (__parser.__type_) { 951 case __format_spec::__type::__default: 952 case __format_spec::__type::__binary_lower_case: 953 case __format_spec::__type::__binary_upper_case: 954 case __format_spec::__type::__octal: 955 case __format_spec::__type::__decimal: 956 case __format_spec::__type::__hexadecimal_lower_case: 957 case __format_spec::__type::__hexadecimal_upper_case: 958 break; 959 960 case __format_spec::__type::__char: 961 __format_spec::__process_display_type_char(__parser, __id); 962 break; 963 964 default: 965 __format_spec::__throw_invalid_type_format_error(__id); 966 } 967 } 968 969 template <class _CharT> 970 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser, const char* __id) { 971 switch (__parser.__type_) { 972 case __format_spec::__type::__default: 973 case __format_spec::__type::__hexfloat_lower_case: 974 case __format_spec::__type::__hexfloat_upper_case: 975 // Precision specific behavior will be handled later. 976 break; 977 case __format_spec::__type::__scientific_lower_case: 978 case __format_spec::__type::__scientific_upper_case: 979 case __format_spec::__type::__fixed_lower_case: 980 case __format_spec::__type::__fixed_upper_case: 981 case __format_spec::__type::__general_lower_case: 982 case __format_spec::__type::__general_upper_case: 983 if (!__parser.__precision_as_arg_ && __parser.__precision_ == -1) 984 // Set the default precision for the call to to_chars. 985 __parser.__precision_ = 6; 986 break; 987 988 default: 989 __format_spec::__throw_invalid_type_format_error(__id); 990 } 991 } 992 993 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type, const char* __id) { 994 switch (__type) { 995 case __format_spec::__type::__default: 996 case __format_spec::__type::__pointer_lower_case: 997 case __format_spec::__type::__pointer_upper_case: 998 break; 999 1000 default: 1001 __format_spec::__throw_invalid_type_format_error(__id); 1002 } 1003 } 1004 1005 template <contiguous_iterator _Iterator> 1006 struct __column_width_result { 1007 /// The number of output columns. 1008 size_t __width_; 1009 /// One beyond the last code unit used in the estimation. 1010 /// 1011 /// This limits the original output to fit in the wanted number of columns. 1012 _Iterator __last_; 1013 }; 1014 1015 template <contiguous_iterator _Iterator> 1016 __column_width_result(size_t, _Iterator) -> __column_width_result<_Iterator>; 1017 1018 /// Since a column width can be two it's possible that the requested column 1019 /// width can't be achieved. Depending on the intended usage the policy can be 1020 /// selected. 1021 /// - When used as precision the maximum width may not be exceeded and the 1022 /// result should be "rounded down" to the previous boundary. 1023 /// - When used as a width we're done once the minimum is reached, but 1024 /// exceeding is not an issue. Rounding down is an issue since that will 1025 /// result in writing fill characters. Therefore the result needs to be 1026 /// "rounded up". 1027 enum class __column_width_rounding { __down, __up }; 1028 1029 # if _LIBCPP_HAS_UNICODE 1030 1031 namespace __detail { 1032 template <contiguous_iterator _Iterator> 1033 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width_grapheme_clustering( 1034 _Iterator __first, _Iterator __last, size_t __maximum, __column_width_rounding __rounding) noexcept { 1035 using _CharT = iter_value_t<_Iterator>; 1036 __unicode::__extended_grapheme_cluster_view<_CharT> __view{__first, __last}; 1037 1038 __column_width_result<_Iterator> __result{0, __first}; 1039 while (__result.__last_ != __last && __result.__width_ <= __maximum) { 1040 typename __unicode::__extended_grapheme_cluster_view<_CharT>::__cluster __cluster = __view.__consume(); 1041 int __width = __width_estimation_table::__estimated_width(__cluster.__code_point_); 1042 1043 // When the next entry would exceed the maximum width the previous width 1044 // might be returned. For example when a width of 100 is requested the 1045 // returned width might be 99, since the next code point has an estimated 1046 // column width of 2. This depends on the rounding flag. 1047 // When the maximum is exceeded the loop will abort the next iteration. 1048 if (__rounding == __column_width_rounding::__down && __result.__width_ + __width > __maximum) 1049 return __result; 1050 1051 __result.__width_ += __width; 1052 __result.__last_ = __cluster.__last_; 1053 } 1054 1055 return __result; 1056 } 1057 1058 } // namespace __detail 1059 1060 // Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32. 1061 // Depending on format the relation between the number of code units stored and 1062 // the number of output columns differs. The first relation is the number of 1063 // code units forming a code point. (The text assumes the code units are 1064 // unsigned.) 1065 // - UTF-8 The number of code units is between one and four. The first 127 1066 // Unicode code points match the ASCII character set. When the highest bit is 1067 // set it means the code point has more than one code unit. 1068 // - UTF-16: The number of code units is between 1 and 2. When the first 1069 // code unit is in the range [0xd800,0xdfff) it means the code point uses two 1070 // code units. 1071 // - UTF-32: The number of code units is always one. 1072 // 1073 // The code point to the number of columns is specified in 1074 // [format.string.std]/11. This list might change in the future. 1075 // 1076 // Another thing to be taken into account is Grapheme clustering. This means 1077 // that in some cases multiple code points are combined one element in the 1078 // output. For example: 1079 // - an ASCII character with a combined diacritical mark 1080 // - an emoji with a skin tone modifier 1081 // - a group of combined people emoji to create a family 1082 // - a combination of flag emoji 1083 // 1084 // See also: 1085 // - [format.string.general]/11 1086 // - https://en.wikipedia.org/wiki/UTF-8#Encoding 1087 // - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF 1088 1089 _LIBCPP_HIDE_FROM_ABI constexpr bool __is_ascii(char32_t __c) { return __c < 0x80; } 1090 1091 /// Determines the number of output columns needed to render the input. 1092 /// 1093 /// \note When the scanner encounters malformed Unicode it acts as-if every 1094 /// code unit is a one column code point. Typically a terminal uses the same 1095 /// strategy and replaces every malformed code unit with a one column 1096 /// replacement character. 1097 /// 1098 /// \param __first Points to the first element of the input range. 1099 /// \param __last Points beyond the last element of the input range. 1100 /// \param __maximum The maximum number of output columns. The returned number 1101 /// of estimated output columns will not exceed this value. 1102 /// \param __rounding Selects the rounding method. 1103 /// \c __down result.__width_ <= __maximum 1104 /// \c __up result.__width_ <= __maximum + 1 1105 template <class _CharT, class _Iterator = typename basic_string_view<_CharT>::const_iterator> 1106 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width( 1107 basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding __rounding) noexcept { 1108 // The width estimation is done in two steps: 1109 // - Quickly process for the ASCII part. ASCII has the following properties 1110 // - One code unit is one code point 1111 // - Every code point has an estimated width of one 1112 // - When needed it will a Unicode Grapheme clustering algorithm to find 1113 // the proper place for truncation. 1114 1115 if (__str.empty() || __maximum == 0) 1116 return {0, __str.begin()}; 1117 1118 // ASCII has one caveat; when an ASCII character is followed by a non-ASCII 1119 // character they might be part of an extended grapheme cluster. For example: 1120 // an ASCII letter and a COMBINING ACUTE ACCENT 1121 // The truncate should happen after the COMBINING ACUTE ACCENT. Therefore we 1122 // need to scan one code unit beyond the requested precision. When this code 1123 // unit is non-ASCII we omit the current code unit and let the Grapheme 1124 // clustering algorithm do its work. 1125 auto __it = __str.begin(); 1126 if (__format_spec::__is_ascii(*__it)) { 1127 do { 1128 --__maximum; 1129 ++__it; 1130 if (__it == __str.end()) 1131 return {__str.size(), __str.end()}; 1132 1133 if (__maximum == 0) { 1134 if (__format_spec::__is_ascii(*__it)) 1135 return {static_cast<size_t>(__it - __str.begin()), __it}; 1136 1137 break; 1138 } 1139 } while (__format_spec::__is_ascii(*__it)); 1140 --__it; 1141 ++__maximum; 1142 } 1143 1144 ptrdiff_t __ascii_size = __it - __str.begin(); 1145 __column_width_result __result = 1146 __detail::__estimate_column_width_grapheme_clustering(__it, __str.end(), __maximum, __rounding); 1147 1148 __result.__width_ += __ascii_size; 1149 return __result; 1150 } 1151 # else // _LIBCPP_HAS_UNICODE 1152 template <class _CharT> 1153 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<typename basic_string_view<_CharT>::const_iterator> 1154 __estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding) noexcept { 1155 // When Unicode isn't supported assume ASCII and every code unit is one code 1156 // point. In ASCII the estimated column width is always one. Thus there's no 1157 // need for rounding. 1158 size_t __width = std::min(__str.size(), __maximum); 1159 return {__width, __str.begin() + __width}; 1160 } 1161 1162 # endif // _LIBCPP_HAS_UNICODE 1163 1164 } // namespace __format_spec 1165 1166 #endif // _LIBCPP_STD_VER >= 20 1167 1168 _LIBCPP_END_NAMESPACE_STD 1169 1170 _LIBCPP_POP_MACROS 1171 1172 #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 1173