1 // -*- C++ -*- 2 //===----------------------------------------------------------------------===// 3 // 4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 // See https://llvm.org/LICENSE.txt for license information. 6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef _LIBCPP___FORMAT_FORMATTER_INTEGRAL_H 11 #define _LIBCPP___FORMAT_FORMATTER_INTEGRAL_H 12 13 #include <__algorithm/copy.h> 14 #include <__algorithm/copy_n.h> 15 #include <__algorithm/fill_n.h> 16 #include <__algorithm/transform.h> 17 #include <__config> 18 #include <__format/format_error.h> 19 #include <__format/format_fwd.h> 20 #include <__format/formatter.h> 21 #include <__format/parser_std_format_spec.h> 22 #include <array> 23 #include <charconv> 24 #include <concepts> 25 #include <limits> 26 #include <string> 27 28 #ifndef _LIBCPP_HAS_NO_LOCALIZATION 29 #include <locale> 30 #endif 31 32 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 33 #pragma GCC system_header 34 #endif 35 36 _LIBCPP_PUSH_MACROS 37 #include <__undef_macros> 38 39 _LIBCPP_BEGIN_NAMESPACE_STD 40 41 #if _LIBCPP_STD_VER > 17 42 43 // TODO FMT Remove this once we require compilers with proper C++20 support. 44 // If the compiler has no concepts support, the format header will be disabled. 45 // Without concepts support enable_if needs to be used and that too much effort 46 // to support compilers with partial C++20 support. 47 #if !defined(_LIBCPP_HAS_NO_CONCEPTS) 48 49 /** 50 * Integral formatting classes. 51 * 52 * There are two types used here: 53 * * C++-type, the type as used in C++. 54 * * format-type, the output type specified in the std-format-spec. 55 * 56 * Design of the integral formatters consists of several layers. 57 * * @ref __parser_integral The basic std-format-spec parser for all integral 58 * classes. This parser does the basic sanity checks. It also contains some 59 * helper functions that are nice to have available for all parsers. 60 * * A C++-type specific parser. These parsers must derive from 61 * @ref __parser_integral. Their task is to validate whether the parsed 62 * std-format-spec is valid for the C++-type and selected format-type. After 63 * validation they need to make sure all members are properly set. For 64 * example, when the alignment hasn't changed it needs to set the proper 65 * default alignment for the format-type. The following parsers are available: 66 * - @ref __parser_integer 67 * - @ref __parser_char 68 * - @ref __parser_bool 69 * * A general formatter for all integral types @ref __formatter_integral. This 70 * formatter can handle all formatting of integers and characters. The class 71 * derives from the proper formatter. 72 * Note the boolean string format-type isn't supported in this class. 73 * * A typedef C++-type group combining the @ref __formatter_integral with a 74 * parser: 75 * * @ref __formatter_integer 76 * * @ref __formatter_char 77 * * @ref __formatter_bool 78 * * Then every C++-type has its own formatter specializations. They inherit 79 * from the C++-type group typedef. Most specializations need nothing else. 80 * Others need some additional specializations in this class. 81 */ 82 namespace __format_spec { 83 84 /** Wrapper around @ref to_chars, returning the output pointer. */ 85 template <integral _Tp> 86 _LIBCPP_HIDE_FROM_ABI char* __to_buffer(char* __first, char* __last, 87 _Tp __value, int __base) { 88 // TODO FMT Evaluate code overhead due to not calling the internal function 89 // directly. (Should be zero overhead.) 90 to_chars_result __r = _VSTD::to_chars(__first, __last, __value, __base); 91 _LIBCPP_ASSERT(__r.ec == errc(0), "Internal buffer too small"); 92 return __r.ptr; 93 } 94 95 /** 96 * Helper to determine the buffer size to output a integer in Base @em x. 97 * 98 * There are several overloads for the supported bases. The function uses the 99 * base as template argument so it can be used in a constant expression. 100 */ 101 template <unsigned_integral _Tp, size_t _Base> 102 _LIBCPP_HIDE_FROM_ABI constexpr size_t __buffer_size() noexcept 103 requires(_Base == 2) { 104 return numeric_limits<_Tp>::digits // The number of binary digits. 105 + 2 // Reserve space for the '0[Bb]' prefix. 106 + 1; // Reserve space for the sign. 107 } 108 109 template <unsigned_integral _Tp, size_t _Base> 110 _LIBCPP_HIDE_FROM_ABI constexpr size_t __buffer_size() noexcept 111 requires(_Base == 8) { 112 return numeric_limits<_Tp>::digits // The number of binary digits. 113 / 3 // Adjust to octal. 114 + 1 // Turn floor to ceil. 115 + 1 // Reserve space for the '0' prefix. 116 + 1; // Reserve space for the sign. 117 } 118 119 template <unsigned_integral _Tp, size_t _Base> 120 _LIBCPP_HIDE_FROM_ABI constexpr size_t __buffer_size() noexcept 121 requires(_Base == 10) { 122 return numeric_limits<_Tp>::digits10 // The floored value. 123 + 1 // Turn floor to ceil. 124 + 1; // Reserve space for the sign. 125 } 126 127 template <unsigned_integral _Tp, size_t _Base> 128 _LIBCPP_HIDE_FROM_ABI constexpr size_t __buffer_size() noexcept 129 requires(_Base == 16) { 130 return numeric_limits<_Tp>::digits // The number of binary digits. 131 / 4 // Adjust to hexadecimal. 132 + 2 // Reserve space for the '0[Xx]' prefix. 133 + 1; // Reserve space for the sign. 134 } 135 136 /** 137 * Determines the required grouping based on the size of the input. 138 * 139 * The grouping's last element will be repeated. For simplicity this repeating 140 * is unwrapped based on the length of the input. (When the input is short some 141 * groups are not processed.) 142 * 143 * @returns The size of the groups to write. This means the number of 144 * separator characters written is size() - 1. 145 * 146 * @note Since zero-sized groups cause issues they are silently ignored. 147 * 148 * @note The grouping field of the locale is always a @c std::string, 149 * regardless whether the @c std::numpunct's type is @c char or @c wchar_t. 150 */ 151 _LIBCPP_HIDE_FROM_ABI inline string 152 __determine_grouping(ptrdiff_t __size, const string& __grouping) { 153 _LIBCPP_ASSERT(!__grouping.empty() && __size > __grouping[0], 154 "The slow grouping formatting is used while there will be no " 155 "separators written"); 156 string __r; 157 auto __end = __grouping.end() - 1; 158 auto __ptr = __grouping.begin(); 159 160 while (true) { 161 __size -= *__ptr; 162 if (__size > 0) 163 __r.push_back(*__ptr); 164 else { 165 // __size <= 0 so the value pushed will be <= *__ptr. 166 __r.push_back(*__ptr + __size); 167 return __r; 168 } 169 170 // Proceed to the next group. 171 if (__ptr != __end) { 172 do { 173 ++__ptr; 174 // Skip grouping with a width of 0. 175 } while (*__ptr == 0 && __ptr != __end); 176 } 177 } 178 179 _LIBCPP_UNREACHABLE(); 180 } 181 182 template <class _Parser> 183 requires __formatter::__char_type<typename _Parser::char_type> 184 class _LIBCPP_TEMPLATE_VIS __formatter_integral : public _Parser { 185 public: 186 using _CharT = typename _Parser::char_type; 187 188 template <integral _Tp> 189 _LIBCPP_HIDE_FROM_ABI auto format(_Tp __value, auto& __ctx) 190 -> decltype(__ctx.out()) { 191 if (this->__width_needs_substitution()) 192 this->__substitute_width_arg_id(__ctx.arg(this->__width)); 193 194 if (this->__type == _Flags::_Type::__char) 195 return __format_as_char(__value, __ctx); 196 197 if constexpr (unsigned_integral<_Tp>) 198 return __format_unsigned_integral(__value, false, __ctx); 199 else { 200 // Depending on the std-format-spec string the sign and the value 201 // might not be outputted together: 202 // - alternate form may insert a prefix string. 203 // - zero-padding may insert additional '0' characters. 204 // Therefore the value is processed as a positive unsigned value. 205 // The function @ref __insert_sign will a '-' when the value was negative. 206 auto __r = __to_unsigned_like(__value); 207 bool __negative = __value < 0; 208 if (__negative) 209 __r = __complement(__r); 210 211 return __format_unsigned_integral(__r, __negative, __ctx); 212 } 213 } 214 215 private: 216 /** Generic formatting for format-type c. */ 217 _LIBCPP_HIDE_FROM_ABI auto __format_as_char(integral auto __value, 218 auto& __ctx) 219 -> decltype(__ctx.out()) { 220 if (this->__alignment == _Flags::_Alignment::__default) 221 this->__alignment = _Flags::_Alignment::__right; 222 223 using _Tp = decltype(__value); 224 if constexpr (!same_as<_CharT, _Tp>) { 225 // cmp_less and cmp_greater can't be used for character types. 226 if constexpr (signed_integral<_CharT> == signed_integral<_Tp>) { 227 if (__value < numeric_limits<_CharT>::min() || 228 __value > numeric_limits<_CharT>::max()) 229 __throw_format_error( 230 "Integral value outside the range of the char type"); 231 } else if constexpr (signed_integral<_CharT>) { 232 // _CharT is signed _Tp is unsigned 233 if (__value > 234 static_cast<make_unsigned_t<_CharT>>(numeric_limits<_CharT>::max())) 235 __throw_format_error( 236 "Integral value outside the range of the char type"); 237 } else { 238 // _CharT is unsigned _Tp is signed 239 if (__value < 0 || static_cast<make_unsigned_t<_Tp>>(__value) > 240 numeric_limits<_CharT>::max()) 241 __throw_format_error( 242 "Integral value outside the range of the char type"); 243 } 244 } 245 246 const auto __c = static_cast<_CharT>(__value); 247 return __write(_VSTD::addressof(__c), _VSTD::addressof(__c) + 1, 248 __ctx.out()); 249 } 250 251 /** 252 * Generic formatting for format-type bBdoxX. 253 * 254 * This small wrapper allocates a buffer with the required size. Then calls 255 * the real formatter with the buffer and the prefix for the base. 256 */ 257 _LIBCPP_HIDE_FROM_ABI auto 258 __format_unsigned_integral(unsigned_integral auto __value, bool __negative, 259 auto& __ctx) -> decltype(__ctx.out()) { 260 switch (this->__type) { 261 case _Flags::_Type::__binary_lower_case: { 262 array<char, __buffer_size<decltype(__value), 2>()> __array; 263 return __format_unsigned_integral(__array.begin(), __array.end(), __value, 264 __negative, 2, __ctx, "0b"); 265 } 266 case _Flags::_Type::__binary_upper_case: { 267 array<char, __buffer_size<decltype(__value), 2>()> __array; 268 return __format_unsigned_integral(__array.begin(), __array.end(), __value, 269 __negative, 2, __ctx, "0B"); 270 } 271 case _Flags::_Type::__octal: { 272 // Octal is special; if __value == 0 there's no prefix. 273 array<char, __buffer_size<decltype(__value), 8>()> __array; 274 return __format_unsigned_integral(__array.begin(), __array.end(), __value, 275 __negative, 8, __ctx, 276 __value != 0 ? "0" : nullptr); 277 } 278 case _Flags::_Type::__decimal: { 279 array<char, __buffer_size<decltype(__value), 10>()> __array; 280 return __format_unsigned_integral(__array.begin(), __array.end(), __value, 281 __negative, 10, __ctx, nullptr); 282 } 283 case _Flags::_Type::__hexadecimal_lower_case: { 284 array<char, __buffer_size<decltype(__value), 16>()> __array; 285 return __format_unsigned_integral(__array.begin(), __array.end(), __value, 286 __negative, 16, __ctx, "0x"); 287 } 288 case _Flags::_Type::__hexadecimal_upper_case: { 289 array<char, __buffer_size<decltype(__value), 16>()> __array; 290 return __format_unsigned_integral(__array.begin(), __array.end(), __value, 291 __negative, 16, __ctx, "0X"); 292 } 293 default: 294 _LIBCPP_ASSERT(false, "The parser should have validated the type"); 295 _LIBCPP_UNREACHABLE(); 296 } 297 } 298 299 template <class _Tp> 300 requires(same_as<char, _Tp> || same_as<wchar_t, _Tp>) _LIBCPP_HIDE_FROM_ABI 301 auto __write(const _Tp* __first, const _Tp* __last, auto __out_it) 302 -> decltype(__out_it) { 303 304 unsigned __size = __last - __first; 305 if (this->__type != _Flags::_Type::__hexadecimal_upper_case) [[likely]] { 306 if (__size >= this->__width) 307 return _VSTD::copy(__first, __last, _VSTD::move(__out_it)); 308 309 return __formatter::__write(_VSTD::move(__out_it), __first, __last, 310 __size, this->__width, this->__fill, 311 this->__alignment); 312 } 313 314 // this->__type == _Flags::_Type::__hexadecimal_upper_case 315 // This means all characters in the range [a-f] need to be changed to their 316 // uppercase representation. The transformation is done as transformation 317 // in the output routine instead of before. This avoids another pass over 318 // the data. 319 // TODO FMT See whether it's possible to do this transformation during the 320 // conversion. (This probably requires changing std::to_chars' alphabet.) 321 if (__size >= this->__width) 322 return _VSTD::transform(__first, __last, _VSTD::move(__out_it), 323 __hex_to_upper); 324 325 return __formatter::__write(_VSTD::move(__out_it), __first, __last, __size, 326 __hex_to_upper, this->__width, this->__fill, 327 this->__alignment); 328 } 329 330 _LIBCPP_HIDE_FROM_ABI auto 331 __format_unsigned_integral(char* __begin, char* __end, 332 unsigned_integral auto __value, bool __negative, 333 int __base, auto& __ctx, const char* __prefix) 334 -> decltype(__ctx.out()) { 335 char* __first = __insert_sign(__begin, __negative, this->__sign); 336 if (this->__alternate_form && __prefix) 337 while (*__prefix) 338 *__first++ = *__prefix++; 339 340 char* __last = __to_buffer(__first, __end, __value, __base); 341 #ifndef _LIBCPP_HAS_NO_LOCALIZATION 342 if (this->__locale_specific_form) { 343 const auto& __np = use_facet<numpunct<_CharT>>(__ctx.locale()); 344 string __grouping = __np.grouping(); 345 ptrdiff_t __size = __last - __first; 346 // Writing the grouped form has more overhead than the normal output 347 // routines. If there will be no separators written the locale-specific 348 // form is identical to the normal routine. Test whether to grouped form 349 // is required. 350 if (!__grouping.empty() && __size > __grouping[0]) 351 return __format_grouping(__ctx.out(), __begin, __first, __last, 352 __determine_grouping(__size, __grouping), 353 __np.thousands_sep()); 354 } 355 #endif 356 auto __out_it = __ctx.out(); 357 if (this->__alignment != _Flags::_Alignment::__default) 358 __first = __begin; 359 else { 360 // __buf contains [sign][prefix]data 361 // ^ location of __first 362 // The zero padding is done like: 363 // - Write [sign][prefix] 364 // - Write data right aligned with '0' as fill character. 365 __out_it = _VSTD::copy(__begin, __first, _VSTD::move(__out_it)); 366 this->__alignment = _Flags::_Alignment::__right; 367 this->__fill = _CharT('0'); 368 uint32_t __size = __first - __begin; 369 this->__width -= _VSTD::min(__size, this->__width); 370 } 371 372 return __write(__first, __last, _VSTD::move(__out_it)); 373 } 374 375 #ifndef _LIBCPP_HAS_NO_LOCALIZATION 376 /** Format's the locale-specific form's groupings. */ 377 template <class _OutIt, class _CharT> 378 _LIBCPP_HIDE_FROM_ABI _OutIt 379 __format_grouping(_OutIt __out_it, const char* __begin, const char* __first, 380 const char* __last, string&& __grouping, _CharT __sep) { 381 382 // TODO FMT This function duplicates some functionality of the normal 383 // output routines. Evaluate whether these parts can be efficiently 384 // combined with the existing routines. 385 386 unsigned __size = (__first - __begin) + // [sign][prefix] 387 (__last - __first) + // data 388 (__grouping.size() - 1); // number of separator characters 389 390 __formatter::__padding_size_result __padding = {0, 0}; 391 if (this->__alignment == _Flags::_Alignment::__default) { 392 // Write [sign][prefix]. 393 __out_it = _VSTD::copy(__begin, __first, _VSTD::move(__out_it)); 394 395 if (this->__width > __size) { 396 // Write zero padding. 397 __padding.__before = this->__width - __size; 398 __out_it = _VSTD::fill_n(_VSTD::move(__out_it), this->__width - __size, 399 _CharT('0')); 400 } 401 } else { 402 if (this->__width > __size) { 403 // Determine padding and write padding. 404 __padding = __formatter::__padding_size(__size, this->__width, 405 this->__alignment); 406 407 __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before, 408 this->__fill); 409 } 410 // Write [sign][prefix]. 411 __out_it = _VSTD::copy(__begin, __first, _VSTD::move(__out_it)); 412 } 413 414 auto __r = __grouping.rbegin(); 415 auto __e = __grouping.rend() - 1; 416 _LIBCPP_ASSERT(__r != __e, "The slow grouping formatting is used while " 417 "there will be no separators written."); 418 // The output is divided in small groups of numbers to write: 419 // - A group before the first separator. 420 // - A separator and a group, repeated for the number of separators. 421 // - A group after the last separator. 422 // This loop achieves that process by testing the termination condition 423 // midway in the loop. 424 // 425 // TODO FMT This loop evaluates the loop invariant `this->__type != 426 // _Flags::_Type::__hexadecimal_upper_case` for every iteration. (This test 427 // happens in the __write call.) Benchmark whether making two loops and 428 // hoisting the invariant is worth the effort. 429 while (true) { 430 if (this->__type == _Flags::_Type::__hexadecimal_upper_case) { 431 __last = __first + *__r; 432 __out_it = _VSTD::transform(__first, __last, _VSTD::move(__out_it), 433 __hex_to_upper); 434 __first = __last; 435 } else { 436 __out_it = _VSTD::copy_n(__first, *__r, _VSTD::move(__out_it)); 437 __first += *__r; 438 } 439 440 if (__r == __e) 441 break; 442 443 ++__r; 444 *__out_it++ = __sep; 445 } 446 447 return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after, 448 this->__fill); 449 } 450 #endif // _LIBCPP_HAS_NO_LOCALIZATION 451 }; 452 453 } // namespace __format_spec 454 455 #endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) 456 457 #endif //_LIBCPP_STD_VER > 17 458 459 _LIBCPP_END_NAMESPACE_STD 460 461 _LIBCPP_POP_MACROS 462 463 #endif // _LIBCPP___FORMAT_FORMATTER_INTEGRAL_H 464