xref: /llvm-project/libcxx/include/__format/formatter_integral.h (revision c6f3b7bcd0596d30f8dabecdfb9e44f9a07b6e4c)
1 // -*- C++ -*-
2 //===----------------------------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef _LIBCPP___FORMAT_FORMATTER_INTEGRAL_H
11 #define _LIBCPP___FORMAT_FORMATTER_INTEGRAL_H
12 
13 #include <__charconv/to_chars_integral.h>
14 #include <__charconv/to_chars_result.h>
15 #include <__charconv/traits.h>
16 #include <__concepts/arithmetic.h>
17 #include <__concepts/same_as.h>
18 #include <__config>
19 #include <__format/concepts.h>
20 #include <__format/format_error.h>
21 #include <__format/formatter_output.h>
22 #include <__format/parser_std_format_spec.h>
23 #include <__iterator/concepts.h>
24 #include <__iterator/iterator_traits.h>
25 #include <__memory/pointer_traits.h>
26 #include <__system_error/errc.h>
27 #include <__type_traits/make_unsigned.h>
28 #include <__utility/unreachable.h>
29 #include <array>
30 #include <cstdint>
31 #include <limits>
32 #include <string>
33 #include <string_view>
34 
35 #if _LIBCPP_HAS_LOCALIZATION
36 #  include <__locale>
37 #endif
38 
39 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
40 #  pragma GCC system_header
41 #endif
42 
43 _LIBCPP_PUSH_MACROS
44 #include <__undef_macros>
45 
46 _LIBCPP_BEGIN_NAMESPACE_STD
47 
48 #if _LIBCPP_STD_VER >= 20
49 
50 namespace __formatter {
51 
52 //
53 // Generic
54 //
55 
56 template <contiguous_iterator _Iterator>
57   requires same_as<char, iter_value_t<_Iterator>>
58 _LIBCPP_HIDE_FROM_ABI inline _Iterator __insert_sign(_Iterator __buf, bool __negative, __format_spec::__sign __sign) {
59   if (__negative)
60     *__buf++ = '-';
61   else
62     switch (__sign) {
63     case __format_spec::__sign::__default:
64     case __format_spec::__sign::__minus:
65       // No sign added.
66       break;
67     case __format_spec::__sign::__plus:
68       *__buf++ = '+';
69       break;
70     case __format_spec::__sign::__space:
71       *__buf++ = ' ';
72       break;
73     }
74 
75   return __buf;
76 }
77 
78 /**
79  * Determines the required grouping based on the size of the input.
80  *
81  * The grouping's last element will be repeated. For simplicity this repeating
82  * is unwrapped based on the length of the input. (When the input is short some
83  * groups are not processed.)
84  *
85  * @returns The size of the groups to write. This means the number of
86  * separator characters written is size() - 1.
87  *
88  * @note Since zero-sized groups cause issues they are silently ignored.
89  *
90  * @note The grouping field of the locale is always a @c std::string,
91  * regardless whether the @c std::numpunct's type is @c char or @c wchar_t.
92  */
93 _LIBCPP_HIDE_FROM_ABI inline string __determine_grouping(ptrdiff_t __size, const string& __grouping) {
94   _LIBCPP_ASSERT_INTERNAL(!__grouping.empty() && __size > __grouping[0],
95                           "The slow grouping formatting is used while there will be no separators written");
96   string __r;
97   auto __end = __grouping.end() - 1;
98   auto __ptr = __grouping.begin();
99 
100   while (true) {
101     __size -= *__ptr;
102     if (__size > 0)
103       __r.push_back(*__ptr);
104     else {
105       // __size <= 0 so the value pushed will be <= *__ptr.
106       __r.push_back(*__ptr + __size);
107       return __r;
108     }
109 
110     // Proceed to the next group.
111     if (__ptr != __end) {
112       do {
113         ++__ptr;
114         // Skip grouping with a width of 0.
115       } while (*__ptr == 0 && __ptr != __end);
116     }
117   }
118 
119   __libcpp_unreachable();
120 }
121 
122 //
123 // Char
124 //
125 
126 template <__fmt_char_type _CharT>
127 _LIBCPP_HIDE_FROM_ABI auto
128 __format_char(integral auto __value,
129               output_iterator<const _CharT&> auto __out_it,
130               __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
131   using _Tp = decltype(__value);
132   if constexpr (!same_as<_CharT, _Tp>) {
133     // cmp_less and cmp_greater can't be used for character types.
134     if constexpr (signed_integral<_CharT> == signed_integral<_Tp>) {
135       if (__value < numeric_limits<_CharT>::min() || __value > numeric_limits<_CharT>::max())
136         std::__throw_format_error("Integral value outside the range of the char type");
137     } else if constexpr (signed_integral<_CharT>) {
138       // _CharT is signed _Tp is unsigned
139       if (__value > static_cast<make_unsigned_t<_CharT>>(numeric_limits<_CharT>::max()))
140         std::__throw_format_error("Integral value outside the range of the char type");
141     } else {
142       // _CharT is unsigned _Tp is signed
143       if (__value < 0 || static_cast<make_unsigned_t<_Tp>>(__value) > numeric_limits<_CharT>::max())
144         std::__throw_format_error("Integral value outside the range of the char type");
145     }
146   }
147 
148   const auto __c = static_cast<_CharT>(__value);
149   return __formatter::__write(std::addressof(__c), std::addressof(__c) + 1, std::move(__out_it), __specs);
150 }
151 
152 //
153 // Integer
154 //
155 
156 /** Wrapper around @ref to_chars, returning the output iterator. */
157 template <contiguous_iterator _Iterator, integral _Tp>
158   requires same_as<char, iter_value_t<_Iterator>>
159 _LIBCPP_HIDE_FROM_ABI _Iterator __to_buffer(_Iterator __first, _Iterator __last, _Tp __value, int __base) {
160   // TODO FMT Evaluate code overhead due to not calling the internal function
161   // directly. (Should be zero overhead.)
162   to_chars_result __r = std::to_chars(std::to_address(__first), std::to_address(__last), __value, __base);
163   _LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small");
164   auto __diff = __r.ptr - std::to_address(__first);
165   return __first + __diff;
166 }
167 
/**
 * Helper to determine the buffer size to output an integer in Base @em x.
 *
 * There are several overloads for the supported bases. The function uses the
 * base as template argument so it can be used in a constant expression.
 */
174 template <unsigned_integral _Tp, size_t _Base>
175 consteval size_t __buffer_size() noexcept
176   requires(_Base == 2)
177 {
178   return numeric_limits<_Tp>::digits // The number of binary digits.
179        + 2                           // Reserve space for the '0[Bb]' prefix.
180        + 1;                          // Reserve space for the sign.
181 }
182 
183 template <unsigned_integral _Tp, size_t _Base>
184 consteval size_t __buffer_size() noexcept
185   requires(_Base == 8)
186 {
187   return numeric_limits<_Tp>::digits // The number of binary digits.
188            / 3                       // Adjust to octal.
189        + 1                           // Turn floor to ceil.
190        + 1                           // Reserve space for the '0' prefix.
191        + 1;                          // Reserve space for the sign.
192 }
193 
194 template <unsigned_integral _Tp, size_t _Base>
195 consteval size_t __buffer_size() noexcept
196   requires(_Base == 10)
197 {
198   return numeric_limits<_Tp>::digits10 // The floored value.
199        + 1                             // Turn floor to ceil.
200        + 1;                            // Reserve space for the sign.
201 }
202 
203 template <unsigned_integral _Tp, size_t _Base>
204 consteval size_t __buffer_size() noexcept
205   requires(_Base == 16)
206 {
207   return numeric_limits<_Tp>::digits // The number of binary digits.
208            / 4                       // Adjust to hexadecimal.
209        + 2                           // Reserve space for the '0[Xx]' prefix.
210        + 1;                          // Reserve space for the sign.
211 }
212 
213 template <class _OutIt, contiguous_iterator _Iterator, class _CharT>
214   requires same_as<char, iter_value_t<_Iterator>>
215 _LIBCPP_HIDE_FROM_ABI _OutIt __write_using_decimal_separators(
216     _OutIt __out_it,
217     _Iterator __begin,
218     _Iterator __first,
219     _Iterator __last,
220     string&& __grouping,
221     _CharT __sep,
222     __format_spec::__parsed_specifications<_CharT> __specs) {
223   int __size = (__first - __begin) +    // [sign][prefix]
224                (__last - __first) +     // data
225                (__grouping.size() - 1); // number of separator characters
226 
227   __padding_size_result __padding = {0, 0};
228   if (__specs.__alignment_ == __format_spec::__alignment::__zero_padding) {
229     // Write [sign][prefix].
230     __out_it = __formatter::__copy(__begin, __first, std::move(__out_it));
231 
232     if (__specs.__width_ > __size) {
233       // Write zero padding.
234       __padding.__before_ = __specs.__width_ - __size;
235       __out_it            = __formatter::__fill(std::move(__out_it), __specs.__width_ - __size, _CharT('0'));
236     }
237   } else {
238     if (__specs.__width_ > __size) {
239       // Determine padding and write padding.
240       __padding = __formatter::__padding_size(__size, __specs.__width_, __specs.__alignment_);
241 
242       __out_it = __formatter::__fill(std::move(__out_it), __padding.__before_, __specs.__fill_);
243     }
244     // Write [sign][prefix].
245     __out_it = __formatter::__copy(__begin, __first, std::move(__out_it));
246   }
247 
248   auto __r = __grouping.rbegin();
249   auto __e = __grouping.rend() - 1;
250   _LIBCPP_ASSERT_INTERNAL(
251       __r != __e, "The slow grouping formatting is used while there will be no separators written.");
252   // The output is divided in small groups of numbers to write:
253   // - A group before the first separator.
254   // - A separator and a group, repeated for the number of separators.
255   // - A group after the last separator.
256   // This loop achieves that process by testing the termination condition
257   // midway in the loop.
258   //
259   // TODO FMT This loop evaluates the loop invariant `__parser.__type !=
260   // _Flags::_Type::__hexadecimal_upper_case` for every iteration. (This test
261   // happens in the __write call.) Benchmark whether making two loops and
262   // hoisting the invariant is worth the effort.
263   while (true) {
264     if (__specs.__std_.__type_ == __format_spec::__type::__hexadecimal_upper_case) {
265       __last   = __first + *__r;
266       __out_it = __formatter::__transform(__first, __last, std::move(__out_it), __hex_to_upper);
267       __first  = __last;
268     } else {
269       __out_it = __formatter::__copy(__first, *__r, std::move(__out_it));
270       __first += *__r;
271     }
272 
273     if (__r == __e)
274       break;
275 
276     ++__r;
277     *__out_it++ = __sep;
278   }
279 
280   return __formatter::__fill(std::move(__out_it), __padding.__after_, __specs.__fill_);
281 }
282 
283 template <unsigned_integral _Tp, contiguous_iterator _Iterator, class _CharT, class _FormatContext>
284   requires same_as<char, iter_value_t<_Iterator>>
285 _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator __format_integer(
286     _Tp __value,
287     _FormatContext& __ctx,
288     __format_spec::__parsed_specifications<_CharT> __specs,
289     bool __negative,
290     _Iterator __begin,
291     _Iterator __end,
292     const char* __prefix,
293     int __base) {
294   _Iterator __first = __formatter::__insert_sign(__begin, __negative, __specs.__std_.__sign_);
295   if (__specs.__std_.__alternate_form_ && __prefix)
296     while (*__prefix)
297       *__first++ = *__prefix++;
298 
299   _Iterator __last = __formatter::__to_buffer(__first, __end, __value, __base);
300 
301 #  if _LIBCPP_HAS_LOCALIZATION
302   if (__specs.__std_.__locale_specific_form_) {
303     const auto& __np  = std::use_facet<numpunct<_CharT>>(__ctx.locale());
304     string __grouping = __np.grouping();
305     ptrdiff_t __size  = __last - __first;
306     // Writing the grouped form has more overhead than the normal output
307     // routines. If there will be no separators written the locale-specific
308     // form is identical to the normal routine. Test whether to grouped form
309     // is required.
310     if (!__grouping.empty() && __size > __grouping[0])
311       return __formatter::__write_using_decimal_separators(
312           __ctx.out(),
313           __begin,
314           __first,
315           __last,
316           __formatter::__determine_grouping(__size, __grouping),
317           __np.thousands_sep(),
318           __specs);
319   }
320 #  endif
321   auto __out_it = __ctx.out();
322   if (__specs.__alignment_ != __format_spec::__alignment::__zero_padding)
323     __first = __begin;
324   else {
325     // __buf contains [sign][prefix]data
326     //                              ^ location of __first
327     // The zero padding is done like:
328     // - Write [sign][prefix]
329     // - Write data right aligned with '0' as fill character.
330     __out_it                  = __formatter::__copy(__begin, __first, std::move(__out_it));
331     __specs.__alignment_      = __format_spec::__alignment::__right;
332     __specs.__fill_.__data[0] = _CharT('0');
333     int32_t __size            = __first - __begin;
334 
335     __specs.__width_ -= std::min(__size, __specs.__width_);
336   }
337 
338   if (__specs.__std_.__type_ != __format_spec::__type::__hexadecimal_upper_case) [[likely]]
339     return __formatter::__write(__first, __last, __ctx.out(), __specs);
340 
341   return __formatter::__write_transformed(__first, __last, __ctx.out(), __specs, __formatter::__hex_to_upper);
342 }
343 
344 template <unsigned_integral _Tp, class _CharT, class _FormatContext>
345 _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator
346 __format_integer(_Tp __value,
347                  _FormatContext& __ctx,
348                  __format_spec::__parsed_specifications<_CharT> __specs,
349                  bool __negative = false) {
350   switch (__specs.__std_.__type_) {
351   case __format_spec::__type::__binary_lower_case: {
352     array<char, __formatter::__buffer_size<decltype(__value), 2>()> __array;
353     return __formatter::__format_integer(__value, __ctx, __specs, __negative, __array.begin(), __array.end(), "0b", 2);
354   }
355   case __format_spec::__type::__binary_upper_case: {
356     array<char, __formatter::__buffer_size<decltype(__value), 2>()> __array;
357     return __formatter::__format_integer(__value, __ctx, __specs, __negative, __array.begin(), __array.end(), "0B", 2);
358   }
359   case __format_spec::__type::__octal: {
360     // Octal is special; if __value == 0 there's no prefix.
361     array<char, __formatter::__buffer_size<decltype(__value), 8>()> __array;
362     return __formatter::__format_integer(
363         __value, __ctx, __specs, __negative, __array.begin(), __array.end(), __value != 0 ? "0" : nullptr, 8);
364   }
365   case __format_spec::__type::__default:
366   case __format_spec::__type::__decimal: {
367     array<char, __formatter::__buffer_size<decltype(__value), 10>()> __array;
368     return __formatter::__format_integer(
369         __value, __ctx, __specs, __negative, __array.begin(), __array.end(), nullptr, 10);
370   }
371   case __format_spec::__type::__hexadecimal_lower_case: {
372     array<char, __formatter::__buffer_size<decltype(__value), 16>()> __array;
373     return __formatter::__format_integer(__value, __ctx, __specs, __negative, __array.begin(), __array.end(), "0x", 16);
374   }
375   case __format_spec::__type::__hexadecimal_upper_case: {
376     array<char, __formatter::__buffer_size<decltype(__value), 16>()> __array;
377     return __formatter::__format_integer(__value, __ctx, __specs, __negative, __array.begin(), __array.end(), "0X", 16);
378   }
379   default:
380     _LIBCPP_ASSERT_INTERNAL(false, "The parse function should have validated the type");
381     __libcpp_unreachable();
382   }
383 }
384 
385 template <signed_integral _Tp, class _CharT, class _FormatContext>
386 _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator
387 __format_integer(_Tp __value, _FormatContext& __ctx, __format_spec::__parsed_specifications<_CharT> __specs) {
388   // Depending on the std-format-spec string the sign and the value
389   // might not be outputted together:
390   // - alternate form may insert a prefix string.
391   // - zero-padding may insert additional '0' characters.
392   // Therefore the value is processed as a positive unsigned value.
393   // The function @ref __insert_sign will a '-' when the value was negative.
394   auto __r        = std::__to_unsigned_like(__value);
395   bool __negative = __value < 0;
396   if (__negative)
397     __r = std::__complement(__r);
398 
399   return __formatter::__format_integer(__r, __ctx, __specs, __negative);
400 }
401 
402 //
403 // Formatter arithmetic (bool)
404 //
405 
406 template <class _CharT>
407 struct _LIBCPP_TEMPLATE_VIS __bool_strings;
408 
409 template <>
410 struct _LIBCPP_TEMPLATE_VIS __bool_strings<char> {
411   static constexpr string_view __true{"true"};
412   static constexpr string_view __false{"false"};
413 };
414 
415 #  if _LIBCPP_HAS_WIDE_CHARACTERS
416 template <>
417 struct _LIBCPP_TEMPLATE_VIS __bool_strings<wchar_t> {
418   static constexpr wstring_view __true{L"true"};
419   static constexpr wstring_view __false{L"false"};
420 };
421 #  endif
422 
423 template <class _CharT, class _FormatContext>
424 _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator
425 __format_bool(bool __value, _FormatContext& __ctx, __format_spec::__parsed_specifications<_CharT> __specs) {
426 #  if _LIBCPP_HAS_LOCALIZATION
427   if (__specs.__std_.__locale_specific_form_) {
428     const auto& __np           = std::use_facet<numpunct<_CharT>>(__ctx.locale());
429     basic_string<_CharT> __str = __value ? __np.truename() : __np.falsename();
430     return __formatter::__write_string_no_precision(basic_string_view<_CharT>{__str}, __ctx.out(), __specs);
431   }
432 #  endif
433   basic_string_view<_CharT> __str =
434       __value ? __formatter::__bool_strings<_CharT>::__true : __formatter::__bool_strings<_CharT>::__false;
435   return __formatter::__write(__str.begin(), __str.end(), __ctx.out(), __specs);
436 }
437 
438 } // namespace __formatter
439 
440 #endif // _LIBCPP_STD_VER >= 20
441 
442 _LIBCPP_END_NAMESPACE_STD
443 
444 _LIBCPP_POP_MACROS
445 
446 #endif // _LIBCPP___FORMAT_FORMATTER_INTEGRAL_H
447