xref: /llvm-project/libcxx/src/ryu/d2s.cpp (revision ba87515fea90b5d55836a8e3be63a7e683ce299d)
1abb5dd6eSMark de Wever //===----------------------------------------------------------------------===//
2abb5dd6eSMark de Wever //
3abb5dd6eSMark de Wever // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4abb5dd6eSMark de Wever // See https://llvm.org/LICENSE.txt for license information.
5abb5dd6eSMark de Wever // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6abb5dd6eSMark de Wever //
7abb5dd6eSMark de Wever //===----------------------------------------------------------------------===//
8abb5dd6eSMark de Wever 
9abb5dd6eSMark de Wever // Copyright (c) Microsoft Corporation.
10abb5dd6eSMark de Wever // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11abb5dd6eSMark de Wever 
12abb5dd6eSMark de Wever // Copyright 2018 Ulf Adams
13abb5dd6eSMark de Wever // Copyright (c) Microsoft Corporation. All rights reserved.
14abb5dd6eSMark de Wever 
15abb5dd6eSMark de Wever // Boost Software License - Version 1.0 - August 17th, 2003
16abb5dd6eSMark de Wever 
17abb5dd6eSMark de Wever // Permission is hereby granted, free of charge, to any person or organization
18abb5dd6eSMark de Wever // obtaining a copy of the software and accompanying documentation covered by
19abb5dd6eSMark de Wever // this license (the "Software") to use, reproduce, display, distribute,
20abb5dd6eSMark de Wever // execute, and transmit the Software, and to prepare derivative works of the
21abb5dd6eSMark de Wever // Software, and to permit third-parties to whom the Software is furnished to
22abb5dd6eSMark de Wever // do so, all subject to the following:
23abb5dd6eSMark de Wever 
24abb5dd6eSMark de Wever // The copyright notices in the Software and this entire statement, including
25abb5dd6eSMark de Wever // the above license grant, this restriction and the following disclaimer,
26abb5dd6eSMark de Wever // must be included in all copies of the Software, in whole or in part, and
27abb5dd6eSMark de Wever // all derivative works of the Software, unless such copies or derivative
28abb5dd6eSMark de Wever // works are solely in the form of machine-executable object code generated by
29abb5dd6eSMark de Wever // a source language processor.
30abb5dd6eSMark de Wever 
31abb5dd6eSMark de Wever // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32abb5dd6eSMark de Wever // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33abb5dd6eSMark de Wever // FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
34abb5dd6eSMark de Wever // SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
35abb5dd6eSMark de Wever // FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
36abb5dd6eSMark de Wever // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
37abb5dd6eSMark de Wever // DEALINGS IN THE SOFTWARE.
38abb5dd6eSMark de Wever 
39abb5dd6eSMark de Wever // Avoid formatting to keep the changes with the original code minimal.
40abb5dd6eSMark de Wever // clang-format off
41abb5dd6eSMark de Wever 
42f87aa19bSLouis Dionne #include <__assert>
43bbb0f2c7SArthur O'Dwyer #include <__config>
44bbb0f2c7SArthur O'Dwyer #include <charconv>
45abb5dd6eSMark de Wever 
46abb5dd6eSMark de Wever #include "include/ryu/common.h"
47abb5dd6eSMark de Wever #include "include/ryu/d2fixed.h"
48abb5dd6eSMark de Wever #include "include/ryu/d2s.h"
49abb5dd6eSMark de Wever #include "include/ryu/d2s_full_table.h"
50abb5dd6eSMark de Wever #include "include/ryu/d2s_intrinsics.h"
51abb5dd6eSMark de Wever #include "include/ryu/digit_table.h"
52abb5dd6eSMark de Wever #include "include/ryu/ryu.h"
53abb5dd6eSMark de Wever 
54abb5dd6eSMark de Wever _LIBCPP_BEGIN_NAMESPACE_STD
55abb5dd6eSMark de Wever 
56abb5dd6eSMark de Wever // We need a 64x128-bit multiplication and a subsequent 128-bit shift.
57abb5dd6eSMark de Wever // Multiplication:
58abb5dd6eSMark de Wever //   The 64-bit factor is variable and passed in, the 128-bit factor comes
59abb5dd6eSMark de Wever //   from a lookup table. We know that the 64-bit factor only has 55
60abb5dd6eSMark de Wever //   significant bits (i.e., the 9 topmost bits are zeros). The 128-bit
61abb5dd6eSMark de Wever //   factor only has 124 significant bits (i.e., the 4 topmost bits are
62abb5dd6eSMark de Wever //   zeros).
63abb5dd6eSMark de Wever // Shift:
64abb5dd6eSMark de Wever //   In principle, the multiplication result requires 55 + 124 = 179 bits to
65abb5dd6eSMark de Wever //   represent. However, we then shift this value to the right by __j, which is
66abb5dd6eSMark de Wever //   at least __j >= 115, so the result is guaranteed to fit into 179 - 115 = 64
67abb5dd6eSMark de Wever //   bits. This means that we only need the topmost 64 significant bits of
68abb5dd6eSMark de Wever //   the 64x128-bit multiplication.
69abb5dd6eSMark de Wever //
70abb5dd6eSMark de Wever // There are several ways to do this:
71abb5dd6eSMark de Wever // 1. Best case: the compiler exposes a 128-bit type.
72abb5dd6eSMark de Wever //    We perform two 64x64-bit multiplications, add the higher 64 bits of the
73abb5dd6eSMark de Wever //    lower result to the higher result, and shift by __j - 64 bits.
74abb5dd6eSMark de Wever //
75abb5dd6eSMark de Wever //    We explicitly cast from 64-bit to 128-bit, so the compiler can tell
76abb5dd6eSMark de Wever //    that these are only 64-bit inputs, and can map these to the best
77abb5dd6eSMark de Wever //    possible sequence of assembly instructions.
78abb5dd6eSMark de Wever //    x64 machines happen to have matching assembly instructions for
79abb5dd6eSMark de Wever //    64x64-bit multiplications and 128-bit shifts.
80abb5dd6eSMark de Wever //
81abb5dd6eSMark de Wever // 2. Second best case: the compiler exposes intrinsics for the x64 assembly
82abb5dd6eSMark de Wever //    instructions mentioned in 1.
83abb5dd6eSMark de Wever //
84abb5dd6eSMark de Wever // 3. We only have 64x64 bit instructions that return the lower 64 bits of
85abb5dd6eSMark de Wever //    the result, i.e., we have to use plain C.
86abb5dd6eSMark de Wever //    Our inputs are less than the full width, so we have three options:
87abb5dd6eSMark de Wever //    a. Ignore this fact and just implement the intrinsics manually.
88abb5dd6eSMark de Wever //    b. Split both into 31-bit pieces, which guarantees no internal overflow,
89abb5dd6eSMark de Wever //       but requires extra work upfront (unless we change the lookup table).
90abb5dd6eSMark de Wever //    c. Split only the first factor into 31-bit pieces, which also guarantees
91abb5dd6eSMark de Wever //       no internal overflow, but requires extra work since the intermediate
92abb5dd6eSMark de Wever //       results are not perfectly aligned.
93abb5dd6eSMark de Wever #ifdef _LIBCPP_INTRINSIC128
94abb5dd6eSMark de Wever 
95abb5dd6eSMark de Wever [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __mulShift(const uint64_t __m, const uint64_t* const __mul, const int32_t __j) {
96abb5dd6eSMark de Wever   // __m is maximum 55 bits
97abb5dd6eSMark de Wever   uint64_t __high1;                                               // 128
98abb5dd6eSMark de Wever   const uint64_t __low1 = __ryu_umul128(__m, __mul[1], &__high1); // 64
99abb5dd6eSMark de Wever   uint64_t __high0;                                               // 64
100abb5dd6eSMark de Wever   (void) __ryu_umul128(__m, __mul[0], &__high0);                  // 0
101abb5dd6eSMark de Wever   const uint64_t __sum = __high0 + __low1;
102abb5dd6eSMark de Wever   if (__sum < __high0) {
103abb5dd6eSMark de Wever     ++__high1; // overflow into __high1
104abb5dd6eSMark de Wever   }
105abb5dd6eSMark de Wever   return __ryu_shiftright128(__sum, __high1, static_cast<uint32_t>(__j - 64));
106abb5dd6eSMark de Wever }
107abb5dd6eSMark de Wever 
108abb5dd6eSMark de Wever [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __mulShiftAll(const uint64_t __m, const uint64_t* const __mul, const int32_t __j,
109abb5dd6eSMark de Wever   uint64_t* const __vp, uint64_t* const __vm, const uint32_t __mmShift) {
110abb5dd6eSMark de Wever   *__vp = __mulShift(4 * __m + 2, __mul, __j);
111abb5dd6eSMark de Wever   *__vm = __mulShift(4 * __m - 1 - __mmShift, __mul, __j);
112abb5dd6eSMark de Wever   return __mulShift(4 * __m, __mul, __j);
113abb5dd6eSMark de Wever }
114abb5dd6eSMark de Wever 
115abb5dd6eSMark de Wever #else // ^^^ intrinsics available ^^^ / vvv intrinsics unavailable vvv
116abb5dd6eSMark de Wever 
117abb5dd6eSMark de Wever [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_ALWAYS_INLINE uint64_t __mulShiftAll(uint64_t __m, const uint64_t* const __mul, const int32_t __j,
118abb5dd6eSMark de Wever   uint64_t* const __vp, uint64_t* const __vm, const uint32_t __mmShift) { // TRANSITION, VSO-634761
119abb5dd6eSMark de Wever   __m <<= 1;
120abb5dd6eSMark de Wever   // __m is maximum 55 bits
121abb5dd6eSMark de Wever   uint64_t __tmp;
122abb5dd6eSMark de Wever   const uint64_t __lo = __ryu_umul128(__m, __mul[0], &__tmp);
123abb5dd6eSMark de Wever   uint64_t __hi;
124abb5dd6eSMark de Wever   const uint64_t __mid = __tmp + __ryu_umul128(__m, __mul[1], &__hi);
125abb5dd6eSMark de Wever   __hi += __mid < __tmp; // overflow into __hi
126abb5dd6eSMark de Wever 
127abb5dd6eSMark de Wever   const uint64_t __lo2 = __lo + __mul[0];
128abb5dd6eSMark de Wever   const uint64_t __mid2 = __mid + __mul[1] + (__lo2 < __lo);
129abb5dd6eSMark de Wever   const uint64_t __hi2 = __hi + (__mid2 < __mid);
130abb5dd6eSMark de Wever   *__vp = __ryu_shiftright128(__mid2, __hi2, static_cast<uint32_t>(__j - 64 - 1));
131abb5dd6eSMark de Wever 
132abb5dd6eSMark de Wever   if (__mmShift == 1) {
133abb5dd6eSMark de Wever     const uint64_t __lo3 = __lo - __mul[0];
134abb5dd6eSMark de Wever     const uint64_t __mid3 = __mid - __mul[1] - (__lo3 > __lo);
135abb5dd6eSMark de Wever     const uint64_t __hi3 = __hi - (__mid3 > __mid);
136abb5dd6eSMark de Wever     *__vm = __ryu_shiftright128(__mid3, __hi3, static_cast<uint32_t>(__j - 64 - 1));
137abb5dd6eSMark de Wever   } else {
138abb5dd6eSMark de Wever     const uint64_t __lo3 = __lo + __lo;
139abb5dd6eSMark de Wever     const uint64_t __mid3 = __mid + __mid + (__lo3 < __lo);
140abb5dd6eSMark de Wever     const uint64_t __hi3 = __hi + __hi + (__mid3 < __mid);
141abb5dd6eSMark de Wever     const uint64_t __lo4 = __lo3 - __mul[0];
142abb5dd6eSMark de Wever     const uint64_t __mid4 = __mid3 - __mul[1] - (__lo4 > __lo3);
143abb5dd6eSMark de Wever     const uint64_t __hi4 = __hi3 - (__mid4 > __mid3);
144abb5dd6eSMark de Wever     *__vm = __ryu_shiftright128(__mid4, __hi4, static_cast<uint32_t>(__j - 64));
145abb5dd6eSMark de Wever   }
146abb5dd6eSMark de Wever 
147abb5dd6eSMark de Wever   return __ryu_shiftright128(__mid, __hi, static_cast<uint32_t>(__j - 64 - 1));
148abb5dd6eSMark de Wever }
149abb5dd6eSMark de Wever 
150abb5dd6eSMark de Wever #endif // ^^^ intrinsics unavailable ^^^
151abb5dd6eSMark de Wever 
// Returns the number of decimal digits in __v, in the range [1, 17].
//
// Precondition: __v has at most 17 decimal digits (17 digits are sufficient
// for round-tripping); this is checked with an internal assertion.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __decimalLength17(const uint64_t __v) {
  _LIBCPP_ASSERT_INTERNAL(__v < 100000000000000000u, "");
  // A straight comparison chain is slightly faster than a loop. The average
  // output length is 16.38 digits, so the thresholds are tested high-to-low.
  return __v >= 10000000000000000u ? 17u
       : __v >= 1000000000000000u  ? 16u
       : __v >= 100000000000000u   ? 15u
       : __v >= 10000000000000u    ? 14u
       : __v >= 1000000000000u     ? 13u
       : __v >= 100000000000u      ? 12u
       : __v >= 10000000000u       ? 11u
       : __v >= 1000000000u        ? 10u
       : __v >= 100000000u         ? 9u
       : __v >= 10000000u          ? 8u
       : __v >= 1000000u           ? 7u
       : __v >= 100000u            ? 6u
       : __v >= 10000u             ? 5u
       : __v >= 1000u              ? 4u
       : __v >= 100u               ? 3u
       : __v >= 10u                ? 2u
       : 1u;
}
176abb5dd6eSMark de Wever 
177abb5dd6eSMark de Wever // A floating decimal representing m * 10^e.
// This is the value type returned by __d2d and accepted by __to_chars.
178abb5dd6eSMark de Wever struct __floating_decimal_64 {
  // m: the decimal significand, stored as an unsigned integer.
179abb5dd6eSMark de Wever   uint64_t __mantissa;
  // e: the power of ten that __mantissa is multiplied by (may be negative).
180abb5dd6eSMark de Wever   int32_t __exponent;
181abb5dd6eSMark de Wever };
182abb5dd6eSMark de Wever 
// Converts the raw mantissa and exponent fields of a double into a decimal
// significand/exponent pair (__floating_decimal_64, i.e. mantissa * 10^exponent),
// choosing the shortest decimal inside the interval of valid representations.
//
// Parameters:
//   __ieeeMantissa - the stored mantissa field, without the implicit leading bit.
//   __ieeeExponent - the stored (biased) exponent field; a value of 0 selects the
//                    decoding that has no implicit leading bit.
// Returns:
//   __mantissa = the selected decimal digits, __exponent = __e10 plus the number of
//   digits removed in Step 4.
//
// NOTE(review): nothing in this function special-cases zero, infinity or NaN;
// presumably the caller filters those out -- confirm at the call site.
183abb5dd6eSMark de Wever [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline __floating_decimal_64 __d2d(const uint64_t __ieeeMantissa, const uint32_t __ieeeExponent) {
  // Step 1: Decode the stored fields into an integer significand __m2 and a binary
  // exponent __e2. The exponent-field-zero case has no implicit leading bit.
184abb5dd6eSMark de Wever   int32_t __e2;
185abb5dd6eSMark de Wever   uint64_t __m2;
186abb5dd6eSMark de Wever   if (__ieeeExponent == 0) {
187abb5dd6eSMark de Wever     // We subtract 2 so that the bounds computation has 2 additional bits.
188abb5dd6eSMark de Wever     __e2 = 1 - __DOUBLE_BIAS - __DOUBLE_MANTISSA_BITS - 2;
189abb5dd6eSMark de Wever     __m2 = __ieeeMantissa;
190abb5dd6eSMark de Wever   } else {
191abb5dd6eSMark de Wever     __e2 = static_cast<int32_t>(__ieeeExponent) - __DOUBLE_BIAS - __DOUBLE_MANTISSA_BITS - 2;
192abb5dd6eSMark de Wever     __m2 = (1ull << __DOUBLE_MANTISSA_BITS) | __ieeeMantissa;
193abb5dd6eSMark de Wever   }
  // The interval endpoints are only treated as acceptable outputs when __m2 is even.
194abb5dd6eSMark de Wever   const bool __even = (__m2 & 1) == 0;
195abb5dd6eSMark de Wever   const bool __acceptBounds = __even;
196abb5dd6eSMark de Wever 
197abb5dd6eSMark de Wever   // Step 2: Determine the interval of valid decimal representations.
198abb5dd6eSMark de Wever   const uint64_t __mv = 4 * __m2;
  // __mmShift is 0 only when the mantissa field is zero and the exponent field is > 1;
  // in that case the lower bound is __mv - 1 instead of __mv - 2.
199abb5dd6eSMark de Wever   // Implicit bool -> int conversion. True is 1, false is 0.
200abb5dd6eSMark de Wever   const uint32_t __mmShift = __ieeeMantissa != 0 || __ieeeExponent <= 1;
201abb5dd6eSMark de Wever   // We would compute __mp and __mm like this:
202abb5dd6eSMark de Wever   // uint64_t __mp = 4 * __m2 + 2;
203abb5dd6eSMark de Wever   // uint64_t __mm = __mv - 1 - __mmShift;
204abb5dd6eSMark de Wever 
205abb5dd6eSMark de Wever   // Step 3: Convert to a decimal power base using 128-bit arithmetic.
  // __vr is the scaled value and __vp / __vm its upper / lower bound; all three come from
  // __mulShiftAll. The *IsTrailingZeros flags are seeded here from divisibility checks and
  // are updated again in Step 4 as digits are removed.
206abb5dd6eSMark de Wever   uint64_t __vr, __vp, __vm;
207abb5dd6eSMark de Wever   int32_t __e10;
208abb5dd6eSMark de Wever   bool __vmIsTrailingZeros = false;
209abb5dd6eSMark de Wever   bool __vrIsTrailingZeros = false;
210abb5dd6eSMark de Wever   if (__e2 >= 0) {
211abb5dd6eSMark de Wever     // I tried special-casing __q == 0, but there was no effect on performance.
212abb5dd6eSMark de Wever     // This expression is slightly faster than max(0, __log10Pow2(__e2) - 1).
213abb5dd6eSMark de Wever     const uint32_t __q = __log10Pow2(__e2) - (__e2 > 3);
214abb5dd6eSMark de Wever     __e10 = static_cast<int32_t>(__q);
215abb5dd6eSMark de Wever     const int32_t __k = __DOUBLE_POW5_INV_BITCOUNT + __pow5bits(static_cast<int32_t>(__q)) - 1;
216abb5dd6eSMark de Wever     const int32_t __i = -__e2 + static_cast<int32_t>(__q) + __k;
217abb5dd6eSMark de Wever     __vr = __mulShiftAll(__m2, __DOUBLE_POW5_INV_SPLIT[__q], __i, &__vp, &__vm, __mmShift);
218abb5dd6eSMark de Wever     if (__q <= 21) {
219abb5dd6eSMark de Wever       // This should use __q <= 22, but I think 21 is also safe. Smaller values
220abb5dd6eSMark de Wever       // may still be safe, but it's more difficult to reason about them.
221abb5dd6eSMark de Wever       // Only one of __mp, __mv, and __mm can be a multiple of 5, if any.
222abb5dd6eSMark de Wever       const uint32_t __mvMod5 = static_cast<uint32_t>(__mv) - 5 * static_cast<uint32_t>(__div5(__mv));
223abb5dd6eSMark de Wever       if (__mvMod5 == 0) {
224abb5dd6eSMark de Wever         __vrIsTrailingZeros = __multipleOfPowerOf5(__mv, __q);
225abb5dd6eSMark de Wever       } else if (__acceptBounds) {
226abb5dd6eSMark de Wever         // Same as min(__e2 + (~__mm & 1), __pow5Factor(__mm)) >= __q
227abb5dd6eSMark de Wever         // <=> __e2 + (~__mm & 1) >= __q && __pow5Factor(__mm) >= __q
228abb5dd6eSMark de Wever         // <=> true && __pow5Factor(__mm) >= __q, since __e2 >= __q.
229abb5dd6eSMark de Wever         __vmIsTrailingZeros = __multipleOfPowerOf5(__mv - 1 - __mmShift, __q);
230abb5dd6eSMark de Wever       } else {
231abb5dd6eSMark de Wever         // Same as min(__e2 + 1, __pow5Factor(__mp)) >= __q.
232abb5dd6eSMark de Wever         __vp -= __multipleOfPowerOf5(__mv + 2, __q);
233abb5dd6eSMark de Wever       }
234abb5dd6eSMark de Wever     }
235abb5dd6eSMark de Wever   } else {
236abb5dd6eSMark de Wever     // This expression is slightly faster than max(0, __log10Pow5(-__e2) - 1).
237abb5dd6eSMark de Wever     const uint32_t __q = __log10Pow5(-__e2) - (-__e2 > 1);
238abb5dd6eSMark de Wever     __e10 = static_cast<int32_t>(__q) + __e2;
239abb5dd6eSMark de Wever     const int32_t __i = -__e2 - static_cast<int32_t>(__q);
240abb5dd6eSMark de Wever     const int32_t __k = __pow5bits(__i) - __DOUBLE_POW5_BITCOUNT;
241abb5dd6eSMark de Wever     const int32_t __j = static_cast<int32_t>(__q) - __k;
242abb5dd6eSMark de Wever     __vr = __mulShiftAll(__m2, __DOUBLE_POW5_SPLIT[__i], __j, &__vp, &__vm, __mmShift);
243abb5dd6eSMark de Wever     if (__q <= 1) {
244abb5dd6eSMark de Wever       // {__vr,__vp,__vm} is trailing zeros if {__mv,__mp,__mm} has at least __q trailing 0 bits.
245abb5dd6eSMark de Wever       // __mv = 4 * __m2, so it always has at least two trailing 0 bits.
246abb5dd6eSMark de Wever       __vrIsTrailingZeros = true;
247abb5dd6eSMark de Wever       if (__acceptBounds) {
248abb5dd6eSMark de Wever         // __mm = __mv - 1 - __mmShift, so it has 1 trailing 0 bit iff __mmShift == 1.
249abb5dd6eSMark de Wever         __vmIsTrailingZeros = __mmShift == 1;
250abb5dd6eSMark de Wever       } else {
251abb5dd6eSMark de Wever         // __mp = __mv + 2, so it always has at least one trailing 0 bit.
252abb5dd6eSMark de Wever         --__vp;
253abb5dd6eSMark de Wever       }
254abb5dd6eSMark de Wever     } else if (__q < 63) { // TRANSITION(ulfjack): Use a tighter bound here.
255abb5dd6eSMark de Wever       // We need to compute min(ntz(__mv), __pow5Factor(__mv) - __e2) >= __q - 1
256abb5dd6eSMark de Wever       // <=> ntz(__mv) >= __q - 1 && __pow5Factor(__mv) - __e2 >= __q - 1
257abb5dd6eSMark de Wever       // <=> ntz(__mv) >= __q - 1 (__e2 is negative and -__e2 >= __q)
258abb5dd6eSMark de Wever       // <=> (__mv & ((1 << (__q - 1)) - 1)) == 0
259abb5dd6eSMark de Wever       // We also need to make sure that the left shift does not overflow.
260abb5dd6eSMark de Wever       __vrIsTrailingZeros = __multipleOfPowerOf2(__mv, __q - 1);
261abb5dd6eSMark de Wever     }
262abb5dd6eSMark de Wever   }
263abb5dd6eSMark de Wever 
264abb5dd6eSMark de Wever   // Step 4: Find the shortest decimal representation in the interval of valid representations.
  // __removed counts the digits stripped from __vr; __lastRemovedDigit is the most recent one
  // and drives the final rounding decision.
265abb5dd6eSMark de Wever   int32_t __removed = 0;
266abb5dd6eSMark de Wever   uint8_t __lastRemovedDigit = 0;
267abb5dd6eSMark de Wever   uint64_t _Output;
268abb5dd6eSMark de Wever   // On average, we remove ~2 digits.
269abb5dd6eSMark de Wever   if (__vmIsTrailingZeros || __vrIsTrailingZeros) {
270abb5dd6eSMark de Wever     // General case, which happens rarely (~0.7%).
    // Strip one digit per iteration for as long as the bounds still differ after dropping it,
    // keeping both trailing-zero flags up to date.
271abb5dd6eSMark de Wever     for (;;) {
272abb5dd6eSMark de Wever       const uint64_t __vpDiv10 = __div10(__vp);
273abb5dd6eSMark de Wever       const uint64_t __vmDiv10 = __div10(__vm);
274abb5dd6eSMark de Wever       if (__vpDiv10 <= __vmDiv10) {
275abb5dd6eSMark de Wever         break;
276abb5dd6eSMark de Wever       }
277abb5dd6eSMark de Wever       const uint32_t __vmMod10 = static_cast<uint32_t>(__vm) - 10 * static_cast<uint32_t>(__vmDiv10);
278abb5dd6eSMark de Wever       const uint64_t __vrDiv10 = __div10(__vr);
279abb5dd6eSMark de Wever       const uint32_t __vrMod10 = static_cast<uint32_t>(__vr) - 10 * static_cast<uint32_t>(__vrDiv10);
280abb5dd6eSMark de Wever       __vmIsTrailingZeros &= __vmMod10 == 0;
281abb5dd6eSMark de Wever       __vrIsTrailingZeros &= __lastRemovedDigit == 0;
282abb5dd6eSMark de Wever       __lastRemovedDigit = static_cast<uint8_t>(__vrMod10);
283abb5dd6eSMark de Wever       __vr = __vrDiv10;
284abb5dd6eSMark de Wever       __vp = __vpDiv10;
285abb5dd6eSMark de Wever       __vm = __vmDiv10;
286abb5dd6eSMark de Wever       ++__removed;
287abb5dd6eSMark de Wever     }
    // If every digit removed from the lower bound so far was zero, keep stripping digits
    // for as long as the lower bound still ends in a zero.
288abb5dd6eSMark de Wever     if (__vmIsTrailingZeros) {
289abb5dd6eSMark de Wever       for (;;) {
290abb5dd6eSMark de Wever         const uint64_t __vmDiv10 = __div10(__vm);
291abb5dd6eSMark de Wever         const uint32_t __vmMod10 = static_cast<uint32_t>(__vm) - 10 * static_cast<uint32_t>(__vmDiv10);
292abb5dd6eSMark de Wever         if (__vmMod10 != 0) {
293abb5dd6eSMark de Wever           break;
294abb5dd6eSMark de Wever         }
295abb5dd6eSMark de Wever         const uint64_t __vpDiv10 = __div10(__vp);
296abb5dd6eSMark de Wever         const uint64_t __vrDiv10 = __div10(__vr);
297abb5dd6eSMark de Wever         const uint32_t __vrMod10 = static_cast<uint32_t>(__vr) - 10 * static_cast<uint32_t>(__vrDiv10);
298abb5dd6eSMark de Wever         __vrIsTrailingZeros &= __lastRemovedDigit == 0;
299abb5dd6eSMark de Wever         __lastRemovedDigit = static_cast<uint8_t>(__vrMod10);
300abb5dd6eSMark de Wever         __vr = __vrDiv10;
301abb5dd6eSMark de Wever         __vp = __vpDiv10;
302abb5dd6eSMark de Wever         __vm = __vmDiv10;
303abb5dd6eSMark de Wever         ++__removed;
304abb5dd6eSMark de Wever       }
305abb5dd6eSMark de Wever     }
306abb5dd6eSMark de Wever     if (__vrIsTrailingZeros && __lastRemovedDigit == 5 && __vr % 2 == 0) {
307abb5dd6eSMark de Wever       // Round even if the exact number is .....50..0.
308abb5dd6eSMark de Wever       __lastRemovedDigit = 4;
309abb5dd6eSMark de Wever     }
310abb5dd6eSMark de Wever     // We need to take __vr + 1 if __vr is outside bounds or we need to round up.
311abb5dd6eSMark de Wever     _Output = __vr + ((__vr == __vm && (!__acceptBounds || !__vmIsTrailingZeros)) || __lastRemovedDigit >= 5);
312abb5dd6eSMark de Wever   } else {
313abb5dd6eSMark de Wever     // Specialized for the common case (~99.3%). Percentages below are relative to this.
    // No trailing-zero bookkeeping is needed here, so only the last removed digit matters
    // and it is folded directly into __roundUp.
314abb5dd6eSMark de Wever     bool __roundUp = false;
315abb5dd6eSMark de Wever     const uint64_t __vpDiv100 = __div100(__vp);
316abb5dd6eSMark de Wever     const uint64_t __vmDiv100 = __div100(__vm);
317abb5dd6eSMark de Wever     if (__vpDiv100 > __vmDiv100) { // Optimization: remove two digits at a time (~86.2%).
318abb5dd6eSMark de Wever       const uint64_t __vrDiv100 = __div100(__vr);
319abb5dd6eSMark de Wever       const uint32_t __vrMod100 = static_cast<uint32_t>(__vr) - 100 * static_cast<uint32_t>(__vrDiv100);
320abb5dd6eSMark de Wever       __roundUp = __vrMod100 >= 50;
321abb5dd6eSMark de Wever       __vr = __vrDiv100;
322abb5dd6eSMark de Wever       __vp = __vpDiv100;
323abb5dd6eSMark de Wever       __vm = __vmDiv100;
324abb5dd6eSMark de Wever       __removed += 2;
325abb5dd6eSMark de Wever     }
326abb5dd6eSMark de Wever     // Loop iterations below (approximately), without optimization above:
327abb5dd6eSMark de Wever     // 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
328abb5dd6eSMark de Wever     // Loop iterations below (approximately), with optimization above:
329abb5dd6eSMark de Wever     // 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
330abb5dd6eSMark de Wever     for (;;) {
331abb5dd6eSMark de Wever       const uint64_t __vpDiv10 = __div10(__vp);
332abb5dd6eSMark de Wever       const uint64_t __vmDiv10 = __div10(__vm);
333abb5dd6eSMark de Wever       if (__vpDiv10 <= __vmDiv10) {
334abb5dd6eSMark de Wever         break;
335abb5dd6eSMark de Wever       }
336abb5dd6eSMark de Wever       const uint64_t __vrDiv10 = __div10(__vr);
337abb5dd6eSMark de Wever       const uint32_t __vrMod10 = static_cast<uint32_t>(__vr) - 10 * static_cast<uint32_t>(__vrDiv10);
338abb5dd6eSMark de Wever       __roundUp = __vrMod10 >= 5;
339abb5dd6eSMark de Wever       __vr = __vrDiv10;
340abb5dd6eSMark de Wever       __vp = __vpDiv10;
341abb5dd6eSMark de Wever       __vm = __vmDiv10;
342abb5dd6eSMark de Wever       ++__removed;
343abb5dd6eSMark de Wever     }
344abb5dd6eSMark de Wever     // We need to take __vr + 1 if __vr is outside bounds or we need to round up.
345abb5dd6eSMark de Wever     _Output = __vr + (__vr == __vm || __roundUp);
346abb5dd6eSMark de Wever   }
  // Every digit removed in Step 4 raises the decimal exponent by one.
347abb5dd6eSMark de Wever   const int32_t __exp = __e10 + __removed;
348abb5dd6eSMark de Wever 
349abb5dd6eSMark de Wever   __floating_decimal_64 __fd;
350abb5dd6eSMark de Wever   __fd.__exponent = __exp;
351abb5dd6eSMark de Wever   __fd.__mantissa = _Output;
352abb5dd6eSMark de Wever   return __fd;
353abb5dd6eSMark de Wever }
354abb5dd6eSMark de Wever 
355abb5dd6eSMark de Wever [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline to_chars_result __to_chars(char* const _First, char* const _Last, const __floating_decimal_64 __v,
356abb5dd6eSMark de Wever   chars_format _Fmt, const double __f) {
357abb5dd6eSMark de Wever   // Step 5: Print the decimal representation.
358abb5dd6eSMark de Wever   uint64_t _Output = __v.__mantissa;
359abb5dd6eSMark de Wever   int32_t _Ryu_exponent = __v.__exponent;
360abb5dd6eSMark de Wever   const uint32_t __olength = __decimalLength17(_Output);
361abb5dd6eSMark de Wever   int32_t _Scientific_exponent = _Ryu_exponent + static_cast<int32_t>(__olength) - 1;
362abb5dd6eSMark de Wever 
363abb5dd6eSMark de Wever   if (_Fmt == chars_format{}) {
364abb5dd6eSMark de Wever     int32_t _Lower;
365abb5dd6eSMark de Wever     int32_t _Upper;
366abb5dd6eSMark de Wever 
367abb5dd6eSMark de Wever     if (__olength == 1) {
368abb5dd6eSMark de Wever       // Value | Fixed   | Scientific
369abb5dd6eSMark de Wever       // 1e-3  | "0.001" | "1e-03"
370abb5dd6eSMark de Wever       // 1e4   | "10000" | "1e+04"
371abb5dd6eSMark de Wever       _Lower = -3;
372abb5dd6eSMark de Wever       _Upper = 4;
373abb5dd6eSMark de Wever     } else {
374abb5dd6eSMark de Wever       // Value   | Fixed       | Scientific
375abb5dd6eSMark de Wever       // 1234e-7 | "0.0001234" | "1.234e-04"
376abb5dd6eSMark de Wever       // 1234e5  | "123400000" | "1.234e+08"
377abb5dd6eSMark de Wever       _Lower = -static_cast<int32_t>(__olength + 3);
378abb5dd6eSMark de Wever       _Upper = 5;
379abb5dd6eSMark de Wever     }
380abb5dd6eSMark de Wever 
381abb5dd6eSMark de Wever     if (_Lower <= _Ryu_exponent && _Ryu_exponent <= _Upper) {
382abb5dd6eSMark de Wever       _Fmt = chars_format::fixed;
383abb5dd6eSMark de Wever     } else {
384abb5dd6eSMark de Wever       _Fmt = chars_format::scientific;
385abb5dd6eSMark de Wever     }
386abb5dd6eSMark de Wever   } else if (_Fmt == chars_format::general) {
387abb5dd6eSMark de Wever     // C11 7.21.6.1 "The fprintf function"/8:
388abb5dd6eSMark de Wever     // "Let P equal [...] 6 if the precision is omitted [...].
389abb5dd6eSMark de Wever     // Then, if a conversion with style E would have an exponent of X:
390abb5dd6eSMark de Wever     // - if P > X >= -4, the conversion is with style f [...].
391abb5dd6eSMark de Wever     // - otherwise, the conversion is with style e [...]."
392abb5dd6eSMark de Wever     if (-4 <= _Scientific_exponent && _Scientific_exponent < 6) {
393abb5dd6eSMark de Wever       _Fmt = chars_format::fixed;
394abb5dd6eSMark de Wever     } else {
395abb5dd6eSMark de Wever       _Fmt = chars_format::scientific;
396abb5dd6eSMark de Wever     }
397abb5dd6eSMark de Wever   }
398abb5dd6eSMark de Wever 
399abb5dd6eSMark de Wever   if (_Fmt == chars_format::fixed) {
400abb5dd6eSMark de Wever     // Example: _Output == 1729, __olength == 4
401abb5dd6eSMark de Wever 
402abb5dd6eSMark de Wever     // _Ryu_exponent | Printed  | _Whole_digits | _Total_fixed_length  | Notes
403abb5dd6eSMark de Wever     // --------------|----------|---------------|----------------------|---------------------------------------
404abb5dd6eSMark de Wever     //             2 | 172900   |  6            | _Whole_digits        | Ryu can't be used for printing
405abb5dd6eSMark de Wever     //             1 | 17290    |  5            | (sometimes adjusted) | when the trimmed digits are nonzero.
406abb5dd6eSMark de Wever     // --------------|----------|---------------|----------------------|---------------------------------------
407abb5dd6eSMark de Wever     //             0 | 1729     |  4            | _Whole_digits        | Unified length cases.
408abb5dd6eSMark de Wever     // --------------|----------|---------------|----------------------|---------------------------------------
409abb5dd6eSMark de Wever     //            -1 | 172.9    |  3            | __olength + 1        | This case can't happen for
410abb5dd6eSMark de Wever     //            -2 | 17.29    |  2            |                      | __olength == 1, but no additional
411abb5dd6eSMark de Wever     //            -3 | 1.729    |  1            |                      | code is needed to avoid it.
412abb5dd6eSMark de Wever     // --------------|----------|---------------|----------------------|---------------------------------------
413abb5dd6eSMark de Wever     //            -4 | 0.1729   |  0            | 2 - _Ryu_exponent    | C11 7.21.6.1 "The fprintf function"/8:
414abb5dd6eSMark de Wever     //            -5 | 0.01729  | -1            |                      | "If a decimal-point character appears,
415abb5dd6eSMark de Wever     //            -6 | 0.001729 | -2            |                      | at least one digit appears before it."
416abb5dd6eSMark de Wever 
417abb5dd6eSMark de Wever     const int32_t _Whole_digits = static_cast<int32_t>(__olength) + _Ryu_exponent;
418abb5dd6eSMark de Wever 
419abb5dd6eSMark de Wever     uint32_t _Total_fixed_length;
420abb5dd6eSMark de Wever     if (_Ryu_exponent >= 0) { // cases "172900" and "1729"
421abb5dd6eSMark de Wever       _Total_fixed_length = static_cast<uint32_t>(_Whole_digits);
422abb5dd6eSMark de Wever       if (_Output == 1) {
423abb5dd6eSMark de Wever         // Rounding can affect the number of digits.
424abb5dd6eSMark de Wever         // For example, 1e23 is exactly "99999999999999991611392" which is 23 digits instead of 24.
425abb5dd6eSMark de Wever         // We can use a lookup table to detect this and adjust the total length.
426abb5dd6eSMark de Wever         static constexpr uint8_t _Adjustment[309] = {
427abb5dd6eSMark de Wever           0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,
428abb5dd6eSMark de Wever           1,1,0,0,1,0,1,1,1,0,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,1,0,1,1,0,0,0,0,1,1,1,
429abb5dd6eSMark de Wever           1,0,0,0,0,0,0,0,1,1,0,1,1,0,0,1,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,0,0,1,1,1,1,1,0,1,0,1,1,0,1,
430abb5dd6eSMark de Wever           1,0,0,0,0,0,0,0,0,0,1,1,1,0,0,1,0,0,1,0,0,1,1,1,1,0,0,1,1,0,1,1,0,1,1,0,1,0,0,0,1,0,0,0,1,
431abb5dd6eSMark de Wever           0,1,0,1,0,1,1,1,0,0,0,0,0,0,1,1,1,1,0,0,1,0,1,1,1,0,0,0,1,0,1,1,1,1,1,1,0,1,0,1,1,0,0,0,1,
432abb5dd6eSMark de Wever           1,1,0,1,1,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,1,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,
433abb5dd6eSMark de Wever           0,1,0,1,1,1,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,0,1,1,0,1,0,0,0,0,0,1,1,0,1,0 };
434abb5dd6eSMark de Wever         _Total_fixed_length -= _Adjustment[_Ryu_exponent];
435abb5dd6eSMark de Wever         // _Whole_digits doesn't need to be adjusted because these cases won't refer to it later.
436abb5dd6eSMark de Wever       }
437abb5dd6eSMark de Wever     } else if (_Whole_digits > 0) { // case "17.29"
438abb5dd6eSMark de Wever       _Total_fixed_length = __olength + 1;
439abb5dd6eSMark de Wever     } else { // case "0.001729"
440abb5dd6eSMark de Wever       _Total_fixed_length = static_cast<uint32_t>(2 - _Ryu_exponent);
441abb5dd6eSMark de Wever     }
442abb5dd6eSMark de Wever 
443abb5dd6eSMark de Wever     if (_Last - _First < static_cast<ptrdiff_t>(_Total_fixed_length)) {
444abb5dd6eSMark de Wever       return { _Last, errc::value_too_large };
445abb5dd6eSMark de Wever     }
446abb5dd6eSMark de Wever 
447abb5dd6eSMark de Wever     char* _Mid;
448abb5dd6eSMark de Wever     if (_Ryu_exponent > 0) { // case "172900"
449abb5dd6eSMark de Wever       bool _Can_use_ryu;
450abb5dd6eSMark de Wever 
451abb5dd6eSMark de Wever       if (_Ryu_exponent > 22) { // 10^22 is the largest power of 10 that's exactly representable as a double.
452abb5dd6eSMark de Wever         _Can_use_ryu = false;
453abb5dd6eSMark de Wever       } else {
454abb5dd6eSMark de Wever         // Ryu generated X: __v.__mantissa * 10^_Ryu_exponent
455abb5dd6eSMark de Wever         // __v.__mantissa == 2^_Trailing_zero_bits * (__v.__mantissa >> _Trailing_zero_bits)
456abb5dd6eSMark de Wever         // 10^_Ryu_exponent == 2^_Ryu_exponent * 5^_Ryu_exponent
457abb5dd6eSMark de Wever 
458abb5dd6eSMark de Wever         // _Trailing_zero_bits is [0, 56] (aside: because 2^56 is the largest power of 2
459abb5dd6eSMark de Wever         // with 17 decimal digits, which is double's round-trip limit.)
460abb5dd6eSMark de Wever         // _Ryu_exponent is [1, 22].
461abb5dd6eSMark de Wever         // Normalization adds [2, 52] (aside: at least 2 because the pre-normalized mantissa is at least 5).
462abb5dd6eSMark de Wever         // This adds up to [3, 130], which is well below double's maximum binary exponent 1023.
463abb5dd6eSMark de Wever 
464abb5dd6eSMark de Wever         // Therefore, we just need to consider (__v.__mantissa >> _Trailing_zero_bits) * 5^_Ryu_exponent.
465abb5dd6eSMark de Wever 
466abb5dd6eSMark de Wever         // If that product would exceed 53 bits, then X can't be exactly represented as a double.
467abb5dd6eSMark de Wever         // (That's not a problem for round-tripping, because X is close enough to the original double,
468abb5dd6eSMark de Wever         // but X isn't mathematically equal to the original double.) This requires a high-precision fallback.
469abb5dd6eSMark de Wever 
470abb5dd6eSMark de Wever         // If the product is 53 bits or smaller, then X can be exactly represented as a double (and we don't
471abb5dd6eSMark de Wever         // need to re-synthesize it; the original double must have been X, because Ryu wouldn't produce the
472abb5dd6eSMark de Wever         // same output for two different doubles X and Y). This allows Ryu's output to be used (zero-filled).
473abb5dd6eSMark de Wever 
474abb5dd6eSMark de Wever         // (2^53 - 1) / 5^0 (for indexing), (2^53 - 1) / 5^1, ..., (2^53 - 1) / 5^22
475abb5dd6eSMark de Wever         static constexpr uint64_t _Max_shifted_mantissa[23] = {
476abb5dd6eSMark de Wever           9007199254740991u, 1801439850948198u, 360287970189639u, 72057594037927u, 14411518807585u,
477abb5dd6eSMark de Wever           2882303761517u, 576460752303u, 115292150460u, 23058430092u, 4611686018u, 922337203u, 184467440u,
478abb5dd6eSMark de Wever           36893488u, 7378697u, 1475739u, 295147u, 59029u, 11805u, 2361u, 472u, 94u, 18u, 3u };
479abb5dd6eSMark de Wever 
480abb5dd6eSMark de Wever         unsigned long _Trailing_zero_bits;
481*ba87515fSNikolas Klauser #if _LIBCPP_HAS_BITSCAN64
482abb5dd6eSMark de Wever         (void) _BitScanForward64(&_Trailing_zero_bits, __v.__mantissa); // __v.__mantissa is guaranteed nonzero
483abb5dd6eSMark de Wever #else // ^^^ 64-bit ^^^ / vvv 32-bit vvv
484abb5dd6eSMark de Wever         const uint32_t _Low_mantissa = static_cast<uint32_t>(__v.__mantissa);
485abb5dd6eSMark de Wever         if (_Low_mantissa != 0) {
486abb5dd6eSMark de Wever           (void) _BitScanForward(&_Trailing_zero_bits, _Low_mantissa);
487abb5dd6eSMark de Wever         } else {
488abb5dd6eSMark de Wever           const uint32_t _High_mantissa = static_cast<uint32_t>(__v.__mantissa >> 32); // nonzero here
489abb5dd6eSMark de Wever           (void) _BitScanForward(&_Trailing_zero_bits, _High_mantissa);
490abb5dd6eSMark de Wever           _Trailing_zero_bits += 32;
491abb5dd6eSMark de Wever         }
492abb5dd6eSMark de Wever #endif // ^^^ 32-bit ^^^
493abb5dd6eSMark de Wever         const uint64_t _Shifted_mantissa = __v.__mantissa >> _Trailing_zero_bits;
494abb5dd6eSMark de Wever         _Can_use_ryu = _Shifted_mantissa <= _Max_shifted_mantissa[_Ryu_exponent];
495abb5dd6eSMark de Wever       }
496abb5dd6eSMark de Wever 
497abb5dd6eSMark de Wever       if (!_Can_use_ryu) {
498abb5dd6eSMark de Wever         // Print the integer exactly.
499abb5dd6eSMark de Wever         // Performance note: This will redundantly perform bounds checking.
500abb5dd6eSMark de Wever         // Performance note: This will redundantly decompose the IEEE representation.
501abb5dd6eSMark de Wever         return __d2fixed_buffered_n(_First, _Last, __f, 0);
502abb5dd6eSMark de Wever       }
503abb5dd6eSMark de Wever 
504abb5dd6eSMark de Wever       // _Can_use_ryu
505abb5dd6eSMark de Wever       // Print the decimal digits, left-aligned within [_First, _First + _Total_fixed_length).
506abb5dd6eSMark de Wever       _Mid = _First + __olength;
507abb5dd6eSMark de Wever     } else { // cases "1729", "17.29", and "0.001729"
508abb5dd6eSMark de Wever       // Print the decimal digits, right-aligned within [_First, _First + _Total_fixed_length).
509abb5dd6eSMark de Wever       _Mid = _First + _Total_fixed_length;
510abb5dd6eSMark de Wever     }
511abb5dd6eSMark de Wever 
512abb5dd6eSMark de Wever     // We prefer 32-bit operations, even on 64-bit platforms.
513abb5dd6eSMark de Wever     // We have at most 17 digits, and uint32_t can store 9 digits.
514abb5dd6eSMark de Wever     // If _Output doesn't fit into uint32_t, we cut off 8 digits,
515abb5dd6eSMark de Wever     // so the rest will fit into uint32_t.
516abb5dd6eSMark de Wever     if ((_Output >> 32) != 0) {
517abb5dd6eSMark de Wever       // Expensive 64-bit division.
518abb5dd6eSMark de Wever       const uint64_t __q = __div1e8(_Output);
519abb5dd6eSMark de Wever       uint32_t __output2 = static_cast<uint32_t>(_Output - 100000000 * __q);
520abb5dd6eSMark de Wever       _Output = __q;
521abb5dd6eSMark de Wever 
522abb5dd6eSMark de Wever       const uint32_t __c = __output2 % 10000;
523abb5dd6eSMark de Wever       __output2 /= 10000;
524abb5dd6eSMark de Wever       const uint32_t __d = __output2 % 10000;
525abb5dd6eSMark de Wever       const uint32_t __c0 = (__c % 100) << 1;
526abb5dd6eSMark de Wever       const uint32_t __c1 = (__c / 100) << 1;
527abb5dd6eSMark de Wever       const uint32_t __d0 = (__d % 100) << 1;
528abb5dd6eSMark de Wever       const uint32_t __d1 = (__d / 100) << 1;
529abb5dd6eSMark de Wever 
5306e679286SLouis Dionne       std::memcpy(_Mid -= 2, __DIGIT_TABLE + __c0, 2);
5316e679286SLouis Dionne       std::memcpy(_Mid -= 2, __DIGIT_TABLE + __c1, 2);
5326e679286SLouis Dionne       std::memcpy(_Mid -= 2, __DIGIT_TABLE + __d0, 2);
5336e679286SLouis Dionne       std::memcpy(_Mid -= 2, __DIGIT_TABLE + __d1, 2);
534abb5dd6eSMark de Wever     }
535abb5dd6eSMark de Wever     uint32_t __output2 = static_cast<uint32_t>(_Output);
536abb5dd6eSMark de Wever     while (__output2 >= 10000) {
537abb5dd6eSMark de Wever #ifdef __clang__ // TRANSITION, LLVM-38217
538abb5dd6eSMark de Wever       const uint32_t __c = __output2 - 10000 * (__output2 / 10000);
539abb5dd6eSMark de Wever #else
540abb5dd6eSMark de Wever       const uint32_t __c = __output2 % 10000;
541abb5dd6eSMark de Wever #endif
542abb5dd6eSMark de Wever       __output2 /= 10000;
543abb5dd6eSMark de Wever       const uint32_t __c0 = (__c % 100) << 1;
544abb5dd6eSMark de Wever       const uint32_t __c1 = (__c / 100) << 1;
5456e679286SLouis Dionne       std::memcpy(_Mid -= 2, __DIGIT_TABLE + __c0, 2);
5466e679286SLouis Dionne       std::memcpy(_Mid -= 2, __DIGIT_TABLE + __c1, 2);
547abb5dd6eSMark de Wever     }
548abb5dd6eSMark de Wever     if (__output2 >= 100) {
549abb5dd6eSMark de Wever       const uint32_t __c = (__output2 % 100) << 1;
550abb5dd6eSMark de Wever       __output2 /= 100;
5516e679286SLouis Dionne       std::memcpy(_Mid -= 2, __DIGIT_TABLE + __c, 2);
552abb5dd6eSMark de Wever     }
553abb5dd6eSMark de Wever     if (__output2 >= 10) {
554abb5dd6eSMark de Wever       const uint32_t __c = __output2 << 1;
5556e679286SLouis Dionne       std::memcpy(_Mid -= 2, __DIGIT_TABLE + __c, 2);
556abb5dd6eSMark de Wever     } else {
557abb5dd6eSMark de Wever       *--_Mid = static_cast<char>('0' + __output2);
558abb5dd6eSMark de Wever     }
559abb5dd6eSMark de Wever 
560abb5dd6eSMark de Wever     if (_Ryu_exponent > 0) { // case "172900" with _Can_use_ryu
561abb5dd6eSMark de Wever       // Performance note: it might be more efficient to do this immediately after setting _Mid.
5626e679286SLouis Dionne       std::memset(_First + __olength, '0', static_cast<size_t>(_Ryu_exponent));
563abb5dd6eSMark de Wever     } else if (_Ryu_exponent == 0) { // case "1729"
564abb5dd6eSMark de Wever       // Done!
565abb5dd6eSMark de Wever     } else if (_Whole_digits > 0) { // case "17.29"
566abb5dd6eSMark de Wever       // Performance note: moving digits might not be optimal.
5676e679286SLouis Dionne       std::memmove(_First, _First + 1, static_cast<size_t>(_Whole_digits));
568abb5dd6eSMark de Wever       _First[_Whole_digits] = '.';
569abb5dd6eSMark de Wever     } else { // case "0.001729"
570abb5dd6eSMark de Wever       // Performance note: a larger memset() followed by overwriting '.' might be more efficient.
571abb5dd6eSMark de Wever       _First[0] = '0';
572abb5dd6eSMark de Wever       _First[1] = '.';
5736e679286SLouis Dionne       std::memset(_First + 2, '0', static_cast<size_t>(-_Whole_digits));
574abb5dd6eSMark de Wever     }
575abb5dd6eSMark de Wever 
576abb5dd6eSMark de Wever     return { _First + _Total_fixed_length, errc{} };
577abb5dd6eSMark de Wever   }
578abb5dd6eSMark de Wever 
579abb5dd6eSMark de Wever   const uint32_t _Total_scientific_length = __olength + (__olength > 1) // digits + possible decimal point
580abb5dd6eSMark de Wever     + (-100 < _Scientific_exponent && _Scientific_exponent < 100 ? 4 : 5); // + scientific exponent
581abb5dd6eSMark de Wever   if (_Last - _First < static_cast<ptrdiff_t>(_Total_scientific_length)) {
582abb5dd6eSMark de Wever     return { _Last, errc::value_too_large };
583abb5dd6eSMark de Wever   }
584abb5dd6eSMark de Wever   char* const __result = _First;
585abb5dd6eSMark de Wever 
586abb5dd6eSMark de Wever   // Print the decimal digits.
587abb5dd6eSMark de Wever   uint32_t __i = 0;
588abb5dd6eSMark de Wever   // We prefer 32-bit operations, even on 64-bit platforms.
589abb5dd6eSMark de Wever   // We have at most 17 digits, and uint32_t can store 9 digits.
590abb5dd6eSMark de Wever   // If _Output doesn't fit into uint32_t, we cut off 8 digits,
591abb5dd6eSMark de Wever   // so the rest will fit into uint32_t.
592abb5dd6eSMark de Wever   if ((_Output >> 32) != 0) {
593abb5dd6eSMark de Wever     // Expensive 64-bit division.
594abb5dd6eSMark de Wever     const uint64_t __q = __div1e8(_Output);
595abb5dd6eSMark de Wever     uint32_t __output2 = static_cast<uint32_t>(_Output) - 100000000 * static_cast<uint32_t>(__q);
596abb5dd6eSMark de Wever     _Output = __q;
597abb5dd6eSMark de Wever 
598abb5dd6eSMark de Wever     const uint32_t __c = __output2 % 10000;
599abb5dd6eSMark de Wever     __output2 /= 10000;
600abb5dd6eSMark de Wever     const uint32_t __d = __output2 % 10000;
601abb5dd6eSMark de Wever     const uint32_t __c0 = (__c % 100) << 1;
602abb5dd6eSMark de Wever     const uint32_t __c1 = (__c / 100) << 1;
603abb5dd6eSMark de Wever     const uint32_t __d0 = (__d % 100) << 1;
604abb5dd6eSMark de Wever     const uint32_t __d1 = (__d / 100) << 1;
6056e679286SLouis Dionne     std::memcpy(__result + __olength - __i - 1, __DIGIT_TABLE + __c0, 2);
6066e679286SLouis Dionne     std::memcpy(__result + __olength - __i - 3, __DIGIT_TABLE + __c1, 2);
6076e679286SLouis Dionne     std::memcpy(__result + __olength - __i - 5, __DIGIT_TABLE + __d0, 2);
6086e679286SLouis Dionne     std::memcpy(__result + __olength - __i - 7, __DIGIT_TABLE + __d1, 2);
609abb5dd6eSMark de Wever     __i += 8;
610abb5dd6eSMark de Wever   }
611abb5dd6eSMark de Wever   uint32_t __output2 = static_cast<uint32_t>(_Output);
612abb5dd6eSMark de Wever   while (__output2 >= 10000) {
613abb5dd6eSMark de Wever #ifdef __clang__ // TRANSITION, LLVM-38217
614abb5dd6eSMark de Wever     const uint32_t __c = __output2 - 10000 * (__output2 / 10000);
615abb5dd6eSMark de Wever #else
616abb5dd6eSMark de Wever     const uint32_t __c = __output2 % 10000;
617abb5dd6eSMark de Wever #endif
618abb5dd6eSMark de Wever     __output2 /= 10000;
619abb5dd6eSMark de Wever     const uint32_t __c0 = (__c % 100) << 1;
620abb5dd6eSMark de Wever     const uint32_t __c1 = (__c / 100) << 1;
6216e679286SLouis Dionne     std::memcpy(__result + __olength - __i - 1, __DIGIT_TABLE + __c0, 2);
6226e679286SLouis Dionne     std::memcpy(__result + __olength - __i - 3, __DIGIT_TABLE + __c1, 2);
623abb5dd6eSMark de Wever     __i += 4;
624abb5dd6eSMark de Wever   }
625abb5dd6eSMark de Wever   if (__output2 >= 100) {
626abb5dd6eSMark de Wever     const uint32_t __c = (__output2 % 100) << 1;
627abb5dd6eSMark de Wever     __output2 /= 100;
6286e679286SLouis Dionne     std::memcpy(__result + __olength - __i - 1, __DIGIT_TABLE + __c, 2);
629abb5dd6eSMark de Wever     __i += 2;
630abb5dd6eSMark de Wever   }
631abb5dd6eSMark de Wever   if (__output2 >= 10) {
632abb5dd6eSMark de Wever     const uint32_t __c = __output2 << 1;
633abb5dd6eSMark de Wever     // We can't use memcpy here: the decimal dot goes between these two digits.
634abb5dd6eSMark de Wever     __result[2] = __DIGIT_TABLE[__c + 1];
635abb5dd6eSMark de Wever     __result[0] = __DIGIT_TABLE[__c];
636abb5dd6eSMark de Wever   } else {
637abb5dd6eSMark de Wever     __result[0] = static_cast<char>('0' + __output2);
638abb5dd6eSMark de Wever   }
639abb5dd6eSMark de Wever 
640abb5dd6eSMark de Wever   // Print decimal point if needed.
641abb5dd6eSMark de Wever   uint32_t __index;
642abb5dd6eSMark de Wever   if (__olength > 1) {
643abb5dd6eSMark de Wever     __result[1] = '.';
644abb5dd6eSMark de Wever     __index = __olength + 1;
645abb5dd6eSMark de Wever   } else {
646abb5dd6eSMark de Wever     __index = 1;
647abb5dd6eSMark de Wever   }
648abb5dd6eSMark de Wever 
649abb5dd6eSMark de Wever   // Print the exponent.
650abb5dd6eSMark de Wever   __result[__index++] = 'e';
651abb5dd6eSMark de Wever   if (_Scientific_exponent < 0) {
652abb5dd6eSMark de Wever     __result[__index++] = '-';
653abb5dd6eSMark de Wever     _Scientific_exponent = -_Scientific_exponent;
654abb5dd6eSMark de Wever   } else {
655abb5dd6eSMark de Wever     __result[__index++] = '+';
656abb5dd6eSMark de Wever   }
657abb5dd6eSMark de Wever 
658abb5dd6eSMark de Wever   if (_Scientific_exponent >= 100) {
659abb5dd6eSMark de Wever     const int32_t __c = _Scientific_exponent % 10;
6606e679286SLouis Dionne     std::memcpy(__result + __index, __DIGIT_TABLE + 2 * (_Scientific_exponent / 10), 2);
661abb5dd6eSMark de Wever     __result[__index + 2] = static_cast<char>('0' + __c);
662abb5dd6eSMark de Wever     __index += 3;
663abb5dd6eSMark de Wever   } else {
6646e679286SLouis Dionne     std::memcpy(__result + __index, __DIGIT_TABLE + 2 * _Scientific_exponent, 2);
665abb5dd6eSMark de Wever     __index += 2;
666abb5dd6eSMark de Wever   }
667abb5dd6eSMark de Wever 
668abb5dd6eSMark de Wever   return { _First + _Total_scientific_length, errc{} };
669abb5dd6eSMark de Wever }
670abb5dd6eSMark de Wever 
671abb5dd6eSMark de Wever [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __d2d_small_int(const uint64_t __ieeeMantissa, const uint32_t __ieeeExponent,
672abb5dd6eSMark de Wever   __floating_decimal_64* const __v) {
673abb5dd6eSMark de Wever   const uint64_t __m2 = (1ull << __DOUBLE_MANTISSA_BITS) | __ieeeMantissa;
674abb5dd6eSMark de Wever   const int32_t __e2 = static_cast<int32_t>(__ieeeExponent) - __DOUBLE_BIAS - __DOUBLE_MANTISSA_BITS;
675abb5dd6eSMark de Wever 
676abb5dd6eSMark de Wever   if (__e2 > 0) {
677abb5dd6eSMark de Wever     // f = __m2 * 2^__e2 >= 2^53 is an integer.
678abb5dd6eSMark de Wever     // Ignore this case for now.
679abb5dd6eSMark de Wever     return false;
680abb5dd6eSMark de Wever   }
681abb5dd6eSMark de Wever 
682abb5dd6eSMark de Wever   if (__e2 < -52) {
683abb5dd6eSMark de Wever     // f < 1.
684abb5dd6eSMark de Wever     return false;
685abb5dd6eSMark de Wever   }
686abb5dd6eSMark de Wever 
687abb5dd6eSMark de Wever   // Since 2^52 <= __m2 < 2^53 and 0 <= -__e2 <= 52: 1 <= f = __m2 / 2^-__e2 < 2^53.
688abb5dd6eSMark de Wever   // Test if the lower -__e2 bits of the significand are 0, i.e. whether the fraction is 0.
689abb5dd6eSMark de Wever   const uint64_t __mask = (1ull << -__e2) - 1;
690abb5dd6eSMark de Wever   const uint64_t __fraction = __m2 & __mask;
691abb5dd6eSMark de Wever   if (__fraction != 0) {
692abb5dd6eSMark de Wever     return false;
693abb5dd6eSMark de Wever   }
694abb5dd6eSMark de Wever 
695abb5dd6eSMark de Wever   // f is an integer in the range [1, 2^53).
696abb5dd6eSMark de Wever   // Note: __mantissa might contain trailing (decimal) 0's.
697abb5dd6eSMark de Wever   // Note: since 2^53 < 10^16, there is no need to adjust __decimalLength17().
698abb5dd6eSMark de Wever   __v->__mantissa = __m2 >> -__e2;
699abb5dd6eSMark de Wever   __v->__exponent = 0;
700abb5dd6eSMark de Wever   return true;
701abb5dd6eSMark de Wever }
702abb5dd6eSMark de Wever 
703abb5dd6eSMark de Wever [[nodiscard]] to_chars_result __d2s_buffered_n(char* const _First, char* const _Last, const double __f,
704abb5dd6eSMark de Wever   const chars_format _Fmt) {
705abb5dd6eSMark de Wever 
706abb5dd6eSMark de Wever   // Step 1: Decode the floating-point number, and unify normalized and subnormal cases.
707abb5dd6eSMark de Wever   const uint64_t __bits = __double_to_bits(__f);
708abb5dd6eSMark de Wever 
709abb5dd6eSMark de Wever   // Case distinction; exit early for the easy cases.
710abb5dd6eSMark de Wever   if (__bits == 0) {
711abb5dd6eSMark de Wever     if (_Fmt == chars_format::scientific) {
712abb5dd6eSMark de Wever       if (_Last - _First < 5) {
713abb5dd6eSMark de Wever         return { _Last, errc::value_too_large };
714abb5dd6eSMark de Wever       }
715abb5dd6eSMark de Wever 
7166e679286SLouis Dionne       std::memcpy(_First, "0e+00", 5);
717abb5dd6eSMark de Wever 
718abb5dd6eSMark de Wever       return { _First + 5, errc{} };
719abb5dd6eSMark de Wever     }
720abb5dd6eSMark de Wever 
721abb5dd6eSMark de Wever     // Print "0" for chars_format::fixed, chars_format::general, and chars_format{}.
722abb5dd6eSMark de Wever     if (_First == _Last) {
723abb5dd6eSMark de Wever       return { _Last, errc::value_too_large };
724abb5dd6eSMark de Wever     }
725abb5dd6eSMark de Wever 
726abb5dd6eSMark de Wever     *_First = '0';
727abb5dd6eSMark de Wever 
728abb5dd6eSMark de Wever     return { _First + 1, errc{} };
729abb5dd6eSMark de Wever   }
730abb5dd6eSMark de Wever 
731abb5dd6eSMark de Wever   // Decode __bits into mantissa and exponent.
732abb5dd6eSMark de Wever   const uint64_t __ieeeMantissa = __bits & ((1ull << __DOUBLE_MANTISSA_BITS) - 1);
733abb5dd6eSMark de Wever   const uint32_t __ieeeExponent = static_cast<uint32_t>(__bits >> __DOUBLE_MANTISSA_BITS);
734abb5dd6eSMark de Wever 
735abb5dd6eSMark de Wever   if (_Fmt == chars_format::fixed) {
736abb5dd6eSMark de Wever     // const uint64_t _Mantissa2 = __ieeeMantissa | (1ull << __DOUBLE_MANTISSA_BITS); // restore implicit bit
737abb5dd6eSMark de Wever     const int32_t _Exponent2 = static_cast<int32_t>(__ieeeExponent)
738abb5dd6eSMark de Wever       - __DOUBLE_BIAS - __DOUBLE_MANTISSA_BITS; // bias and normalization
739abb5dd6eSMark de Wever 
740abb5dd6eSMark de Wever     // Normal values are equal to _Mantissa2 * 2^_Exponent2.
741abb5dd6eSMark de Wever     // (Subnormals are different, but they'll be rejected by the _Exponent2 test here, so they can be ignored.)
742abb5dd6eSMark de Wever 
743abb5dd6eSMark de Wever     // For nonzero integers, _Exponent2 >= -52. (The minimum value occurs when _Mantissa2 * 2^_Exponent2 is 1.
744abb5dd6eSMark de Wever     // In that case, _Mantissa2 is the implicit 1 bit followed by 52 zeros, so _Exponent2 is -52 to shift away
745abb5dd6eSMark de Wever     // the zeros.) The dense range of exactly representable integers has negative or zero exponents
746abb5dd6eSMark de Wever     // (as positive exponents make the range non-dense). For that dense range, Ryu will always be used:
747abb5dd6eSMark de Wever     // every digit is necessary to uniquely identify the value, so Ryu must print them all.
748abb5dd6eSMark de Wever 
749abb5dd6eSMark de Wever     // Positive exponents are the non-dense range of exactly representable integers. This contains all of the values
750abb5dd6eSMark de Wever     // for which Ryu can't be used (and a few Ryu-friendly values). We can save time by detecting positive
751abb5dd6eSMark de Wever     // exponents here and skipping Ryu. Calling __d2fixed_buffered_n() with precision 0 is valid for all integers
752abb5dd6eSMark de Wever     // (so it's okay if we call it with a Ryu-friendly value).
753abb5dd6eSMark de Wever     if (_Exponent2 > 0) {
754abb5dd6eSMark de Wever       return __d2fixed_buffered_n(_First, _Last, __f, 0);
755abb5dd6eSMark de Wever     }
756abb5dd6eSMark de Wever   }
757abb5dd6eSMark de Wever 
758abb5dd6eSMark de Wever   __floating_decimal_64 __v;
759abb5dd6eSMark de Wever   const bool __isSmallInt = __d2d_small_int(__ieeeMantissa, __ieeeExponent, &__v);
760abb5dd6eSMark de Wever   if (__isSmallInt) {
761abb5dd6eSMark de Wever     // For small integers in the range [1, 2^53), __v.__mantissa might contain trailing (decimal) zeros.
762abb5dd6eSMark de Wever     // For scientific notation we need to move these zeros into the exponent.
763abb5dd6eSMark de Wever     // (This is not needed for fixed-point notation, so it might be beneficial to trim
764abb5dd6eSMark de Wever     // trailing zeros in __to_chars only if needed - once fixed-point notation output is implemented.)
765abb5dd6eSMark de Wever     for (;;) {
766abb5dd6eSMark de Wever       const uint64_t __q = __div10(__v.__mantissa);
767abb5dd6eSMark de Wever       const uint32_t __r = static_cast<uint32_t>(__v.__mantissa) - 10 * static_cast<uint32_t>(__q);
768abb5dd6eSMark de Wever       if (__r != 0) {
769abb5dd6eSMark de Wever         break;
770abb5dd6eSMark de Wever       }
771abb5dd6eSMark de Wever       __v.__mantissa = __q;
772abb5dd6eSMark de Wever       ++__v.__exponent;
773abb5dd6eSMark de Wever     }
774abb5dd6eSMark de Wever   } else {
775abb5dd6eSMark de Wever     __v = __d2d(__ieeeMantissa, __ieeeExponent);
776abb5dd6eSMark de Wever   }
777abb5dd6eSMark de Wever 
778abb5dd6eSMark de Wever   return __to_chars(_First, _Last, __v, _Fmt, __f);
779abb5dd6eSMark de Wever }
780abb5dd6eSMark de Wever 
781abb5dd6eSMark de Wever _LIBCPP_END_NAMESPACE_STD
782abb5dd6eSMark de Wever 
783abb5dd6eSMark de Wever // clang-format on
784