xref: /freebsd-src/contrib/llvm-project/libcxx/src/include/ryu/d2s_intrinsics.h (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
10eae32dcSDimitry Andric //===----------------------------------------------------------------------===//
20eae32dcSDimitry Andric //
30eae32dcSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40eae32dcSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50eae32dcSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60eae32dcSDimitry Andric //
70eae32dcSDimitry Andric //===----------------------------------------------------------------------===//
80eae32dcSDimitry Andric 
90eae32dcSDimitry Andric // Copyright (c) Microsoft Corporation.
100eae32dcSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
110eae32dcSDimitry Andric 
120eae32dcSDimitry Andric // Copyright 2018 Ulf Adams
130eae32dcSDimitry Andric // Copyright (c) Microsoft Corporation. All rights reserved.
140eae32dcSDimitry Andric 
150eae32dcSDimitry Andric // Boost Software License - Version 1.0 - August 17th, 2003
160eae32dcSDimitry Andric 
170eae32dcSDimitry Andric // Permission is hereby granted, free of charge, to any person or organization
180eae32dcSDimitry Andric // obtaining a copy of the software and accompanying documentation covered by
190eae32dcSDimitry Andric // this license (the "Software") to use, reproduce, display, distribute,
200eae32dcSDimitry Andric // execute, and transmit the Software, and to prepare derivative works of the
210eae32dcSDimitry Andric // Software, and to permit third-parties to whom the Software is furnished to
220eae32dcSDimitry Andric // do so, all subject to the following:
230eae32dcSDimitry Andric 
240eae32dcSDimitry Andric // The copyright notices in the Software and this entire statement, including
250eae32dcSDimitry Andric // the above license grant, this restriction and the following disclaimer,
260eae32dcSDimitry Andric // must be included in all copies of the Software, in whole or in part, and
270eae32dcSDimitry Andric // all derivative works of the Software, unless such copies or derivative
280eae32dcSDimitry Andric // works are solely in the form of machine-executable object code generated by
290eae32dcSDimitry Andric // a source language processor.
300eae32dcSDimitry Andric 
310eae32dcSDimitry Andric // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
320eae32dcSDimitry Andric // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
330eae32dcSDimitry Andric // FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
340eae32dcSDimitry Andric // SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
350eae32dcSDimitry Andric // FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
360eae32dcSDimitry Andric // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
370eae32dcSDimitry Andric // DEALINGS IN THE SOFTWARE.
380eae32dcSDimitry Andric 
390eae32dcSDimitry Andric #ifndef _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H
400eae32dcSDimitry Andric #define _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H
410eae32dcSDimitry Andric 
420eae32dcSDimitry Andric // Avoid formatting to keep the changes with the original code minimal.
430eae32dcSDimitry Andric // clang-format off
440eae32dcSDimitry Andric 
4581ad6265SDimitry Andric #include <__assert>
4681ad6265SDimitry Andric #include <__config>
4781ad6265SDimitry Andric 
4881ad6265SDimitry Andric #include "include/ryu/ryu.h"
490eae32dcSDimitry Andric 
500eae32dcSDimitry Andric _LIBCPP_BEGIN_NAMESPACE_STD
510eae32dcSDimitry Andric 
520eae32dcSDimitry Andric #if defined(_M_X64) && defined(_MSC_VER)
530eae32dcSDimitry Andric #define _LIBCPP_INTRINSIC128 1
__ryu_umul128(const uint64_t __a,const uint64_t __b,uint64_t * const __productHi)540eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
550eae32dcSDimitry Andric   return _umul128(__a, __b, __productHi);
560eae32dcSDimitry Andric }
570eae32dcSDimitry Andric 
__ryu_shiftright128(const uint64_t __lo,const uint64_t __hi,const uint32_t __dist)580eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
590eae32dcSDimitry Andric   // For the __shiftright128 intrinsic, the shift value is always
600eae32dcSDimitry Andric   // modulo 64.
610eae32dcSDimitry Andric   // In the current implementation of the double-precision version
620eae32dcSDimitry Andric   // of Ryu, the shift value is always < 64.
630eae32dcSDimitry Andric   // (The shift value is in the range [49, 58].)
640eae32dcSDimitry Andric   // Check this here in case a future change requires larger shift
650eae32dcSDimitry Andric   // values. In this case this function needs to be adjusted.
66*5f757f3fSDimitry Andric   _LIBCPP_ASSERT_INTERNAL(__dist < 64, "");
670eae32dcSDimitry Andric   return __shiftright128(__lo, __hi, static_cast<unsigned char>(__dist));
680eae32dcSDimitry Andric }
690eae32dcSDimitry Andric 
700eae32dcSDimitry Andric // ^^^ intrinsics available ^^^ / vvv __int128 available vvv
710eae32dcSDimitry Andric #elif defined(__SIZEOF_INT128__) && ( \
720eae32dcSDimitry Andric     (defined(__clang__) && !defined(_MSC_VER)) || \
730eae32dcSDimitry Andric     (defined(__GNUC__) && !defined(__clang__) && !defined(__CUDACC__)))
740eae32dcSDimitry Andric #define _LIBCPP_INTRINSIC128 1
750eae32dcSDimitry Andric   // We have __uint128 support in clang or gcc
760eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
770eae32dcSDimitry Andric   auto __temp = __a * (unsigned __int128)__b;
780eae32dcSDimitry Andric   *__productHi = __temp >> 64;
790eae32dcSDimitry Andric   return static_cast<uint64_t>(__temp);
800eae32dcSDimitry Andric }
810eae32dcSDimitry Andric 
820eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
830eae32dcSDimitry Andric   // In the current implementation of the double-precision version
840eae32dcSDimitry Andric   // of Ryu, the shift value is always < 64.
850eae32dcSDimitry Andric   // (The shift value is in the range [49, 58].)
860eae32dcSDimitry Andric   // Check this here in case a future change requires larger shift
870eae32dcSDimitry Andric   // values. In this case this function needs to be adjusted.
88*5f757f3fSDimitry Andric   _LIBCPP_ASSERT_INTERNAL(__dist < 64, "");
890eae32dcSDimitry Andric   auto __temp = __lo | ((unsigned __int128)__hi << 64);
900eae32dcSDimitry Andric   // For x64 128-bit shfits using the `shrd` instruction and two 64-bit
910eae32dcSDimitry Andric   // registers, the shift value is modulo 64.  Thus the `& 63` is free.
920eae32dcSDimitry Andric   return static_cast<uint64_t>(__temp >> (__dist & 63));
930eae32dcSDimitry Andric }
940eae32dcSDimitry Andric #else // ^^^ __int128 available ^^^ / vvv intrinsics unavailable vvv
950eae32dcSDimitry Andric 
960eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_ALWAYS_INLINE uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
970eae32dcSDimitry Andric   // TRANSITION, VSO-634761
980eae32dcSDimitry Andric   // The casts here help MSVC to avoid calls to the __allmul library function.
990eae32dcSDimitry Andric   const uint32_t __aLo = static_cast<uint32_t>(__a);
1000eae32dcSDimitry Andric   const uint32_t __aHi = static_cast<uint32_t>(__a >> 32);
1010eae32dcSDimitry Andric   const uint32_t __bLo = static_cast<uint32_t>(__b);
1020eae32dcSDimitry Andric   const uint32_t __bHi = static_cast<uint32_t>(__b >> 32);
1030eae32dcSDimitry Andric 
1040eae32dcSDimitry Andric   const uint64_t __b00 = static_cast<uint64_t>(__aLo) * __bLo;
1050eae32dcSDimitry Andric   const uint64_t __b01 = static_cast<uint64_t>(__aLo) * __bHi;
1060eae32dcSDimitry Andric   const uint64_t __b10 = static_cast<uint64_t>(__aHi) * __bLo;
1070eae32dcSDimitry Andric   const uint64_t __b11 = static_cast<uint64_t>(__aHi) * __bHi;
1080eae32dcSDimitry Andric 
1090eae32dcSDimitry Andric   const uint32_t __b00Lo = static_cast<uint32_t>(__b00);
1100eae32dcSDimitry Andric   const uint32_t __b00Hi = static_cast<uint32_t>(__b00 >> 32);
1110eae32dcSDimitry Andric 
1120eae32dcSDimitry Andric   const uint64_t __mid1 = __b10 + __b00Hi;
1130eae32dcSDimitry Andric   const uint32_t __mid1Lo = static_cast<uint32_t>(__mid1);
1140eae32dcSDimitry Andric   const uint32_t __mid1Hi = static_cast<uint32_t>(__mid1 >> 32);
1150eae32dcSDimitry Andric 
1160eae32dcSDimitry Andric   const uint64_t __mid2 = __b01 + __mid1Lo;
1170eae32dcSDimitry Andric   const uint32_t __mid2Lo = static_cast<uint32_t>(__mid2);
1180eae32dcSDimitry Andric   const uint32_t __mid2Hi = static_cast<uint32_t>(__mid2 >> 32);
1190eae32dcSDimitry Andric 
1200eae32dcSDimitry Andric   const uint64_t __pHi = __b11 + __mid1Hi + __mid2Hi;
1210eae32dcSDimitry Andric   const uint64_t __pLo = (static_cast<uint64_t>(__mid2Lo) << 32) | __b00Lo;
1220eae32dcSDimitry Andric 
1230eae32dcSDimitry Andric   *__productHi = __pHi;
1240eae32dcSDimitry Andric   return __pLo;
1250eae32dcSDimitry Andric }
1260eae32dcSDimitry Andric 
1270eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
1280eae32dcSDimitry Andric   // We don't need to handle the case __dist >= 64 here (see above).
129*5f757f3fSDimitry Andric   _LIBCPP_ASSERT_INTERNAL(__dist < 64, "");
1300eae32dcSDimitry Andric #ifdef _LIBCPP_64_BIT
131*5f757f3fSDimitry Andric   _LIBCPP_ASSERT_INTERNAL(__dist > 0, "");
1320eae32dcSDimitry Andric   return (__hi << (64 - __dist)) | (__lo >> __dist);
1330eae32dcSDimitry Andric #else // ^^^ 64-bit ^^^ / vvv 32-bit vvv
1340eae32dcSDimitry Andric   // Avoid a 64-bit shift by taking advantage of the range of shift values.
135*5f757f3fSDimitry Andric   _LIBCPP_ASSERT_INTERNAL(__dist >= 32, "");
1360eae32dcSDimitry Andric   return (__hi << (64 - __dist)) | (static_cast<uint32_t>(__lo >> 32) >> (__dist - 32));
1370eae32dcSDimitry Andric #endif // ^^^ 32-bit ^^^
1380eae32dcSDimitry Andric }
1390eae32dcSDimitry Andric 
1400eae32dcSDimitry Andric #endif // ^^^ intrinsics unavailable ^^^
1410eae32dcSDimitry Andric 
1420eae32dcSDimitry Andric #ifndef _LIBCPP_64_BIT
1430eae32dcSDimitry Andric 
1440eae32dcSDimitry Andric // Returns the high 64 bits of the 128-bit product of __a and __b.
__umulh(const uint64_t __a,const uint64_t __b)1450eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __umulh(const uint64_t __a, const uint64_t __b) {
1460eae32dcSDimitry Andric   // Reuse the __ryu_umul128 implementation.
1470eae32dcSDimitry Andric   // Optimizers will likely eliminate the instructions used to compute the
1480eae32dcSDimitry Andric   // low part of the product.
1490eae32dcSDimitry Andric   uint64_t __hi;
1500eae32dcSDimitry Andric   (void) __ryu_umul128(__a, __b, &__hi);
1510eae32dcSDimitry Andric   return __hi;
1520eae32dcSDimitry Andric }
1530eae32dcSDimitry Andric 
1540eae32dcSDimitry Andric // On 32-bit platforms, compilers typically generate calls to library
1550eae32dcSDimitry Andric // functions for 64-bit divisions, even if the divisor is a constant.
1560eae32dcSDimitry Andric //
1570eae32dcSDimitry Andric // TRANSITION, LLVM-37932
1580eae32dcSDimitry Andric //
1590eae32dcSDimitry Andric // The functions here perform division-by-constant using multiplications
1600eae32dcSDimitry Andric // in the same way as 64-bit compilers would do.
1610eae32dcSDimitry Andric //
1620eae32dcSDimitry Andric // NB:
1630eae32dcSDimitry Andric // The multipliers and shift values are the ones generated by clang x64
1640eae32dcSDimitry Andric // for expressions like x/5, x/10, etc.
1650eae32dcSDimitry Andric 
__div5(const uint64_t __x)1660eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) {
1670eae32dcSDimitry Andric   return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 2;
1680eae32dcSDimitry Andric }
1690eae32dcSDimitry Andric 
__div10(const uint64_t __x)1700eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) {
1710eae32dcSDimitry Andric   return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 3;
1720eae32dcSDimitry Andric }
1730eae32dcSDimitry Andric 
__div100(const uint64_t __x)1740eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) {
1750eae32dcSDimitry Andric   return __umulh(__x >> 2, 0x28F5C28F5C28F5C3u) >> 2;
1760eae32dcSDimitry Andric }
1770eae32dcSDimitry Andric 
__div1e8(const uint64_t __x)1780eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) {
1790eae32dcSDimitry Andric   return __umulh(__x, 0xABCC77118461CEFDu) >> 26;
1800eae32dcSDimitry Andric }
1810eae32dcSDimitry Andric 
__div1e9(const uint64_t __x)1820eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) {
1830eae32dcSDimitry Andric   return __umulh(__x >> 9, 0x44B82FA09B5A53u) >> 11;
1840eae32dcSDimitry Andric }
1850eae32dcSDimitry Andric 
__mod1e9(const uint64_t __x)1860eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) {
1870eae32dcSDimitry Andric   // Avoid 64-bit math as much as possible.
1880eae32dcSDimitry Andric   // Returning static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x)) would
1890eae32dcSDimitry Andric   // perform 32x64-bit multiplication and 64-bit subtraction.
1900eae32dcSDimitry Andric   // __x and 1000000000 * __div1e9(__x) are guaranteed to differ by
1910eae32dcSDimitry Andric   // less than 10^9, so their highest 32 bits must be identical,
1920eae32dcSDimitry Andric   // so we can truncate both sides to uint32_t before subtracting.
1930eae32dcSDimitry Andric   // We can also simplify static_cast<uint32_t>(1000000000 * __div1e9(__x)).
1940eae32dcSDimitry Andric   // We can truncate before multiplying instead of after, as multiplying
1950eae32dcSDimitry Andric   // the highest 32 bits of __div1e9(__x) can't affect the lowest 32 bits.
1960eae32dcSDimitry Andric   return static_cast<uint32_t>(__x) - 1000000000 * static_cast<uint32_t>(__div1e9(__x));
1970eae32dcSDimitry Andric }
1980eae32dcSDimitry Andric 
1990eae32dcSDimitry Andric #else // ^^^ 32-bit ^^^ / vvv 64-bit vvv
2000eae32dcSDimitry Andric 
__div5(const uint64_t __x)2010eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) {
2020eae32dcSDimitry Andric   return __x / 5;
2030eae32dcSDimitry Andric }
2040eae32dcSDimitry Andric 
__div10(const uint64_t __x)2050eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) {
2060eae32dcSDimitry Andric   return __x / 10;
2070eae32dcSDimitry Andric }
2080eae32dcSDimitry Andric 
__div100(const uint64_t __x)2090eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) {
2100eae32dcSDimitry Andric   return __x / 100;
2110eae32dcSDimitry Andric }
2120eae32dcSDimitry Andric 
__div1e8(const uint64_t __x)2130eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) {
2140eae32dcSDimitry Andric   return __x / 100000000;
2150eae32dcSDimitry Andric }
2160eae32dcSDimitry Andric 
__div1e9(const uint64_t __x)2170eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) {
2180eae32dcSDimitry Andric   return __x / 1000000000;
2190eae32dcSDimitry Andric }
2200eae32dcSDimitry Andric 
__mod1e9(const uint64_t __x)2210eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) {
2220eae32dcSDimitry Andric   return static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x));
2230eae32dcSDimitry Andric }
2240eae32dcSDimitry Andric 
2250eae32dcSDimitry Andric #endif // ^^^ 64-bit ^^^
2260eae32dcSDimitry Andric 
__pow5Factor(uint64_t __value)2270eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __pow5Factor(uint64_t __value) {
2280eae32dcSDimitry Andric   uint32_t __count = 0;
2290eae32dcSDimitry Andric   for (;;) {
230*5f757f3fSDimitry Andric     _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
2310eae32dcSDimitry Andric     const uint64_t __q = __div5(__value);
2320eae32dcSDimitry Andric     const uint32_t __r = static_cast<uint32_t>(__value) - 5 * static_cast<uint32_t>(__q);
2330eae32dcSDimitry Andric     if (__r != 0) {
2340eae32dcSDimitry Andric       break;
2350eae32dcSDimitry Andric     }
2360eae32dcSDimitry Andric     __value = __q;
2370eae32dcSDimitry Andric     ++__count;
2380eae32dcSDimitry Andric   }
2390eae32dcSDimitry Andric   return __count;
2400eae32dcSDimitry Andric }
2410eae32dcSDimitry Andric 
2420eae32dcSDimitry Andric // Returns true if __value is divisible by 5^__p.
__multipleOfPowerOf5(const uint64_t __value,const uint32_t __p)2430eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf5(const uint64_t __value, const uint32_t __p) {
2440eae32dcSDimitry Andric   // I tried a case distinction on __p, but there was no performance difference.
2450eae32dcSDimitry Andric   return __pow5Factor(__value) >= __p;
2460eae32dcSDimitry Andric }
2470eae32dcSDimitry Andric 
2480eae32dcSDimitry Andric // Returns true if __value is divisible by 2^__p.
__multipleOfPowerOf2(const uint64_t __value,const uint32_t __p)2490eae32dcSDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) {
250*5f757f3fSDimitry Andric   _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
251*5f757f3fSDimitry Andric   _LIBCPP_ASSERT_INTERNAL(__p < 64, "");
2520eae32dcSDimitry Andric   // __builtin_ctzll doesn't appear to be faster here.
2530eae32dcSDimitry Andric   return (__value & ((1ull << __p) - 1)) == 0;
2540eae32dcSDimitry Andric }
2550eae32dcSDimitry Andric 
2560eae32dcSDimitry Andric _LIBCPP_END_NAMESPACE_STD
2570eae32dcSDimitry Andric 
2580eae32dcSDimitry Andric // clang-format on
2590eae32dcSDimitry Andric 
2600eae32dcSDimitry Andric #endif // _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H
261