//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// Copyright 2018 Ulf Adams
// Copyright (c) Microsoft Corporation. All rights reserved.

// Boost Software License - Version 1.0 - August 17th, 2003

// Permission is hereby granted, free of charge, to any person or organization
// obtaining a copy of the software and accompanying documentation covered by
// this license (the "Software") to use, reproduce, display, distribute,
// execute, and transmit the Software, and to prepare derivative works of the
// Software, and to permit third-parties to whom the Software is furnished to
// do so, all subject to the following:

// The copyright notices in the Software and this entire statement, including
// the above license grant, this restriction and the following disclaimer,
// must be included in all copies of the Software, in whole or in part, and
// all derivative works of the Software, unless such copies or derivative
// works are solely in the form of machine-executable object code generated by
// a source language processor.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.

#ifndef _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H
#define _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H

// Avoid formatting to keep the changes with the original code minimal.
// clang-format off

#include <__assert>
#include <__config>

#include "include/ryu/ryu.h"

_LIBCPP_BEGIN_NAMESPACE_STD

#if defined(_M_X64) && defined(_MSC_VER)
#define _LIBCPP_INTRINSIC128 1
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  return _umul128(__a, __b, __productHi);
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // For the __shiftright128 intrinsic, the shift value is always
  // modulo 64.
  // In the current implementation of the double-precision version
  // of Ryu, the shift value is always < 64.
  // (The shift value is in the range [49, 58].)
  // Check this here in case a future change requires larger shift
  // values. In that case, this function needs to be adjusted.
  _LIBCPP_ASSERT(__dist < 64, "");
  return __shiftright128(__lo, __hi, static_cast<unsigned char>(__dist));
}

// ^^^ intrinsics available ^^^ / vvv __int128 available vvv
#elif defined(__SIZEOF_INT128__) && ( \
    (defined(__clang__) && !defined(_MSC_VER)) || \
    (defined(__GNUC__) && !defined(__clang__) && !defined(__CUDACC__)))
#define _LIBCPP_INTRINSIC128 1
// We have unsigned __int128 support in clang or gcc
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  auto __temp = __a * (unsigned __int128)__b;
  *__productHi = __temp >> 64;
  return static_cast<uint64_t>(__temp);
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // In the current implementation of the double-precision version
  // of Ryu, the shift value is always < 64.
  // (The shift value is in the range [49, 58].)
  // Check this here in case a future change requires larger shift
  // values. In that case, this function needs to be adjusted.
  _LIBCPP_ASSERT(__dist < 64, "");
  auto __temp = __lo | ((unsigned __int128)__hi << 64);
  // For x64 128-bit shifts using the `shrd` instruction and two 64-bit
  // registers, the shift value is modulo 64. Thus the `& 63` is free.
  return static_cast<uint64_t>(__temp >> (__dist & 63));
}
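
// Illustrative sketch, not part of the original sources: the double-precision
// Ryu code combines these two helpers to form a 128-bit product and then
// extract the bits it needs, roughly along the lines of
//
//   uint64_t __hi;
//   const uint64_t __lo  = __ryu_umul128(__m, __mul, &__hi);     // 128-bit product __hi:__lo
//   const uint64_t __out = __ryu_shiftright128(__lo, __hi, __j); // low 64 bits of (__hi:__lo) >> __j, __j in [49, 58]
//
// where __m, __mul and __j are placeholder names, not the identifiers used by
// the actual callers.
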
#else // ^^^ __int128 available ^^^ / vvv intrinsics unavailable vvv

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_ALWAYS_INLINE uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) {
  // TRANSITION, VSO-634761
  // The casts here help MSVC to avoid calls to the __allmul library function.
  const uint32_t __aLo = static_cast<uint32_t>(__a);
  const uint32_t __aHi = static_cast<uint32_t>(__a >> 32);
  const uint32_t __bLo = static_cast<uint32_t>(__b);
  const uint32_t __bHi = static_cast<uint32_t>(__b >> 32);

  const uint64_t __b00 = static_cast<uint64_t>(__aLo) * __bLo;
  const uint64_t __b01 = static_cast<uint64_t>(__aLo) * __bHi;
  const uint64_t __b10 = static_cast<uint64_t>(__aHi) * __bLo;
  const uint64_t __b11 = static_cast<uint64_t>(__aHi) * __bHi;

  const uint32_t __b00Lo = static_cast<uint32_t>(__b00);
  const uint32_t __b00Hi = static_cast<uint32_t>(__b00 >> 32);

  const uint64_t __mid1 = __b10 + __b00Hi;
  const uint32_t __mid1Lo = static_cast<uint32_t>(__mid1);
  const uint32_t __mid1Hi = static_cast<uint32_t>(__mid1 >> 32);

  const uint64_t __mid2 = __b01 + __mid1Lo;
  const uint32_t __mid2Lo = static_cast<uint32_t>(__mid2);
  const uint32_t __mid2Hi = static_cast<uint32_t>(__mid2 >> 32);

  const uint64_t __pHi = __b11 + __mid1Hi + __mid2Hi;
  const uint64_t __pLo = (static_cast<uint64_t>(__mid2Lo) << 32) | __b00Lo;

  *__productHi = __pHi;
  return __pLo;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) {
  // We don't need to handle the case __dist >= 64 here (see above).
  _LIBCPP_ASSERT(__dist < 64, "");
#ifdef _LIBCPP_64_BIT
  _LIBCPP_ASSERT(__dist > 0, "");
  return (__hi << (64 - __dist)) | (__lo >> __dist);
#else // ^^^ 64-bit ^^^ / vvv 32-bit vvv
  // Avoid a 64-bit shift by taking advantage of the range of shift values.
  _LIBCPP_ASSERT(__dist >= 32, "");
  return (__hi << (64 - __dist)) | (static_cast<uint32_t>(__lo >> 32) >> (__dist - 32));
#endif // ^^^ 32-bit ^^^
}
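
// Illustrative note, not part of the original sources: the portable
// __ryu_umul128 above is schoolbook multiplication on 32-bit limbs,
//
//   __a * __b == __b11 * 2^64 + (__b01 + __b10) * 2^32 + __b00
//
// with __mid1 and __mid2 propagating the carries out of the middle terms.
// The portable __ryu_shiftright128 relies on __dist being in [49, 58]: since
// __dist >= 32, only the upper half of __lo can reach the result, so on
// 32-bit targets shifting the 32-bit value (__lo >> 32) by (__dist - 32) suffices.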

#endif // ^^^ intrinsics unavailable ^^^

#ifndef _LIBCPP_64_BIT

// Returns the high 64 bits of the 128-bit product of __a and __b.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __umulh(const uint64_t __a, const uint64_t __b) {
  // Reuse the __ryu_umul128 implementation.
  // Optimizers will likely eliminate the instructions used to compute the
  // low part of the product.
  uint64_t __hi;
  (void) __ryu_umul128(__a, __b, &__hi);
  return __hi;
}

// On 32-bit platforms, compilers typically generate calls to library
// functions for 64-bit divisions, even if the divisor is a constant.
//
// TRANSITION, LLVM-37932
//
// The functions here perform division-by-constant using multiplications
// in the same way as 64-bit compilers would do.
//
// NB:
// The multipliers and shift values are the ones generated by clang x64
// for expressions like x/5, x/10, etc.

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) {
  return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 2;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) {
  return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 3;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) {
  return __umulh(__x >> 2, 0x28F5C28F5C28F5C3u) >> 2;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) {
  return __umulh(__x, 0xABCC77118461CEFDu) >> 26;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) {
  return __umulh(__x >> 9, 0x44B82FA09B5A53u) >> 11;
}
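
// Illustrative note, not part of the original sources: these are the usual
// "magic number" reciprocals. For example, 0xCCCCCCCCCCCCCCCD is
// ceil(2^66 / 5) == ceil(2^67 / 10), so
//
//   __div5(__x)  == __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 2 == __x / 5
//   __div10(__x) == __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 3 == __x / 10
//
// e.g. __div10(12345): the high 64 bits of 12345 * 0xCCCCCCCCCCCCCCCD are
// 9876, and 9876 >> 3 == 1234 == 12345 / 10.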

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) {
  // Avoid 64-bit math as much as possible.
  // Returning static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x)) would
  // perform 32x64-bit multiplication and 64-bit subtraction.
  // __x and 1000000000 * __div1e9(__x) are guaranteed to differ by
  // less than 10^9, so their highest 32 bits must be identical,
  // so we can truncate both sides to uint32_t before subtracting.
  // We can also simplify static_cast<uint32_t>(1000000000 * __div1e9(__x)).
  // We can truncate before multiplying instead of after, as multiplying
  // the highest 32 bits of __div1e9(__x) can't affect the lowest 32 bits.
  return static_cast<uint32_t>(__x) - 1000000000 * static_cast<uint32_t>(__div1e9(__x));
}

#else // ^^^ 32-bit ^^^ / vvv 64-bit vvv

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) {
  return __x / 5;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) {
  return __x / 10;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) {
  return __x / 100;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) {
  return __x / 100000000;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) {
  return __x / 1000000000;
}

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) {
  return static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x));
}

#endif // ^^^ 64-bit ^^^

[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __pow5Factor(uint64_t __value) {
  uint32_t __count = 0;
  for (;;) {
    _LIBCPP_ASSERT(__value != 0, "");
    const uint64_t __q = __div5(__value);
    const uint32_t __r = static_cast<uint32_t>(__value) - 5 * static_cast<uint32_t>(__q);
    if (__r != 0) {
      break;
    }
    __value = __q;
    ++__count;
  }
  return __count;
}

// Returns true if __value is divisible by 5^__p.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf5(const uint64_t __value, const uint32_t __p) {
  // I tried a case distinction on __p, but there was no performance difference.
  return __pow5Factor(__value) >= __p;
}
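
// Illustrative note, not part of the original sources: __pow5Factor returns
// the exponent of 5 in the factorization of its argument. For example,
// __pow5Factor(1250) == 4 because 1250 == 2 * 5^4, so
// __multipleOfPowerOf5(1250, 3) is true and __multipleOfPowerOf5(1250, 5) is false.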

// Returns true if __value is divisible by 2^__p.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) {
  _LIBCPP_ASSERT(__value != 0, "");
  _LIBCPP_ASSERT(__p < 64, "");
  // __builtin_ctzll doesn't appear to be faster here.
  return (__value & ((1ull << __p) - 1)) == 0;
}

_LIBCPP_END_NAMESPACE_STD

// clang-format on

#endif // _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H