1 // Copyright 2018 Ulf Adams 2 // 3 // The contents of this file may be used under the terms of the Apache License, 4 // Version 2.0. 5 // 6 // (See accompanying file LICENSE-Apache or copy at 7 // http://www.apache.org/licenses/LICENSE-2.0) 8 // 9 // Alternatively, the contents of this file may be used under the terms of 10 // the Boost Software License, Version 1.0. 11 // (See accompanying file LICENSE-Boost or copy at 12 // https://www.boost.org/LICENSE_1_0.txt) 13 // 14 // Unless required by applicable law or agreed to in writing, this software 15 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 // KIND, either express or implied. 17 #ifndef RYU_F2S_INTRINSICS_H 18 #define RYU_F2S_INTRINSICS_H 19 20 // Defines RYU_32_BIT_PLATFORM if applicable. 21 22 #if defined(RYU_FLOAT_FULL_TABLE) 23 24 25 #else 26 27 #if defined(RYU_OPTIMIZE_SIZE) 28 #else 29 #endif 30 #define FLOAT_POW5_INV_BITCOUNT (DOUBLE_POW5_INV_BITCOUNT - 64) 31 #define FLOAT_POW5_BITCOUNT (DOUBLE_POW5_BITCOUNT - 64) 32 33 #endif 34 35 static inline uint32_t pow5factor_32(uint32_t value) { 36 uint32_t count = 0; 37 for (;;) { 38 assert(value != 0); 39 const uint32_t q = value / 5; 40 const uint32_t r = value % 5; 41 if (r != 0) { 42 break; 43 } 44 value = q; 45 ++count; 46 } 47 return count; 48 } 49 50 // Returns true if value is divisible by 5^p. 51 static inline bool multipleOfPowerOf5_32(const uint32_t value, const uint32_t p) { 52 return pow5factor_32(value) >= p; 53 } 54 55 // Returns true if value is divisible by 2^p. 56 static inline bool multipleOfPowerOf2_32(const uint32_t value, const uint32_t p) { 57 // __builtin_ctz doesn't appear to be faster here. 58 return (value & ((1u << p) - 1)) == 0; 59 } 60 61 // It seems to be slightly faster to avoid uint128_t here, although the 62 // generated code for uint128_t looks slightly nicer. 63 static inline uint32_t mulShift32(const uint32_t m, const uint64_t factor, const int32_t shift) { 64 assert(shift > 32); 65 66 // The casts here help MSVC to avoid calls to the __allmul library 67 // function. 68 const uint32_t factorLo = (uint32_t)(factor); 69 const uint32_t factorHi = (uint32_t)(factor >> 32); 70 const uint64_t bits0 = (uint64_t)m * factorLo; 71 const uint64_t bits1 = (uint64_t)m * factorHi; 72 73 #if defined(RYU_32_BIT_PLATFORM) 74 // On 32-bit platforms we can avoid a 64-bit shift-right since we only 75 // need the upper 32 bits of the result and the shift value is > 32. 76 const uint32_t bits0Hi = (uint32_t)(bits0 >> 32); 77 uint32_t bits1Lo = (uint32_t)(bits1); 78 uint32_t bits1Hi = (uint32_t)(bits1 >> 32); 79 bits1Lo += bits0Hi; 80 bits1Hi += (bits1Lo < bits0Hi); 81 if (shift >= 64) { 82 // s2f can call this with a shift value >= 64, which we have to handle. 83 // This could now be slower than the !defined(RYU_32_BIT_PLATFORM) case. 84 return (uint32_t)(bits1Hi >> (shift - 64)); 85 } else { 86 const int32_t s = shift - 32; 87 return (bits1Hi << (32 - s)) | (bits1Lo >> s); 88 } 89 #else // RYU_32_BIT_PLATFORM 90 const uint64_t sum = (bits0 >> 32) + bits1; 91 const uint64_t shiftedSum = sum >> (shift - 32); 92 assert(shiftedSum <= UINT32_MAX); 93 return (uint32_t) shiftedSum; 94 #endif // RYU_32_BIT_PLATFORM 95 } 96 97 static inline uint32_t mulPow5InvDivPow2(const uint32_t m, const uint32_t q, const int32_t j) { 98 #if defined(RYU_FLOAT_FULL_TABLE) 99 return mulShift32(m, FLOAT_POW5_INV_SPLIT[q], j); 100 #elif defined(RYU_OPTIMIZE_SIZE) 101 // The inverse multipliers are defined as [2^x / 5^y] + 1; the upper 64 bits from the double lookup 102 // table are the correct bits for [2^x / 5^y], so we have to add 1 here. Note that we rely on the 103 // fact that the added 1 that's already stored in the table never overflows into the upper 64 bits. 104 uint64_t pow5[2]; 105 double_computeInvPow5(q, pow5); 106 return mulShift32(m, pow5[1] + 1, j); 107 #else 108 return mulShift32(m, DOUBLE_POW5_INV_SPLIT[q][1] + 1, j); 109 #endif 110 } 111 112 static inline uint32_t mulPow5divPow2(const uint32_t m, const uint32_t i, const int32_t j) { 113 #if defined(RYU_FLOAT_FULL_TABLE) 114 return mulShift32(m, FLOAT_POW5_SPLIT[i], j); 115 #elif defined(RYU_OPTIMIZE_SIZE) 116 uint64_t pow5[2]; 117 double_computePow5(i, pow5); 118 return mulShift32(m, pow5[1], j); 119 #else 120 return mulShift32(m, DOUBLE_POW5_SPLIT[i][1], j); 121 #endif 122 } 123 124 #endif // RYU_F2S_INTRINSICS_H 125