1 // Copyright 2018 Ulf Adams 2 // 3 // The contents of this file may be used under the terms of the Apache License, 4 // Version 2.0. 5 // 6 // (See accompanying file LICENSE-Apache or copy at 7 // http://www.apache.org/licenses/LICENSE-2.0) 8 // 9 // Alternatively, the contents of this file may be used under the terms of 10 // the Boost Software License, Version 1.0. 11 // (See accompanying file LICENSE-Boost or copy at 12 // https://www.boost.org/LICENSE_1_0.txt) 13 // 14 // Unless required by applicable law or agreed to in writing, this software 15 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 // KIND, either express or implied. 17 18 // Runtime compiler options: 19 // -DRYU_DEBUG Generate verbose debugging output to stdout. 20 21 22 23 #ifdef RYU_DEBUG 24 #endif 25 26 27 #define FLOAT_MANTISSA_BITS 23 28 #define FLOAT_EXPONENT_BITS 8 29 #define FLOAT_BIAS 127 30 31 // A floating decimal representing m * 10^e. 32 typedef struct floating_decimal_32 { 33 uint32_t mantissa; 34 // Decimal exponent's range is -45 to 38 35 // inclusive, and can fit in a short if needed. 36 int32_t exponent; 37 bool sign; 38 } floating_decimal_32; 39 40 static inline floating_decimal_32 f2d(const uint32_t ieeeMantissa, const uint32_t ieeeExponent, const bool ieeeSign) { 41 int32_t e2; 42 uint32_t m2; 43 if (ieeeExponent == 0) { 44 // We subtract 2 so that the bounds computation has 2 additional bits. 45 e2 = 1 - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2; 46 m2 = ieeeMantissa; 47 } else { 48 e2 = (int32_t) ieeeExponent - FLOAT_BIAS - FLOAT_MANTISSA_BITS - 2; 49 m2 = (1u << FLOAT_MANTISSA_BITS) | ieeeMantissa; 50 } 51 const bool even = (m2 & 1) == 0; 52 const bool acceptBounds = even; 53 54 #ifdef RYU_DEBUG 55 printf("-> %u * 2^%d\n", m2, e2 + 2); 56 #endif 57 58 // Step 2: Determine the interval of valid decimal representations. 59 const uint32_t mv = 4 * m2; 60 const uint32_t mp = 4 * m2 + 2; 61 // Implicit bool -> int conversion. True is 1, false is 0. 62 const uint32_t mmShift = ieeeMantissa != 0 || ieeeExponent <= 1; 63 const uint32_t mm = 4 * m2 - 1 - mmShift; 64 65 // Step 3: Convert to a decimal power base using 64-bit arithmetic. 66 uint32_t vr, vp, vm; 67 int32_t e10; 68 bool vmIsTrailingZeros = false; 69 bool vrIsTrailingZeros = false; 70 uint8_t lastRemovedDigit = 0; 71 if (e2 >= 0) { 72 const uint32_t q = log10Pow2(e2); 73 e10 = (int32_t) q; 74 const int32_t k = FLOAT_POW5_INV_BITCOUNT + pow5bits((int32_t) q) - 1; 75 const int32_t i = -e2 + (int32_t) q + k; 76 vr = mulPow5InvDivPow2(mv, q, i); 77 vp = mulPow5InvDivPow2(mp, q, i); 78 vm = mulPow5InvDivPow2(mm, q, i); 79 #ifdef RYU_DEBUG 80 printf("%u * 2^%d / 10^%u\n", mv, e2, q); 81 printf("V+=%u\nV =%u\nV-=%u\n", vp, vr, vm); 82 #endif 83 if (q != 0 && (vp - 1) / 10 <= vm / 10) { 84 // We need to know one removed digit even if we are not going to loop below. We could use 85 // q = X - 1 above, except that would require 33 bits for the result, and we've found that 86 // 32-bit arithmetic is faster even on 64-bit machines. 87 const int32_t l = FLOAT_POW5_INV_BITCOUNT + pow5bits((int32_t) (q - 1)) - 1; 88 lastRemovedDigit = (uint8_t) (mulPow5InvDivPow2(mv, q - 1, -e2 + (int32_t) q - 1 + l) % 10); 89 } 90 if (q <= 9) { 91 // The largest power of 5 that fits in 24 bits is 5^10, but q <= 9 seems to be safe as well. 92 // Only one of mp, mv, and mm can be a multiple of 5, if any. 93 if (mv % 5 == 0) { 94 vrIsTrailingZeros = multipleOfPowerOf5_32(mv, q); 95 } else if (acceptBounds) { 96 vmIsTrailingZeros = multipleOfPowerOf5_32(mm, q); 97 } else { 98 vp -= multipleOfPowerOf5_32(mp, q); 99 } 100 } 101 } else { 102 const uint32_t q = log10Pow5(-e2); 103 e10 = (int32_t) q + e2; 104 const int32_t i = -e2 - (int32_t) q; 105 const int32_t k = pow5bits(i) - FLOAT_POW5_BITCOUNT; 106 int32_t j = (int32_t) q - k; 107 vr = mulPow5divPow2(mv, (uint32_t) i, j); 108 vp = mulPow5divPow2(mp, (uint32_t) i, j); 109 vm = mulPow5divPow2(mm, (uint32_t) i, j); 110 #ifdef RYU_DEBUG 111 printf("%u * 5^%d / 10^%u\n", mv, -e2, q); 112 printf("%u %d %d %d\n", q, i, k, j); 113 printf("V+=%u\nV =%u\nV-=%u\n", vp, vr, vm); 114 #endif 115 if (q != 0 && (vp - 1) / 10 <= vm / 10) { 116 j = (int32_t) q - 1 - (pow5bits(i + 1) - FLOAT_POW5_BITCOUNT); 117 lastRemovedDigit = (uint8_t) (mulPow5divPow2(mv, (uint32_t) (i + 1), j) % 10); 118 } 119 if (q <= 1) { 120 // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits. 121 // mv = 4 * m2, so it always has at least two trailing 0 bits. 122 vrIsTrailingZeros = true; 123 if (acceptBounds) { 124 // mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1. 125 vmIsTrailingZeros = mmShift == 1; 126 } else { 127 // mp = mv + 2, so it always has at least one trailing 0 bit. 128 --vp; 129 } 130 } else if (q < 31) { // TODO(ulfjack): Use a tighter bound here. 131 vrIsTrailingZeros = multipleOfPowerOf2_32(mv, q - 1); 132 #ifdef RYU_DEBUG 133 printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false"); 134 #endif 135 } 136 } 137 #ifdef RYU_DEBUG 138 printf("e10=%d\n", e10); 139 printf("V+=%u\nV =%u\nV-=%u\n", vp, vr, vm); 140 printf("vm is trailing zeros=%s\n", vmIsTrailingZeros ? "true" : "false"); 141 printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false"); 142 #endif 143 144 // Step 4: Find the shortest decimal representation in the interval of valid representations. 145 int32_t removed = 0; 146 uint32_t output; 147 if (vmIsTrailingZeros || vrIsTrailingZeros) { 148 // General case, which happens rarely (~4.0%). 149 while (vp / 10 > vm / 10) { 150 #ifdef __clang__ // https://bugs.llvm.org/show_bug.cgi?id=23106 151 // The compiler does not realize that vm % 10 can be computed from vm / 10 152 // as vm - (vm / 10) * 10. 153 vmIsTrailingZeros &= vm - (vm / 10) * 10 == 0; 154 #else 155 vmIsTrailingZeros &= vm % 10 == 0; 156 #endif 157 vrIsTrailingZeros &= lastRemovedDigit == 0; 158 lastRemovedDigit = (uint8_t) (vr % 10); 159 vr /= 10; 160 vp /= 10; 161 vm /= 10; 162 ++removed; 163 } 164 #ifdef RYU_DEBUG 165 printf("V+=%u\nV =%u\nV-=%u\n", vp, vr, vm); 166 printf("d-10=%s\n", vmIsTrailingZeros ? "true" : "false"); 167 #endif 168 if (vmIsTrailingZeros) { 169 while (vm % 10 == 0) { 170 vrIsTrailingZeros &= lastRemovedDigit == 0; 171 lastRemovedDigit = (uint8_t) (vr % 10); 172 vr /= 10; 173 vp /= 10; 174 vm /= 10; 175 ++removed; 176 } 177 } 178 #ifdef RYU_DEBUG 179 printf("%u %d\n", vr, lastRemovedDigit); 180 printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false"); 181 #endif 182 if (vrIsTrailingZeros && lastRemovedDigit == 5 && vr % 2 == 0) { 183 // Round even if the exact number is .....50..0. 184 lastRemovedDigit = 4; 185 } 186 // We need to take vr + 1 if vr is outside bounds or we need to round up. 187 output = vr + ((vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5); 188 } else { 189 // Specialized for the common case (~96.0%). Percentages below are relative to this. 190 // Loop iterations below (approximately): 191 // 0: 13.6%, 1: 70.7%, 2: 14.1%, 3: 1.39%, 4: 0.14%, 5+: 0.01% 192 while (vp / 10 > vm / 10) { 193 lastRemovedDigit = (uint8_t) (vr % 10); 194 vr /= 10; 195 vp /= 10; 196 vm /= 10; 197 ++removed; 198 } 199 #ifdef RYU_DEBUG 200 printf("%u %d\n", vr, lastRemovedDigit); 201 printf("vr is trailing zeros=%s\n", vrIsTrailingZeros ? "true" : "false"); 202 #endif 203 // We need to take vr + 1 if vr is outside bounds or we need to round up. 204 output = vr + (vr == vm || lastRemovedDigit >= 5); 205 } 206 const int32_t exp = e10 + removed; 207 208 #ifdef RYU_DEBUG 209 printf("V+=%u\nV =%u\nV-=%u\n", vp, vr, vm); 210 printf("O=%u\n", output); 211 printf("EXP=%d\n", exp); 212 #endif 213 214 floating_decimal_32 fd; 215 fd.exponent = exp; 216 fd.mantissa = output; 217 fd.sign = ieeeSign; 218 return fd; 219 } 220 221 static inline int to_chars(const floating_decimal_32 v, char* const result) { 222 // Step 5: Print the decimal representation. 223 int index = 0; 224 if (v.sign) { 225 result[index++] = '-'; 226 } 227 228 uint32_t output = v.mantissa; 229 const uint32_t olength = decimalLength9(output); 230 231 #ifdef RYU_DEBUG 232 printf("DIGITS=%u\n", v.mantissa); 233 printf("OLEN=%u\n", olength); 234 printf("EXP=%u\n", v.exponent + olength); 235 #endif 236 237 // Print the decimal digits. 238 // The following code is equivalent to: 239 // for (uint32_t i = 0; i < olength - 1; ++i) { 240 // const uint32_t c = output % 10; output /= 10; 241 // result[index + olength - i] = (char) ('0' + c); 242 // } 243 // result[index] = '0' + output % 10; 244 uint32_t i = 0; 245 while (output >= 10000) { 246 #ifdef __clang__ // https://bugs.llvm.org/show_bug.cgi?id=38217 247 const uint32_t c = output - 10000 * (output / 10000); 248 #else 249 const uint32_t c = output % 10000; 250 #endif 251 output /= 10000; 252 const uint32_t c0 = (c % 100) << 1; 253 const uint32_t c1 = (c / 100) << 1; 254 memcpy(result + index + olength - i - 1, DIGIT_TABLE + c0, 2); 255 memcpy(result + index + olength - i - 3, DIGIT_TABLE + c1, 2); 256 i += 4; 257 } 258 if (output >= 100) { 259 const uint32_t c = (output % 100) << 1; 260 output /= 100; 261 memcpy(result + index + olength - i - 1, DIGIT_TABLE + c, 2); 262 i += 2; 263 } 264 if (output >= 10) { 265 const uint32_t c = output << 1; 266 // We can't use memcpy here: the decimal dot goes between these two digits. 267 result[index + olength - i] = DIGIT_TABLE[c + 1]; 268 result[index] = DIGIT_TABLE[c]; 269 } else { 270 result[index] = (char) ('0' + output); 271 } 272 273 // Print decimal point if needed. 274 if (olength > 1) { 275 result[index + 1] = '.'; 276 index += olength + 1; 277 } else { 278 ++index; 279 } 280 281 // Print the exponent. 282 result[index++] = 'e'; 283 int32_t exp = v.exponent + (int32_t) olength - 1; 284 if (exp < 0) { 285 result[index++] = '-'; 286 exp = -exp; 287 } else { 288 result[index++] = '+'; 289 } 290 291 memcpy(result + index, DIGIT_TABLE + 2 * exp, 2); 292 index += 2; 293 294 return index; 295 } 296 297 floating_decimal_32 floating_to_fd32(float f) { 298 // Step 1: Decode the floating-point number, and unify normalized and subnormal cases. 299 const uint32_t bits = float_to_bits(f); 300 301 #ifdef RYU_DEBUG 302 printf("IN="); 303 for (int32_t bit = 31; bit >= 0; --bit) { 304 printf("%u", (bits >> bit) & 1); 305 } 306 printf("\n"); 307 #endif 308 309 // Decode bits into sign, mantissa, and exponent. 310 const bool ieeeSign = ((bits >> (FLOAT_MANTISSA_BITS + FLOAT_EXPONENT_BITS)) & 1) != 0; 311 const uint32_t ieeeMantissa = bits & ((1u << FLOAT_MANTISSA_BITS) - 1); 312 const uint32_t ieeeExponent = (bits >> FLOAT_MANTISSA_BITS) & ((1u << FLOAT_EXPONENT_BITS) - 1); 313 314 // Case distinction; exit early for the easy cases. 315 if (ieeeExponent == ((1u << FLOAT_EXPONENT_BITS) - 1u) || (ieeeExponent == 0 && ieeeMantissa == 0)) { 316 __builtin_abort(); 317 } 318 319 const floating_decimal_32 v = f2d(ieeeMantissa, ieeeExponent, ieeeSign); 320 return v; 321 } 322