1c120edc7SMichael Jones //===-- x86_64 floating point env manipulation functions --------*- C++ -*-===// 2c120edc7SMichael Jones // 3c120edc7SMichael Jones // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4c120edc7SMichael Jones // See https://llvm.org/LICENSE.txt for license information. 5c120edc7SMichael Jones // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6c120edc7SMichael Jones // 7c120edc7SMichael Jones //===----------------------------------------------------------------------===// 8c120edc7SMichael Jones 9270547f3SGuillaume Chatelet #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENVIMPL_H 10270547f3SGuillaume Chatelet #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENVIMPL_H 11c120edc7SMichael Jones 12e2f8c556SGuillaume Chatelet #include "src/__support/macros/attributes.h" // LIBC_INLINE 13*5ff3ff33SPetr Hosek #include "src/__support/macros/config.h" 14f100ec25SGuillaume Chatelet #include "src/__support/macros/properties/architectures.h" 1571405d90SGuillaume Chatelet 16a2569a76SGuillaume Chatelet #if !defined(LIBC_TARGET_ARCH_IS_X86) 1771405d90SGuillaume Chatelet #error "Invalid include" 1871405d90SGuillaume Chatelet #endif 1971405d90SGuillaume Chatelet 20c120edc7SMichael Jones #include <stdint.h> 21c120edc7SMichael Jones 2275bbf4ddSJob Henandez Lara #include "hdr/types/fenv_t.h" 2364730095SGuillaume Chatelet #include "src/__support/macros/sanitizer.h" 24c120edc7SMichael Jones 25*5ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL { 26c120edc7SMichael Jones namespace fputil { 27c120edc7SMichael Jones 28c120edc7SMichael Jones namespace internal { 29c120edc7SMichael Jones 30c120edc7SMichael Jones // Normally, one should be able to define FE_* macros to the exact rounding mode 31c120edc7SMichael Jones // encodings. However, since we want LLVM libc to be compiled against headers 32c120edc7SMichael Jones // from other libcs, we cannot assume that FE_* macros are always defined in 33c120edc7SMichael Jones // such a manner. 
So, we will define enums corresponding to the x86_64 bit 34c120edc7SMichael Jones // encodings. The implementations can map from FE_* to the corresponding enum 35c120edc7SMichael Jones // values. 36c120edc7SMichael Jones 37c120edc7SMichael Jones // The rounding control values in the x87 control register and the MXCSR 38c120edc7SMichael Jones // register have the same 2-bit enoding but have different bit positions. 39c120edc7SMichael Jones // See below for the bit positions. 40c120edc7SMichael Jones struct RoundingControlValue { 411c92911eSMichael Jones static constexpr uint16_t TO_NEAREST = 0x0; 421c92911eSMichael Jones static constexpr uint16_t DOWNWARD = 0x1; 431c92911eSMichael Jones static constexpr uint16_t UPWARD = 0x2; 441c92911eSMichael Jones static constexpr uint16_t TOWARD_ZERO = 0x3; 45c120edc7SMichael Jones }; 46c120edc7SMichael Jones 471c92911eSMichael Jones static constexpr uint16_t X87_ROUNDING_CONTROL_BIT_POSITION = 10; 481c92911eSMichael Jones static constexpr uint16_t MXCSR_ROUNDING_CONTROL_BIT_POSITION = 13; 49c120edc7SMichael Jones 50c120edc7SMichael Jones // The exception flags in the x87 status register and the MXCSR have the same 51c120edc7SMichael Jones // encoding as well as the same bit positions. 52c120edc7SMichael Jones struct ExceptionFlags { 53073534cbSMichael Jones static constexpr uint16_t INVALID_F = 0x1; 54c120edc7SMichael Jones // Some libcs define __FE_DENORM corresponding to the denormal input 55c120edc7SMichael Jones // exception and include it in FE_ALL_EXCEPTS. We define and use it to 56c120edc7SMichael Jones // support compiling against headers provided by such libcs. 
57073534cbSMichael Jones static constexpr uint16_t DENORMAL_F = 0x2; 58073534cbSMichael Jones static constexpr uint16_t DIV_BY_ZERO_F = 0x4; 59073534cbSMichael Jones static constexpr uint16_t OVERFLOW_F = 0x8; 60073534cbSMichael Jones static constexpr uint16_t UNDERFLOW_F = 0x10; 61073534cbSMichael Jones static constexpr uint16_t INEXACT_F = 0x20; 62c120edc7SMichael Jones }; 63c120edc7SMichael Jones 64c120edc7SMichael Jones // The exception control bits occupy six bits, one bit for each exception. 65c120edc7SMichael Jones // In the x87 control word, they occupy the first 6 bits. In the MXCSR 66c120edc7SMichael Jones // register, they occupy bits 7 to 12. 671c92911eSMichael Jones static constexpr uint16_t X87_EXCEPTION_CONTROL_BIT_POSITION = 0; 68805899e6SMichael Jones static constexpr uint16_t X87_EXCEPTION_CONTROL_BIT_POSITION_HIGH = 24; 691c92911eSMichael Jones static constexpr uint16_t MXCSR_EXCEPTION_CONTOL_BIT_POISTION = 7; 70c120edc7SMichael Jones 71c120edc7SMichael Jones // Exception flags are individual bits in the corresponding registers. 72c120edc7SMichael Jones // So, we just OR the bit values to get the full set of exceptions. 7359c809cdSSiva Chandra Reddy LIBC_INLINE uint16_t get_status_value_for_except(int excepts) { 74c120edc7SMichael Jones // We will make use of the fact that exception control bits are single 75c120edc7SMichael Jones // bit flags in the control registers. 76a98a6e95Sluolent return ((excepts & FE_INVALID) ? ExceptionFlags::INVALID_F : 0) | 77c120edc7SMichael Jones #ifdef __FE_DENORM 78a98a6e95Sluolent ((excepts & __FE_DENORM) ? ExceptionFlags::DENORMAL_F : 0) | 79c120edc7SMichael Jones #endif // __FE_DENORM 80a98a6e95Sluolent ((excepts & FE_DIVBYZERO) ? ExceptionFlags::DIV_BY_ZERO_F : 0) | 81a98a6e95Sluolent ((excepts & FE_OVERFLOW) ? ExceptionFlags::OVERFLOW_F : 0) | 82a98a6e95Sluolent ((excepts & FE_UNDERFLOW) ? ExceptionFlags::UNDERFLOW_F : 0) | 83a98a6e95Sluolent ((excepts & FE_INEXACT) ? 
ExceptionFlags::INEXACT_F : 0); 84c120edc7SMichael Jones } 85c120edc7SMichael Jones 8659c809cdSSiva Chandra Reddy LIBC_INLINE int exception_status_to_macro(uint16_t status) { 87a98a6e95Sluolent return ((status & ExceptionFlags::INVALID_F) ? FE_INVALID : 0) | 88c120edc7SMichael Jones #ifdef __FE_DENORM 89a98a6e95Sluolent ((status & ExceptionFlags::DENORMAL_F) ? __FE_DENORM : 0) | 90c120edc7SMichael Jones #endif // __FE_DENORM 91a98a6e95Sluolent ((status & ExceptionFlags::DIV_BY_ZERO_F) ? FE_DIVBYZERO : 0) | 92a98a6e95Sluolent ((status & ExceptionFlags::OVERFLOW_F) ? FE_OVERFLOW : 0) | 93a98a6e95Sluolent ((status & ExceptionFlags::UNDERFLOW_F) ? FE_UNDERFLOW : 0) | 94a98a6e95Sluolent ((status & ExceptionFlags::INEXACT_F) ? FE_INEXACT : 0); 95c120edc7SMichael Jones } 96c120edc7SMichael Jones 97c120edc7SMichael Jones struct X87StateDescriptor { 981c92911eSMichael Jones uint16_t control_word; 991c92911eSMichael Jones uint16_t unused1; 1001c92911eSMichael Jones uint16_t status_word; 1011c92911eSMichael Jones uint16_t unused2; 102c120edc7SMichael Jones // TODO: Elaborate the remaining 20 bytes as required. 
103c120edc7SMichael Jones uint32_t _[5]; 104c120edc7SMichael Jones }; 105c120edc7SMichael Jones 10659c809cdSSiva Chandra Reddy LIBC_INLINE uint16_t get_x87_control_word() { 107c120edc7SMichael Jones uint16_t w; 108c120edc7SMichael Jones __asm__ __volatile__("fnstcw %0" : "=m"(w)::); 109e2263f14SGuillaume Chatelet MSAN_UNPOISON(&w, sizeof(w)); 110c120edc7SMichael Jones return w; 111c120edc7SMichael Jones } 112c120edc7SMichael Jones 11359c809cdSSiva Chandra Reddy LIBC_INLINE void write_x87_control_word(uint16_t w) { 114c120edc7SMichael Jones __asm__ __volatile__("fldcw %0" : : "m"(w) :); 115c120edc7SMichael Jones } 116c120edc7SMichael Jones 11759c809cdSSiva Chandra Reddy LIBC_INLINE uint16_t get_x87_status_word() { 118c120edc7SMichael Jones uint16_t w; 119c120edc7SMichael Jones __asm__ __volatile__("fnstsw %0" : "=m"(w)::); 120e2263f14SGuillaume Chatelet MSAN_UNPOISON(&w, sizeof(w)); 121c120edc7SMichael Jones return w; 122c120edc7SMichael Jones } 123c120edc7SMichael Jones 12459c809cdSSiva Chandra Reddy LIBC_INLINE void clear_x87_exceptions() { 125c120edc7SMichael Jones __asm__ __volatile__("fnclex" : : :); 126c120edc7SMichael Jones } 127c120edc7SMichael Jones 12859c809cdSSiva Chandra Reddy LIBC_INLINE uint32_t get_mxcsr() { 129c120edc7SMichael Jones uint32_t w; 130c120edc7SMichael Jones __asm__ __volatile__("stmxcsr %0" : "=m"(w)::); 131e2263f14SGuillaume Chatelet MSAN_UNPOISON(&w, sizeof(w)); 132c120edc7SMichael Jones return w; 133c120edc7SMichael Jones } 134c120edc7SMichael Jones 13559c809cdSSiva Chandra Reddy LIBC_INLINE void write_mxcsr(uint32_t w) { 136c120edc7SMichael Jones __asm__ __volatile__("ldmxcsr %0" : : "m"(w) :); 137c120edc7SMichael Jones } 138c120edc7SMichael Jones 13959c809cdSSiva Chandra Reddy LIBC_INLINE void get_x87_state_descriptor(X87StateDescriptor &s) { 140c120edc7SMichael Jones __asm__ __volatile__("fnstenv %0" : "=m"(s)); 141e2263f14SGuillaume Chatelet MSAN_UNPOISON(&s, sizeof(s)); 142c120edc7SMichael Jones } 143c120edc7SMichael Jones 
14459c809cdSSiva Chandra Reddy LIBC_INLINE void write_x87_state_descriptor(const X87StateDescriptor &s) { 145c120edc7SMichael Jones __asm__ __volatile__("fldenv %0" : : "m"(s) :); 146c120edc7SMichael Jones } 147c120edc7SMichael Jones 14859c809cdSSiva Chandra Reddy LIBC_INLINE void fwait() { __asm__ __volatile__("fwait"); } 149c120edc7SMichael Jones 150c120edc7SMichael Jones } // namespace internal 151c120edc7SMichael Jones 15259c809cdSSiva Chandra Reddy LIBC_INLINE int enable_except(int excepts) { 153c120edc7SMichael Jones // In the x87 control word and in MXCSR, an exception is blocked 154c120edc7SMichael Jones // if the corresponding bit is set. That is the reason for all the 155c120edc7SMichael Jones // bit-flip operations below as we need to turn the bits to zero 156c120edc7SMichael Jones // to enable them. 157c120edc7SMichael Jones 1581c92911eSMichael Jones uint16_t bit_mask = internal::get_status_value_for_except(excepts); 159c120edc7SMichael Jones 1601c92911eSMichael Jones uint16_t x87_cw = internal::get_x87_control_word(); 1611c92911eSMichael Jones uint16_t old_excepts = ~x87_cw & 0x3F; // Save previously enabled exceptions. 1621c92911eSMichael Jones x87_cw &= ~bit_mask; 1631c92911eSMichael Jones internal::write_x87_control_word(x87_cw); 164c120edc7SMichael Jones 165c120edc7SMichael Jones // Enabling SSE exceptions via MXCSR is a nice thing to do but 166c120edc7SMichael Jones // might not be of much use practically as SSE exceptions and the x87 167c120edc7SMichael Jones // exceptions are independent of each other. 1681c92911eSMichael Jones uint32_t mxcsr = internal::get_mxcsr(); 1691c92911eSMichael Jones mxcsr &= ~(bit_mask << internal::MXCSR_EXCEPTION_CONTOL_BIT_POISTION); 1701c92911eSMichael Jones internal::write_mxcsr(mxcsr); 171c120edc7SMichael Jones 172c120edc7SMichael Jones // Since the x87 exceptions and SSE exceptions are independent of each, 173c120edc7SMichael Jones // it doesn't make much sence to report both in the return value. 
Most 174c120edc7SMichael Jones // often, the standard floating point functions deal with FPU operations 175c120edc7SMichael Jones // so we will retrun only the old x87 exceptions. 1761c92911eSMichael Jones return internal::exception_status_to_macro(old_excepts); 177c120edc7SMichael Jones } 178c120edc7SMichael Jones 17959c809cdSSiva Chandra Reddy LIBC_INLINE int disable_except(int excepts) { 180c120edc7SMichael Jones // In the x87 control word and in MXCSR, an exception is blocked 181c120edc7SMichael Jones // if the corresponding bit is set. 182c120edc7SMichael Jones 1831c92911eSMichael Jones uint16_t bit_mask = internal::get_status_value_for_except(excepts); 184c120edc7SMichael Jones 1851c92911eSMichael Jones uint16_t x87_cw = internal::get_x87_control_word(); 1861c92911eSMichael Jones uint16_t old_excepts = ~x87_cw & 0x3F; // Save previously enabled exceptions. 1871c92911eSMichael Jones x87_cw |= bit_mask; 1881c92911eSMichael Jones internal::write_x87_control_word(x87_cw); 189c120edc7SMichael Jones 1901c92911eSMichael Jones // Just like in enable_except, it is not clear if disabling SSE exceptions 191c120edc7SMichael Jones // is required. But, we will still do it only as a "nice thing to do". 
1921c92911eSMichael Jones uint32_t mxcsr = internal::get_mxcsr(); 1931c92911eSMichael Jones mxcsr |= (bit_mask << internal::MXCSR_EXCEPTION_CONTOL_BIT_POISTION); 1941c92911eSMichael Jones internal::write_mxcsr(mxcsr); 195c120edc7SMichael Jones 1961c92911eSMichael Jones return internal::exception_status_to_macro(old_excepts); 197c120edc7SMichael Jones } 198c120edc7SMichael Jones 19959c809cdSSiva Chandra Reddy LIBC_INLINE int get_except() { 200122da690SAlex Brachet uint16_t mxcsr = static_cast<uint16_t>(internal::get_mxcsr()); 201805899e6SMichael Jones uint16_t enabled_excepts = ~(mxcsr >> 7) & 0x3F; 2021c92911eSMichael Jones return internal::exception_status_to_macro(enabled_excepts); 203c5cfbe40SSiva Chandra Reddy } 204c5cfbe40SSiva Chandra Reddy 20559c809cdSSiva Chandra Reddy LIBC_INLINE int clear_except(int excepts) { 206c120edc7SMichael Jones internal::X87StateDescriptor state; 2071c92911eSMichael Jones internal::get_x87_state_descriptor(state); 20840a55fffSAlex Brachet state.status_word &= 20940a55fffSAlex Brachet static_cast<uint16_t>(~internal::get_status_value_for_except(excepts)); 2101c92911eSMichael Jones internal::write_x87_state_descriptor(state); 211c120edc7SMichael Jones 2121c92911eSMichael Jones uint32_t mxcsr = internal::get_mxcsr(); 2131c92911eSMichael Jones mxcsr &= ~internal::get_status_value_for_except(excepts); 2141c92911eSMichael Jones internal::write_mxcsr(mxcsr); 215c120edc7SMichael Jones return 0; 216c120edc7SMichael Jones } 217c120edc7SMichael Jones 21859c809cdSSiva Chandra Reddy LIBC_INLINE int test_except(int excepts) { 21950414422SDominic Chen uint16_t status_word = internal::get_x87_status_word(); 22050414422SDominic Chen uint32_t mxcsr = internal::get_mxcsr(); 221c120edc7SMichael Jones // Check both x87 status word and MXCSR. 
22250414422SDominic Chen uint16_t status_value = internal::get_status_value_for_except(excepts); 22340a55fffSAlex Brachet return internal::exception_status_to_macro( 22450414422SDominic Chen static_cast<uint16_t>(status_value & (status_word | mxcsr))); 225c120edc7SMichael Jones } 226c120edc7SMichael Jones 227c120edc7SMichael Jones // Sets the exception flags but does not trigger the exception handler. 22859c809cdSSiva Chandra Reddy LIBC_INLINE int set_except(int excepts) { 2291c92911eSMichael Jones uint16_t status_value = internal::get_status_value_for_except(excepts); 230c120edc7SMichael Jones internal::X87StateDescriptor state; 2311c92911eSMichael Jones internal::get_x87_state_descriptor(state); 2321c92911eSMichael Jones state.status_word |= status_value; 2331c92911eSMichael Jones internal::write_x87_state_descriptor(state); 234c120edc7SMichael Jones 2351c92911eSMichael Jones uint32_t mxcsr = internal::get_mxcsr(); 2361c92911eSMichael Jones mxcsr |= status_value; 2371c92911eSMichael Jones internal::write_mxcsr(mxcsr); 238c120edc7SMichael Jones 239c120edc7SMichael Jones return 0; 240c120edc7SMichael Jones } 241c120edc7SMichael Jones 24259c809cdSSiva Chandra Reddy LIBC_INLINE int raise_except(int excepts) { 2431c92911eSMichael Jones uint16_t status_value = internal::get_status_value_for_except(excepts); 244c120edc7SMichael Jones 245c120edc7SMichael Jones // We set the status flag for exception one at a time and call the 246c120edc7SMichael Jones // fwait instruction to actually get the processor to raise the 247c120edc7SMichael Jones // exception by calling the exception handler. This scheme is per 248678e3ee1SFangrui Song // the description in "8.6 X87 FPU EXCEPTION SYNCHRONIZATION" 249c120edc7SMichael Jones // of the "Intel 64 and IA-32 Architectures Software Developer's 250c120edc7SMichael Jones // Manual, Vol 1". 
251c120edc7SMichael Jones 252d4a01549SJay Foad // FPU status word is read for each exception separately as the 253c120edc7SMichael Jones // exception handler can potentially write to it (typically to clear 254c120edc7SMichael Jones // the corresponding exception flag). By reading it separately, we 255c120edc7SMichael Jones // ensure that the writes by the exception handler are maintained 256c120edc7SMichael Jones // when raising the next exception. 257c120edc7SMichael Jones 2581c92911eSMichael Jones auto raise_helper = [](uint16_t singleExceptFlag) { 259c120edc7SMichael Jones internal::X87StateDescriptor state; 260805899e6SMichael Jones uint32_t mxcsr = 0; 2611c92911eSMichael Jones internal::get_x87_state_descriptor(state); 262805899e6SMichael Jones mxcsr = internal::get_mxcsr(); 2631c92911eSMichael Jones state.status_word |= singleExceptFlag; 264805899e6SMichael Jones mxcsr |= singleExceptFlag; 2651c92911eSMichael Jones internal::write_x87_state_descriptor(state); 266805899e6SMichael Jones internal::write_mxcsr(mxcsr); 267c120edc7SMichael Jones internal::fwait(); 268c120edc7SMichael Jones }; 269c120edc7SMichael Jones 270073534cbSMichael Jones if (status_value & internal::ExceptionFlags::INVALID_F) 271073534cbSMichael Jones raise_helper(internal::ExceptionFlags::INVALID_F); 272073534cbSMichael Jones if (status_value & internal::ExceptionFlags::DIV_BY_ZERO_F) 273073534cbSMichael Jones raise_helper(internal::ExceptionFlags::DIV_BY_ZERO_F); 274073534cbSMichael Jones if (status_value & internal::ExceptionFlags::OVERFLOW_F) 275073534cbSMichael Jones raise_helper(internal::ExceptionFlags::OVERFLOW_F); 276073534cbSMichael Jones if (status_value & internal::ExceptionFlags::UNDERFLOW_F) 277073534cbSMichael Jones raise_helper(internal::ExceptionFlags::UNDERFLOW_F); 278073534cbSMichael Jones if (status_value & internal::ExceptionFlags::INEXACT_F) 279073534cbSMichael Jones raise_helper(internal::ExceptionFlags::INEXACT_F); 280c120edc7SMichael Jones #ifdef __FE_DENORM 
281073534cbSMichael Jones if (status_value & internal::ExceptionFlags::DENORMAL_F) { 282073534cbSMichael Jones raise_helper(internal::ExceptionFlags::DENORMAL_F); 283c120edc7SMichael Jones } 284c120edc7SMichael Jones #endif // __FE_DENORM 285c120edc7SMichael Jones 286c120edc7SMichael Jones // There is no special synchronization scheme available to 287c120edc7SMichael Jones // raise SEE exceptions. So, we will ignore that for now. 288c120edc7SMichael Jones // Just plain writing to the MXCSR register does not guarantee 289c120edc7SMichael Jones // the exception handler will be called. 290c120edc7SMichael Jones 291c120edc7SMichael Jones return 0; 292c120edc7SMichael Jones } 293c120edc7SMichael Jones 29459c809cdSSiva Chandra Reddy LIBC_INLINE int get_round() { 2951c92911eSMichael Jones uint16_t bit_value = 2961c92911eSMichael Jones (internal::get_mxcsr() >> internal::MXCSR_ROUNDING_CONTROL_BIT_POSITION) & 2971c92911eSMichael Jones 0x3; 2981c92911eSMichael Jones switch (bit_value) { 2991c92911eSMichael Jones case internal::RoundingControlValue::TO_NEAREST: 300c120edc7SMichael Jones return FE_TONEAREST; 3011c92911eSMichael Jones case internal::RoundingControlValue::DOWNWARD: 302c120edc7SMichael Jones return FE_DOWNWARD; 3031c92911eSMichael Jones case internal::RoundingControlValue::UPWARD: 304c120edc7SMichael Jones return FE_UPWARD; 3051c92911eSMichael Jones case internal::RoundingControlValue::TOWARD_ZERO: 306c120edc7SMichael Jones return FE_TOWARDZERO; 307c120edc7SMichael Jones default: 308c120edc7SMichael Jones return -1; // Error value. 
309c120edc7SMichael Jones } 310c120edc7SMichael Jones } 311c120edc7SMichael Jones 31259c809cdSSiva Chandra Reddy LIBC_INLINE int set_round(int mode) { 3131c92911eSMichael Jones uint16_t bit_value; 314c120edc7SMichael Jones switch (mode) { 315c120edc7SMichael Jones case FE_TONEAREST: 3161c92911eSMichael Jones bit_value = internal::RoundingControlValue::TO_NEAREST; 317c120edc7SMichael Jones break; 318c120edc7SMichael Jones case FE_DOWNWARD: 3191c92911eSMichael Jones bit_value = internal::RoundingControlValue::DOWNWARD; 320c120edc7SMichael Jones break; 321c120edc7SMichael Jones case FE_UPWARD: 3221c92911eSMichael Jones bit_value = internal::RoundingControlValue::UPWARD; 323c120edc7SMichael Jones break; 324c120edc7SMichael Jones case FE_TOWARDZERO: 3251c92911eSMichael Jones bit_value = internal::RoundingControlValue::TOWARD_ZERO; 326c120edc7SMichael Jones break; 327c120edc7SMichael Jones default: 328c120edc7SMichael Jones return 1; // To indicate failure 329c120edc7SMichael Jones } 330c120edc7SMichael Jones 3311c92911eSMichael Jones uint16_t x87_value = static_cast<uint16_t>( 3321c92911eSMichael Jones bit_value << internal::X87_ROUNDING_CONTROL_BIT_POSITION); 3331c92911eSMichael Jones uint16_t x87_control = internal::get_x87_control_word(); 3341c92911eSMichael Jones x87_control = static_cast<uint16_t>( 3351c92911eSMichael Jones (x87_control & 3361c92911eSMichael Jones ~(uint16_t(0x3) << internal::X87_ROUNDING_CONTROL_BIT_POSITION)) | 3371c92911eSMichael Jones x87_value); 3381c92911eSMichael Jones internal::write_x87_control_word(x87_control); 339c120edc7SMichael Jones 3401c92911eSMichael Jones uint32_t mxcsr_value = bit_value 3411c92911eSMichael Jones << internal::MXCSR_ROUNDING_CONTROL_BIT_POSITION; 3421c92911eSMichael Jones uint32_t mxcsr_control = internal::get_mxcsr(); 3431c92911eSMichael Jones mxcsr_control = (mxcsr_control & 3441c92911eSMichael Jones ~(0x3 << internal::MXCSR_ROUNDING_CONTROL_BIT_POSITION)) | 3451c92911eSMichael Jones mxcsr_value; 
3461c92911eSMichael Jones internal::write_mxcsr(mxcsr_control); 347c120edc7SMichael Jones 348c120edc7SMichael Jones return 0; 349c120edc7SMichael Jones } 350c120edc7SMichael Jones 351c120edc7SMichael Jones namespace internal { 352c120edc7SMichael Jones 35350414422SDominic Chen #if defined(_WIN32) 354c120edc7SMichael Jones // MSVC fenv.h defines a very simple representation of the floating point state 355c120edc7SMichael Jones // which just consists of control and status words of the x87 unit. 356c120edc7SMichael Jones struct FPState { 357805899e6SMichael Jones uint32_t control_word; 358805899e6SMichael Jones uint32_t status_word; 359c120edc7SMichael Jones }; 36050414422SDominic Chen #elif defined(__APPLE__) 36150414422SDominic Chen struct FPState { 36250414422SDominic Chen uint16_t control_word; 36350414422SDominic Chen uint16_t status_word; 36450414422SDominic Chen uint32_t mxcsr; 36550414422SDominic Chen uint8_t reserved[8]; 36650414422SDominic Chen }; 367c120edc7SMichael Jones #else 368c120edc7SMichael Jones struct FPState { 3691c92911eSMichael Jones X87StateDescriptor x87_status; 3701c92911eSMichael Jones uint32_t mxcsr; 371c120edc7SMichael Jones }; 372c120edc7SMichael Jones #endif // _WIN32 373c120edc7SMichael Jones 374c120edc7SMichael Jones } // namespace internal 375c120edc7SMichael Jones 376c120edc7SMichael Jones static_assert( 377c120edc7SMichael Jones sizeof(fenv_t) == sizeof(internal::FPState), 378c120edc7SMichael Jones "Internal floating point state does not match the public fenv_t type."); 379c120edc7SMichael Jones 380c120edc7SMichael Jones #ifdef _WIN32 381805899e6SMichael Jones 382805899e6SMichael Jones // The exception flags in the Windows FEnv struct and the MXCSR have almost 383805899e6SMichael Jones // reversed bit positions. 
384805899e6SMichael Jones struct WinExceptionFlags { 385073534cbSMichael Jones static constexpr uint32_t INEXACT_WIN = 0x01; 386073534cbSMichael Jones static constexpr uint32_t UNDERFLOW_WIN = 0x02; 387073534cbSMichael Jones static constexpr uint32_t OVERFLOW_WIN = 0x04; 388073534cbSMichael Jones static constexpr uint32_t DIV_BY_ZERO_WIN = 0x08; 389073534cbSMichael Jones static constexpr uint32_t INVALID_WIN = 0x10; 390073534cbSMichael Jones static constexpr uint32_t DENORMAL_WIN = 0x20; 391805899e6SMichael Jones 392805899e6SMichael Jones // The Windows FEnv struct has a second copy of all of these bits in the high 393805899e6SMichael Jones // byte of the 32 bit control word. These are used as the source of truth when 394805899e6SMichael Jones // calling fesetenv. 395805899e6SMichael Jones static constexpr uint32_t HIGH_OFFSET = 24; 396805899e6SMichael Jones 397073534cbSMichael Jones static constexpr uint32_t HIGH_INEXACT = INEXACT_WIN << HIGH_OFFSET; 398073534cbSMichael Jones static constexpr uint32_t HIGH_UNDERFLOW = UNDERFLOW_WIN << HIGH_OFFSET; 399073534cbSMichael Jones static constexpr uint32_t HIGH_OVERFLOW = OVERFLOW_WIN << HIGH_OFFSET; 400073534cbSMichael Jones static constexpr uint32_t HIGH_DIV_BY_ZERO = DIV_BY_ZERO_WIN << HIGH_OFFSET; 401073534cbSMichael Jones static constexpr uint32_t HIGH_INVALID = INVALID_WIN << HIGH_OFFSET; 402073534cbSMichael Jones static constexpr uint32_t HIGH_DENORMAL = DENORMAL_WIN << HIGH_OFFSET; 403805899e6SMichael Jones }; 404805899e6SMichael Jones 405805899e6SMichael Jones /* 406805899e6SMichael Jones fenv_t control word format: 407805899e6SMichael Jones 408805899e6SMichael Jones Windows (at least for x64) uses a 4 byte control fenv control word stored in 409805899e6SMichael Jones a 32 bit integer. 
The first byte contains just the rounding mode and the 410805899e6SMichael Jones exception masks, while the last two bytes contain that same information as 411805899e6SMichael Jones well as the flush-to-zero and denormals-are-zero flags. The flags are 412805899e6SMichael Jones represented with a truth table: 413805899e6SMichael Jones 414805899e6SMichael Jones 00 - No flags set 415805899e6SMichael Jones 01 - Flush-to-zero and Denormals-are-zero set 416805899e6SMichael Jones 11 - Flush-to-zero set 417805899e6SMichael Jones 10 - Denormals-are-zero set 418805899e6SMichael Jones 419805899e6SMichael Jones U represents unused. 420805899e6SMichael Jones 421805899e6SMichael Jones +-----Rounding Mode-----+ 422805899e6SMichael Jones | | 423805899e6SMichael Jones ++ ++ 424805899e6SMichael Jones || || 425805899e6SMichael Jones RRMMMMMM UUUUUUUU UUUUFFRR UUMMMMMM 426805899e6SMichael Jones | | || | | 427805899e6SMichael Jones +----+ flags---++ +----+ 428805899e6SMichael Jones | | 429805899e6SMichael Jones +------Exception Masks-----+ 430805899e6SMichael Jones 431805899e6SMichael Jones 432805899e6SMichael Jones fenv_t status word format: 433805899e6SMichael Jones 434805899e6SMichael Jones The status word is a lot simpler for this conversion, since only the 435805899e6SMichael Jones exception flags are used in the MXCSR. 436805899e6SMichael Jones 437805899e6SMichael Jones +----+---Exception Flags---+----+ 438805899e6SMichael Jones | | | | 439805899e6SMichael Jones UUEEEEEE UUUUUUUU UUUUUUUU UUEEEEEE 440805899e6SMichael Jones 441805899e6SMichael Jones 442805899e6SMichael Jones 443805899e6SMichael Jones MXCSR Format: 444805899e6SMichael Jones 445805899e6SMichael Jones The MXCSR format is the same information, just organized differently. Since 446805899e6SMichael Jones the fenv_t struct for windows doesn't include the mxcsr bits, they must be 447805899e6SMichael Jones generated from the control word bits. 
448805899e6SMichael Jones 449805899e6SMichael Jones Exception Masks---+ +---Exception Flags 450805899e6SMichael Jones | | 451805899e6SMichael Jones Flush-to-zero---+ +----+ +----+ 452805899e6SMichael Jones | | | | | 453805899e6SMichael Jones FRRMMMMMMDEEEEEE 454805899e6SMichael Jones || | 455805899e6SMichael Jones ++ +---Denormals-are-zero 456805899e6SMichael Jones | 457805899e6SMichael Jones +---Rounding Mode 458805899e6SMichael Jones 459805899e6SMichael Jones 460805899e6SMichael Jones The mask and flag order is as follows: 461805899e6SMichael Jones 462805899e6SMichael Jones fenv_t mxcsr 463805899e6SMichael Jones 464805899e6SMichael Jones denormal inexact 465805899e6SMichael Jones invalid underflow 466805899e6SMichael Jones div by 0 overflow 467805899e6SMichael Jones overflow div by 0 468805899e6SMichael Jones underflow denormal 469805899e6SMichael Jones inexact invalid 470805899e6SMichael Jones 471805899e6SMichael Jones This is almost reverse, except for denormal and invalid which are in the 472805899e6SMichael Jones same order in both. 
473805899e6SMichael Jones */ 474805899e6SMichael Jones 47559c809cdSSiva Chandra Reddy LIBC_INLINE int get_env(fenv_t *envp) { 476c120edc7SMichael Jones internal::FPState *state = reinterpret_cast<internal::FPState *>(envp); 477805899e6SMichael Jones 478805899e6SMichael Jones uint32_t status_word = 0; 479805899e6SMichael Jones uint32_t control_word = 0; 480805899e6SMichael Jones 481805899e6SMichael Jones uint32_t mxcsr = internal::get_mxcsr(); 482805899e6SMichael Jones 483805899e6SMichael Jones // Set exception flags in the status word 484073534cbSMichael Jones status_word |= (mxcsr & (internal::ExceptionFlags::INVALID_F | 485073534cbSMichael Jones internal::ExceptionFlags::DENORMAL_F)) 486805899e6SMichael Jones << 4; 487073534cbSMichael Jones status_word |= (mxcsr & internal::ExceptionFlags::DIV_BY_ZERO_F) << 1; 488073534cbSMichael Jones status_word |= (mxcsr & internal::ExceptionFlags::OVERFLOW_F) >> 1; 489073534cbSMichael Jones status_word |= (mxcsr & internal::ExceptionFlags::UNDERFLOW_F) >> 3; 490073534cbSMichael Jones status_word |= (mxcsr & internal::ExceptionFlags::INEXACT_F) >> 5; 491805899e6SMichael Jones status_word |= status_word << WinExceptionFlags::HIGH_OFFSET; 492805899e6SMichael Jones 493805899e6SMichael Jones // Set exception masks in bits 0-5 and 24-29 494073534cbSMichael Jones control_word |= (mxcsr & ((internal::ExceptionFlags::INVALID_F | 495073534cbSMichael Jones internal::ExceptionFlags::DENORMAL_F) 496805899e6SMichael Jones << 7)) >> 497805899e6SMichael Jones 3; 498073534cbSMichael Jones control_word |= (mxcsr & (internal::ExceptionFlags::DIV_BY_ZERO_F << 7)) >> 6; 499073534cbSMichael Jones control_word |= (mxcsr & (internal::ExceptionFlags::OVERFLOW_F << 7)) >> 8; 500073534cbSMichael Jones control_word |= (mxcsr & (internal::ExceptionFlags::UNDERFLOW_F << 7)) >> 10; 501073534cbSMichael Jones control_word |= (mxcsr & (internal::ExceptionFlags::INEXACT_F << 7)) >> 12; 502805899e6SMichael Jones control_word |= control_word << 
WinExceptionFlags::HIGH_OFFSET; 503805899e6SMichael Jones 504805899e6SMichael Jones // Set rounding in bits 8-9 and 30-31 505805899e6SMichael Jones control_word |= (mxcsr & 0x6000) >> 5; 506805899e6SMichael Jones control_word |= (mxcsr & 0x6000) << 17; 507805899e6SMichael Jones 508805899e6SMichael Jones // Set flush-to-zero in bit 10 509805899e6SMichael Jones control_word |= (mxcsr & 0x8000) >> 5; 510805899e6SMichael Jones 511805899e6SMichael Jones // Set denormals-are-zero xor flush-to-zero in bit 11 512805899e6SMichael Jones control_word |= (((mxcsr & 0x8000) >> 9) ^ (mxcsr & 0x0040)) << 5; 513805899e6SMichael Jones 514805899e6SMichael Jones state->control_word = control_word; 515805899e6SMichael Jones state->status_word = status_word; 516c120edc7SMichael Jones return 0; 517c120edc7SMichael Jones } 518c120edc7SMichael Jones // set_env (variant in the branch before the #else below): rebuilds a 32-bit MXCSR value from the control_word and status_word stored in *envp (viewed as internal::FPState) and installs it with internal::write_mxcsr; always returns 0. NOTE(review): the WinExceptionFlags constants suggest this is the Windows fenv_t layout -- confirm against the #if outside this view. 51959c809cdSSiva Chandra Reddy LIBC_INLINE int set_env(const fenv_t *envp) { 520c120edc7SMichael Jones const internal::FPState *state = 521c120edc7SMichael Jones reinterpret_cast<const internal::FPState *>(envp); 522805899e6SMichael Jones 523805899e6SMichael Jones uint32_t mxcsr = 0; 524805899e6SMichael Jones 525805899e6SMichael Jones // Set exception flags from the status word 526805899e6SMichael Jones mxcsr |= static_cast<uint16_t>( 527805899e6SMichael Jones (state->status_word & 528805899e6SMichael Jones (WinExceptionFlags::HIGH_DENORMAL | WinExceptionFlags::HIGH_INVALID)) >> 529805899e6SMichael Jones 28); 530805899e6SMichael Jones mxcsr |= static_cast<uint16_t>( 531805899e6SMichael Jones (state->status_word & WinExceptionFlags::HIGH_DIV_BY_ZERO) >> 25); 532805899e6SMichael Jones mxcsr |= static_cast<uint16_t>( 533805899e6SMichael Jones (state->status_word & WinExceptionFlags::HIGH_OVERFLOW) >> 23); 534805899e6SMichael Jones mxcsr |= static_cast<uint16_t>( 535805899e6SMichael Jones (state->status_word & WinExceptionFlags::HIGH_UNDERFLOW) >> 21); 536805899e6SMichael Jones mxcsr |= static_cast<uint16_t>( 537805899e6SMichael Jones 
(state->status_word & WinExceptionFlags::HIGH_INEXACT) >> 19); 538805899e6SMichael Jones 539805899e6SMichael Jones // Set denormals-are-zero from bit 10 xor bit 11 (the result lands in MXCSR bit 6, 0x0040) 540805899e6SMichael Jones mxcsr |= static_cast<uint16_t>( 541805899e6SMichael Jones (((state->control_word & 0x800) >> 1) ^ (state->control_word & 0x400)) >> 542805899e6SMichael Jones 4); 543805899e6SMichael Jones 544805899e6SMichael Jones // Set exception masks from bits 24-29 of the control word 545805899e6SMichael Jones mxcsr |= static_cast<uint16_t>( 546805899e6SMichael Jones (state->control_word & 547805899e6SMichael Jones (WinExceptionFlags::HIGH_DENORMAL | WinExceptionFlags::HIGH_INVALID)) >> 548805899e6SMichael Jones 21); 549805899e6SMichael Jones mxcsr |= static_cast<uint16_t>( 550805899e6SMichael Jones (state->control_word & WinExceptionFlags::HIGH_DIV_BY_ZERO) >> 18); 551805899e6SMichael Jones mxcsr |= static_cast<uint16_t>( 552805899e6SMichael Jones (state->control_word & WinExceptionFlags::HIGH_OVERFLOW) >> 16); 553805899e6SMichael Jones mxcsr |= static_cast<uint16_t>( 554805899e6SMichael Jones (state->control_word & WinExceptionFlags::HIGH_UNDERFLOW) >> 14); 555805899e6SMichael Jones mxcsr |= static_cast<uint16_t>( 556805899e6SMichael Jones (state->control_word & WinExceptionFlags::HIGH_INEXACT) >> 12); 557805899e6SMichael Jones 558805899e6SMichael Jones // Set rounding from bits 30-31 (shifted down into MXCSR bits 13-14, 0x6000) 559805899e6SMichael Jones mxcsr |= static_cast<uint16_t>((state->control_word & 0xc0000000) >> 17); 560805899e6SMichael Jones 561805899e6SMichael Jones // Set flush-to-zero from bit 10 (shifted up into MXCSR bit 15, 0x8000) 562805899e6SMichael Jones mxcsr |= static_cast<uint16_t>((state->control_word & 0x400) << 5); 563805899e6SMichael Jones 564805899e6SMichael Jones internal::write_mxcsr(mxcsr); 565c120edc7SMichael Jones return 0; 566c120edc7SMichael Jones } 567c120edc7SMichael Jones #else // get_env (variant for this branch): stores the current x87 state (only the control and status words when __APPLE__ is defined, the whole descriptor otherwise) and the current MXCSR into *envp, viewed as internal::FPState; always returns 0. 56859c809cdSSiva Chandra Reddy LIBC_INLINE int get_env(fenv_t *envp) { 569c120edc7SMichael Jones internal::FPState *state = reinterpret_cast<internal::FPState *>(envp); 
57050414422SDominic Chen #ifdef __APPLE__ 57150414422SDominic Chen internal::X87StateDescriptor x87_status; 57250414422SDominic Chen internal::get_x87_state_descriptor(x87_status); 57350414422SDominic Chen state->control_word = x87_status.control_word; 57450414422SDominic Chen state->status_word = x87_status.status_word; 57550414422SDominic Chen #else 5761c92911eSMichael Jones internal::get_x87_state_descriptor(state->x87_status); 57750414422SDominic Chen #endif // __APPLE__ 5781c92911eSMichael Jones state->mxcsr = internal::get_mxcsr(); 579c120edc7SMichael Jones return 0; 580c120edc7SMichael Jones } 581c120edc7SMichael Jones // set_env (variant for this branch): installs the environment described by envp and always returns 0. For FE_DFL_ENV it clears the exception flags, masks all exceptions, selects round-to-nearest and x87 extended precision, and clears the MXCSR flush-to-zero and denormals-are-zero bits; otherwise it copies the exception flags and control word (plus the _[] fields when __APPLE__ is not defined) from envp and writes envp's mxcsr unchanged. 58259c809cdSSiva Chandra Reddy LIBC_INLINE int set_env(const fenv_t *envp) { 5835c3c716bSSiva Chandra Reddy // envp contains everything including pieces like the current 5845c3c716bSSiva Chandra Reddy // top of the FPU stack. We cannot arbitrarily change them. So, we first 5855c3c716bSSiva Chandra Reddy // read the current status and update only those pieces which are 5865c3c716bSSiva Chandra Reddy // not disruptive. 5871c92911eSMichael Jones internal::X87StateDescriptor x87_status; 5881c92911eSMichael Jones internal::get_x87_state_descriptor(x87_status); 5895c3c716bSSiva Chandra Reddy 5905c3c716bSSiva Chandra Reddy if (envp == FE_DFL_ENV) { 5915c3c716bSSiva Chandra Reddy // Reset the exception flags in the status word. 5921c92911eSMichael Jones x87_status.status_word &= ~uint16_t(0x3F); 5935c3c716bSSiva Chandra Reddy // Reset other non-sensitive parts of the status word. 5945c3c716bSSiva Chandra Reddy for (int i = 0; i < 5; i++) 5951c92911eSMichael Jones x87_status._[i] = 0; 5965c3c716bSSiva Chandra Reddy // In the control word, we do the following: 5975c3c716bSSiva Chandra Reddy // 1. Mask all exceptions 5985c3c716bSSiva Chandra Reddy // 2. Set rounding mode to round-to-nearest 5995c3c716bSSiva Chandra Reddy // 3. Set the internal precision to double extended precision. 
6001c92911eSMichael Jones x87_status.control_word |= uint16_t(0x3F); // Mask all exceptions. 6011c92911eSMichael Jones x87_status.control_word &= ~(uint16_t(0x3) << 10); // Round to nearest. 6021c92911eSMichael Jones x87_status.control_word |= (uint16_t(0x3) << 8); // Extended precision. 6031c92911eSMichael Jones internal::write_x87_state_descriptor(x87_status); 6045c3c716bSSiva Chandra Reddy 6055c3c716bSSiva Chandra Reddy // We take the exact same approach for the MXCSR register as well. 6065c3c716bSSiva Chandra Reddy // MXCSR has two additional fields, "flush-to-zero" and 6075c3c716bSSiva Chandra Reddy // "denormals-are-zero". We reset those bits. Also, MXCSR does not 6085c3c716bSSiva Chandra Reddy // have a field which controls the precision of internal operations. 6091c92911eSMichael Jones uint32_t mxcsr = internal::get_mxcsr(); 6105c3c716bSSiva Chandra Reddy mxcsr &= ~uint16_t(0x3F); // Clear exception flags. 6115c3c716bSSiva Chandra Reddy mxcsr &= ~(uint16_t(0x1) << 6); // Reset denormals-are-zero 6125c3c716bSSiva Chandra Reddy mxcsr |= (uint16_t(0x3F) << 7); // Mask exceptions 6135c3c716bSSiva Chandra Reddy mxcsr &= ~(uint16_t(0x3) << 13); // Round to nearest. 6145c3c716bSSiva Chandra Reddy mxcsr &= ~(uint16_t(0x1) << 15); // Reset flush-to-zero 6151c92911eSMichael Jones internal::write_mxcsr(mxcsr); 6165c3c716bSSiva Chandra Reddy 6175c3c716bSSiva Chandra Reddy return 0; 6185c3c716bSSiva Chandra Reddy } 6195c3c716bSSiva Chandra Reddy 6205c3c716bSSiva Chandra Reddy const internal::FPState *fpstate = 621c120edc7SMichael Jones reinterpret_cast<const internal::FPState *>(envp); 6225c3c716bSSiva Chandra Reddy 6235c3c716bSSiva Chandra Reddy // Copy the exception status flags from envp. 6241c92911eSMichael Jones x87_status.status_word &= ~uint16_t(0x3F); 62550414422SDominic Chen #ifdef __APPLE__ 62650414422SDominic Chen x87_status.status_word |= (fpstate->status_word & 0x3F); 62750414422SDominic Chen // We can set the x87 control word as is as there are no sensitive bits. 
62850414422SDominic Chen x87_status.control_word = fpstate->control_word; 62950414422SDominic Chen #else 6301c92911eSMichael Jones x87_status.status_word |= (fpstate->x87_status.status_word & 0x3F); 6315c3c716bSSiva Chandra Reddy // Copy other non-sensitive parts of the status word. 6325c3c716bSSiva Chandra Reddy for (int i = 0; i < 5; i++) 6331c92911eSMichael Jones x87_status._[i] = fpstate->x87_status._[i]; 6345c3c716bSSiva Chandra Reddy // We can set the x87 control word as is as there are no sensitive bits. 6351c92911eSMichael Jones x87_status.control_word = fpstate->x87_status.control_word; 63650414422SDominic Chen #endif // __APPLE__ 6371c92911eSMichael Jones internal::write_x87_state_descriptor(x87_status); 6385c3c716bSSiva Chandra Reddy 6395c3c716bSSiva Chandra Reddy // We can write the MXCSR state as is as there are no sensitive bits. 6401c92911eSMichael Jones internal::write_mxcsr(fpstate->mxcsr); 641c120edc7SMichael Jones return 0; 642c120edc7SMichael Jones } 643c120edc7SMichael Jones #endif 644c120edc7SMichael Jones 645c120edc7SMichael Jones } // namespace fputil 646*5ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL 647c120edc7SMichael Jones 648270547f3SGuillaume Chatelet #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENVIMPL_H 649