xref: /llvm-project/libc/src/__support/FPUtil/x86_64/FEnvImpl.h (revision 5ff3ff33ff930e4ec49da7910612d8a41eb068cb)
1c120edc7SMichael Jones //===-- x86_64 floating point env manipulation functions --------*- C++ -*-===//
2c120edc7SMichael Jones //
3c120edc7SMichael Jones // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4c120edc7SMichael Jones // See https://llvm.org/LICENSE.txt for license information.
5c120edc7SMichael Jones // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6c120edc7SMichael Jones //
7c120edc7SMichael Jones //===----------------------------------------------------------------------===//
8c120edc7SMichael Jones 
9270547f3SGuillaume Chatelet #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENVIMPL_H
10270547f3SGuillaume Chatelet #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENVIMPL_H
11c120edc7SMichael Jones 
12e2f8c556SGuillaume Chatelet #include "src/__support/macros/attributes.h" // LIBC_INLINE
13*5ff3ff33SPetr Hosek #include "src/__support/macros/config.h"
14f100ec25SGuillaume Chatelet #include "src/__support/macros/properties/architectures.h"
1571405d90SGuillaume Chatelet 
16a2569a76SGuillaume Chatelet #if !defined(LIBC_TARGET_ARCH_IS_X86)
1771405d90SGuillaume Chatelet #error "Invalid include"
1871405d90SGuillaume Chatelet #endif
1971405d90SGuillaume Chatelet 
20c120edc7SMichael Jones #include <stdint.h>
21c120edc7SMichael Jones 
2275bbf4ddSJob Henandez Lara #include "hdr/types/fenv_t.h"
2364730095SGuillaume Chatelet #include "src/__support/macros/sanitizer.h"
24c120edc7SMichael Jones 
25*5ff3ff33SPetr Hosek namespace LIBC_NAMESPACE_DECL {
26c120edc7SMichael Jones namespace fputil {
27c120edc7SMichael Jones 
28c120edc7SMichael Jones namespace internal {
29c120edc7SMichael Jones 
30c120edc7SMichael Jones // Normally, one should be able to define FE_* macros to the exact rounding mode
31c120edc7SMichael Jones // encodings. However, since we want LLVM libc to be compiled against headers
32c120edc7SMichael Jones // from other libcs, we cannot assume that FE_* macros are always defined in
33c120edc7SMichael Jones // such a manner. So, we will define enums corresponding to the x86_64 bit
34c120edc7SMichael Jones // encodings. The implementations can map from FE_* to the corresponding enum
35c120edc7SMichael Jones // values.
36c120edc7SMichael Jones 
// Values of the rounding-control field. The x87 control word and the MXCSR
// register use the same 2-bit encoding for the rounding mode; only the bit
// position of the field differs between the two registers (see the
// *_ROUNDING_CONTROL_BIT_POSITION constants below).
struct RoundingControlValue {
  static constexpr uint16_t TO_NEAREST = 0b00;
  static constexpr uint16_t DOWNWARD = 0b01;
  static constexpr uint16_t UPWARD = 0b10;
  static constexpr uint16_t TOWARD_ZERO = 0b11;
};
46c120edc7SMichael Jones 
// Bit offset of the 2-bit rounding-control field within the x87 control word.
static constexpr uint16_t X87_ROUNDING_CONTROL_BIT_POSITION = 10;
// Bit offset of the 2-bit rounding-control field within the MXCSR register.
static constexpr uint16_t MXCSR_ROUNDING_CONTROL_BIT_POSITION = 13;
49c120edc7SMichael Jones 
// Exception status flags. The x87 status word and the MXCSR register encode
// these flags identically and keep them at the same bit positions.
struct ExceptionFlags {
  static constexpr uint16_t INVALID_F = 1 << 0;
  // Denormal-operand flag. Some libcs define __FE_DENORM for the denormal
  // input exception and include it in FE_ALL_EXCEPT. It is defined and used
  // here so that this code can be compiled against the headers of such libcs.
  static constexpr uint16_t DENORMAL_F = 1 << 1;
  static constexpr uint16_t DIV_BY_ZERO_F = 1 << 2;
  static constexpr uint16_t OVERFLOW_F = 1 << 3;
  static constexpr uint16_t UNDERFLOW_F = 1 << 4;
  static constexpr uint16_t INEXACT_F = 1 << 5;
};
63c120edc7SMichael Jones 
// The exception control (mask) bits occupy six bits, one bit for each
// exception. In the x87 control word, they occupy the first 6 bits. In the
// MXCSR register, they occupy bits 7 to 12.
static constexpr uint16_t X87_EXCEPTION_CONTROL_BIT_POSITION = 0;
// NOTE(review): not referenced in the visible part of this file. The value
// equals the offset of the duplicated mask bits in the high byte of the
// Windows fenv_t control word -- confirm the intended use.
static constexpr uint16_t X87_EXCEPTION_CONTROL_BIT_POSITION_HIGH = 24;
// NOTE: the identifier is misspelled ("CONTOL", "POISTION") but it is
// referenced under exactly this name by enable_except and disable_except, so
// it has to be kept as is unless all users are renamed together.
static constexpr uint16_t MXCSR_EXCEPTION_CONTOL_BIT_POISTION = 7;
70c120edc7SMichael Jones 
71c120edc7SMichael Jones // Exception flags are individual bits in the corresponding registers.
72c120edc7SMichael Jones // So, we just OR the bit values to get the full set of exceptions.
7359c809cdSSiva Chandra Reddy LIBC_INLINE uint16_t get_status_value_for_except(int excepts) {
74c120edc7SMichael Jones   // We will make use of the fact that exception control bits are single
75c120edc7SMichael Jones   // bit flags in the control registers.
76a98a6e95Sluolent   return ((excepts & FE_INVALID) ? ExceptionFlags::INVALID_F : 0) |
77c120edc7SMichael Jones #ifdef __FE_DENORM
78a98a6e95Sluolent          ((excepts & __FE_DENORM) ? ExceptionFlags::DENORMAL_F : 0) |
79c120edc7SMichael Jones #endif // __FE_DENORM
80a98a6e95Sluolent          ((excepts & FE_DIVBYZERO) ? ExceptionFlags::DIV_BY_ZERO_F : 0) |
81a98a6e95Sluolent          ((excepts & FE_OVERFLOW) ? ExceptionFlags::OVERFLOW_F : 0) |
82a98a6e95Sluolent          ((excepts & FE_UNDERFLOW) ? ExceptionFlags::UNDERFLOW_F : 0) |
83a98a6e95Sluolent          ((excepts & FE_INEXACT) ? ExceptionFlags::INEXACT_F : 0);
84c120edc7SMichael Jones }
85c120edc7SMichael Jones 
8659c809cdSSiva Chandra Reddy LIBC_INLINE int exception_status_to_macro(uint16_t status) {
87a98a6e95Sluolent   return ((status & ExceptionFlags::INVALID_F) ? FE_INVALID : 0) |
88c120edc7SMichael Jones #ifdef __FE_DENORM
89a98a6e95Sluolent          ((status & ExceptionFlags::DENORMAL_F) ? __FE_DENORM : 0) |
90c120edc7SMichael Jones #endif // __FE_DENORM
91a98a6e95Sluolent          ((status & ExceptionFlags::DIV_BY_ZERO_F) ? FE_DIVBYZERO : 0) |
92a98a6e95Sluolent          ((status & ExceptionFlags::OVERFLOW_F) ? FE_OVERFLOW : 0) |
93a98a6e95Sluolent          ((status & ExceptionFlags::UNDERFLOW_F) ? FE_UNDERFLOW : 0) |
94a98a6e95Sluolent          ((status & ExceptionFlags::INEXACT_F) ? FE_INEXACT : 0);
95c120edc7SMichael Jones }
96c120edc7SMichael Jones 
// In-memory image of the x87 FPU environment as stored by the `fnstenv`
// instruction and loaded by the `fldenv` instruction (see
// get_x87_state_descriptor and write_x87_state_descriptor). Only the control
// and status words are named; the rest of the image is kept as opaque storage
// so that it is written back unchanged.
struct X87StateDescriptor {
  uint16_t control_word;
  uint16_t unused1;
  uint16_t status_word;
  uint16_t unused2;
  // TODO: Elaborate the remaining 20 bytes as required.
  uint32_t _[5];
};

// The instructions above read and write the whole environment image through
// a pointer to this struct, so a layout change would silently corrupt
// adjacent memory. Pin the expected size (8 named bytes + 20 opaque bytes).
static_assert(sizeof(X87StateDescriptor) == 28,
              "X87StateDescriptor must match the 28-byte x87 environment "
              "image used by fnstenv/fldenv.");
105c120edc7SMichael Jones 
10659c809cdSSiva Chandra Reddy LIBC_INLINE uint16_t get_x87_control_word() {
107c120edc7SMichael Jones   uint16_t w;
108c120edc7SMichael Jones   __asm__ __volatile__("fnstcw %0" : "=m"(w)::);
109e2263f14SGuillaume Chatelet   MSAN_UNPOISON(&w, sizeof(w));
110c120edc7SMichael Jones   return w;
111c120edc7SMichael Jones }
112c120edc7SMichael Jones 
// Loads |w| into the x87 FPU control word using the `fldcw` instruction.
LIBC_INLINE void write_x87_control_word(uint16_t w) {
  __asm__ __volatile__("fldcw %0" : : "m"(w) :);
}
116c120edc7SMichael Jones 
11759c809cdSSiva Chandra Reddy LIBC_INLINE uint16_t get_x87_status_word() {
118c120edc7SMichael Jones   uint16_t w;
119c120edc7SMichael Jones   __asm__ __volatile__("fnstsw %0" : "=m"(w)::);
120e2263f14SGuillaume Chatelet   MSAN_UNPOISON(&w, sizeof(w));
121c120edc7SMichael Jones   return w;
122c120edc7SMichael Jones }
123c120edc7SMichael Jones 
// Executes the `fnclex` instruction to clear the x87 FPU exception flags.
LIBC_INLINE void clear_x87_exceptions() {
  __asm__ __volatile__("fnclex" : : :);
}
127c120edc7SMichael Jones 
12859c809cdSSiva Chandra Reddy LIBC_INLINE uint32_t get_mxcsr() {
129c120edc7SMichael Jones   uint32_t w;
130c120edc7SMichael Jones   __asm__ __volatile__("stmxcsr %0" : "=m"(w)::);
131e2263f14SGuillaume Chatelet   MSAN_UNPOISON(&w, sizeof(w));
132c120edc7SMichael Jones   return w;
133c120edc7SMichael Jones }
134c120edc7SMichael Jones 
// Loads |w| into the MXCSR register using the `ldmxcsr` instruction.
LIBC_INLINE void write_mxcsr(uint32_t w) {
  __asm__ __volatile__("ldmxcsr %0" : : "m"(w) :);
}
138c120edc7SMichael Jones 
// Stores the current x87 FPU environment into |s| using the `fnstenv`
// instruction.
// NOTE(review): per the Intel SDM, `fnstenv` also masks all x87 exceptions
// after storing the environment. The callers visible in this file reload the
// descriptor with write_x87_state_descriptor afterwards, which restores the
// control word -- confirm that every other caller does the same.
LIBC_INLINE void get_x87_state_descriptor(X87StateDescriptor &s) {
  __asm__ __volatile__("fnstenv %0" : "=m"(s));
  // |s| is written by the asm statement, so mark it as initialized for the
  // sanitizer.
  MSAN_UNPOISON(&s, sizeof(s));
}
143c120edc7SMichael Jones 
// Loads the x87 FPU environment from |s| using the `fldenv` instruction.
LIBC_INLINE void write_x87_state_descriptor(const X87StateDescriptor &s) {
  __asm__ __volatile__("fldenv %0" : : "m"(s) :);
}
147c120edc7SMichael Jones 
// Executes the `fwait` instruction. raise_except issues it after setting an
// exception flag to get the processor to actually raise that exception.
LIBC_INLINE void fwait() { __asm__ __volatile__("fwait"); }
149c120edc7SMichael Jones 
150c120edc7SMichael Jones } // namespace internal
151c120edc7SMichael Jones 
15259c809cdSSiva Chandra Reddy LIBC_INLINE int enable_except(int excepts) {
153c120edc7SMichael Jones   // In the x87 control word and in MXCSR, an exception is blocked
154c120edc7SMichael Jones   // if the corresponding bit is set. That is the reason for all the
155c120edc7SMichael Jones   // bit-flip operations below as we need to turn the bits to zero
156c120edc7SMichael Jones   // to enable them.
157c120edc7SMichael Jones 
1581c92911eSMichael Jones   uint16_t bit_mask = internal::get_status_value_for_except(excepts);
159c120edc7SMichael Jones 
1601c92911eSMichael Jones   uint16_t x87_cw = internal::get_x87_control_word();
1611c92911eSMichael Jones   uint16_t old_excepts = ~x87_cw & 0x3F; // Save previously enabled exceptions.
1621c92911eSMichael Jones   x87_cw &= ~bit_mask;
1631c92911eSMichael Jones   internal::write_x87_control_word(x87_cw);
164c120edc7SMichael Jones 
165c120edc7SMichael Jones   // Enabling SSE exceptions via MXCSR is a nice thing to do but
166c120edc7SMichael Jones   // might not be of much use practically as SSE exceptions and the x87
167c120edc7SMichael Jones   // exceptions are independent of each other.
1681c92911eSMichael Jones   uint32_t mxcsr = internal::get_mxcsr();
1691c92911eSMichael Jones   mxcsr &= ~(bit_mask << internal::MXCSR_EXCEPTION_CONTOL_BIT_POISTION);
1701c92911eSMichael Jones   internal::write_mxcsr(mxcsr);
171c120edc7SMichael Jones 
172c120edc7SMichael Jones   // Since the x87 exceptions and SSE exceptions are independent of each,
173c120edc7SMichael Jones   // it doesn't make much sence to report both in the return value. Most
174c120edc7SMichael Jones   // often, the standard floating point functions deal with FPU operations
175c120edc7SMichael Jones   // so we will retrun only the old x87 exceptions.
1761c92911eSMichael Jones   return internal::exception_status_to_macro(old_excepts);
177c120edc7SMichael Jones }
178c120edc7SMichael Jones 
17959c809cdSSiva Chandra Reddy LIBC_INLINE int disable_except(int excepts) {
180c120edc7SMichael Jones   // In the x87 control word and in MXCSR, an exception is blocked
181c120edc7SMichael Jones   // if the corresponding bit is set.
182c120edc7SMichael Jones 
1831c92911eSMichael Jones   uint16_t bit_mask = internal::get_status_value_for_except(excepts);
184c120edc7SMichael Jones 
1851c92911eSMichael Jones   uint16_t x87_cw = internal::get_x87_control_word();
1861c92911eSMichael Jones   uint16_t old_excepts = ~x87_cw & 0x3F; // Save previously enabled exceptions.
1871c92911eSMichael Jones   x87_cw |= bit_mask;
1881c92911eSMichael Jones   internal::write_x87_control_word(x87_cw);
189c120edc7SMichael Jones 
1901c92911eSMichael Jones   // Just like in enable_except, it is not clear if disabling SSE exceptions
191c120edc7SMichael Jones   // is required. But, we will still do it only as a "nice thing to do".
1921c92911eSMichael Jones   uint32_t mxcsr = internal::get_mxcsr();
1931c92911eSMichael Jones   mxcsr |= (bit_mask << internal::MXCSR_EXCEPTION_CONTOL_BIT_POISTION);
1941c92911eSMichael Jones   internal::write_mxcsr(mxcsr);
195c120edc7SMichael Jones 
1961c92911eSMichael Jones   return internal::exception_status_to_macro(old_excepts);
197c120edc7SMichael Jones }
198c120edc7SMichael Jones 
19959c809cdSSiva Chandra Reddy LIBC_INLINE int get_except() {
200122da690SAlex Brachet   uint16_t mxcsr = static_cast<uint16_t>(internal::get_mxcsr());
201805899e6SMichael Jones   uint16_t enabled_excepts = ~(mxcsr >> 7) & 0x3F;
2021c92911eSMichael Jones   return internal::exception_status_to_macro(enabled_excepts);
203c5cfbe40SSiva Chandra Reddy }
204c5cfbe40SSiva Chandra Reddy 
20559c809cdSSiva Chandra Reddy LIBC_INLINE int clear_except(int excepts) {
206c120edc7SMichael Jones   internal::X87StateDescriptor state;
2071c92911eSMichael Jones   internal::get_x87_state_descriptor(state);
20840a55fffSAlex Brachet   state.status_word &=
20940a55fffSAlex Brachet       static_cast<uint16_t>(~internal::get_status_value_for_except(excepts));
2101c92911eSMichael Jones   internal::write_x87_state_descriptor(state);
211c120edc7SMichael Jones 
2121c92911eSMichael Jones   uint32_t mxcsr = internal::get_mxcsr();
2131c92911eSMichael Jones   mxcsr &= ~internal::get_status_value_for_except(excepts);
2141c92911eSMichael Jones   internal::write_mxcsr(mxcsr);
215c120edc7SMichael Jones   return 0;
216c120edc7SMichael Jones }
217c120edc7SMichael Jones 
21859c809cdSSiva Chandra Reddy LIBC_INLINE int test_except(int excepts) {
21950414422SDominic Chen   uint16_t status_word = internal::get_x87_status_word();
22050414422SDominic Chen   uint32_t mxcsr = internal::get_mxcsr();
221c120edc7SMichael Jones   // Check both x87 status word and MXCSR.
22250414422SDominic Chen   uint16_t status_value = internal::get_status_value_for_except(excepts);
22340a55fffSAlex Brachet   return internal::exception_status_to_macro(
22450414422SDominic Chen       static_cast<uint16_t>(status_value & (status_word | mxcsr)));
225c120edc7SMichael Jones }
226c120edc7SMichael Jones 
227c120edc7SMichael Jones // Sets the exception flags but does not trigger the exception handler.
22859c809cdSSiva Chandra Reddy LIBC_INLINE int set_except(int excepts) {
2291c92911eSMichael Jones   uint16_t status_value = internal::get_status_value_for_except(excepts);
230c120edc7SMichael Jones   internal::X87StateDescriptor state;
2311c92911eSMichael Jones   internal::get_x87_state_descriptor(state);
2321c92911eSMichael Jones   state.status_word |= status_value;
2331c92911eSMichael Jones   internal::write_x87_state_descriptor(state);
234c120edc7SMichael Jones 
2351c92911eSMichael Jones   uint32_t mxcsr = internal::get_mxcsr();
2361c92911eSMichael Jones   mxcsr |= status_value;
2371c92911eSMichael Jones   internal::write_mxcsr(mxcsr);
238c120edc7SMichael Jones 
239c120edc7SMichael Jones   return 0;
240c120edc7SMichael Jones }
241c120edc7SMichael Jones 
24259c809cdSSiva Chandra Reddy LIBC_INLINE int raise_except(int excepts) {
2431c92911eSMichael Jones   uint16_t status_value = internal::get_status_value_for_except(excepts);
244c120edc7SMichael Jones 
245c120edc7SMichael Jones   // We set the status flag for exception one at a time and call the
246c120edc7SMichael Jones   // fwait instruction to actually get the processor to raise the
247c120edc7SMichael Jones   // exception by calling the exception handler. This scheme is per
248678e3ee1SFangrui Song   // the description in "8.6 X87 FPU EXCEPTION SYNCHRONIZATION"
249c120edc7SMichael Jones   // of the "Intel 64 and IA-32 Architectures Software Developer's
250c120edc7SMichael Jones   // Manual, Vol 1".
251c120edc7SMichael Jones 
252d4a01549SJay Foad   // FPU status word is read for each exception separately as the
253c120edc7SMichael Jones   // exception handler can potentially write to it (typically to clear
254c120edc7SMichael Jones   // the corresponding exception flag). By reading it separately, we
255c120edc7SMichael Jones   // ensure that the writes by the exception handler are maintained
256c120edc7SMichael Jones   // when raising the next exception.
257c120edc7SMichael Jones 
2581c92911eSMichael Jones   auto raise_helper = [](uint16_t singleExceptFlag) {
259c120edc7SMichael Jones     internal::X87StateDescriptor state;
260805899e6SMichael Jones     uint32_t mxcsr = 0;
2611c92911eSMichael Jones     internal::get_x87_state_descriptor(state);
262805899e6SMichael Jones     mxcsr = internal::get_mxcsr();
2631c92911eSMichael Jones     state.status_word |= singleExceptFlag;
264805899e6SMichael Jones     mxcsr |= singleExceptFlag;
2651c92911eSMichael Jones     internal::write_x87_state_descriptor(state);
266805899e6SMichael Jones     internal::write_mxcsr(mxcsr);
267c120edc7SMichael Jones     internal::fwait();
268c120edc7SMichael Jones   };
269c120edc7SMichael Jones 
270073534cbSMichael Jones   if (status_value & internal::ExceptionFlags::INVALID_F)
271073534cbSMichael Jones     raise_helper(internal::ExceptionFlags::INVALID_F);
272073534cbSMichael Jones   if (status_value & internal::ExceptionFlags::DIV_BY_ZERO_F)
273073534cbSMichael Jones     raise_helper(internal::ExceptionFlags::DIV_BY_ZERO_F);
274073534cbSMichael Jones   if (status_value & internal::ExceptionFlags::OVERFLOW_F)
275073534cbSMichael Jones     raise_helper(internal::ExceptionFlags::OVERFLOW_F);
276073534cbSMichael Jones   if (status_value & internal::ExceptionFlags::UNDERFLOW_F)
277073534cbSMichael Jones     raise_helper(internal::ExceptionFlags::UNDERFLOW_F);
278073534cbSMichael Jones   if (status_value & internal::ExceptionFlags::INEXACT_F)
279073534cbSMichael Jones     raise_helper(internal::ExceptionFlags::INEXACT_F);
280c120edc7SMichael Jones #ifdef __FE_DENORM
281073534cbSMichael Jones   if (status_value & internal::ExceptionFlags::DENORMAL_F) {
282073534cbSMichael Jones     raise_helper(internal::ExceptionFlags::DENORMAL_F);
283c120edc7SMichael Jones   }
284c120edc7SMichael Jones #endif // __FE_DENORM
285c120edc7SMichael Jones 
286c120edc7SMichael Jones   // There is no special synchronization scheme available to
287c120edc7SMichael Jones   // raise SEE exceptions. So, we will ignore that for now.
288c120edc7SMichael Jones   // Just plain writing to the MXCSR register does not guarantee
289c120edc7SMichael Jones   // the exception handler will be called.
290c120edc7SMichael Jones 
291c120edc7SMichael Jones   return 0;
292c120edc7SMichael Jones }
293c120edc7SMichael Jones 
29459c809cdSSiva Chandra Reddy LIBC_INLINE int get_round() {
2951c92911eSMichael Jones   uint16_t bit_value =
2961c92911eSMichael Jones       (internal::get_mxcsr() >> internal::MXCSR_ROUNDING_CONTROL_BIT_POSITION) &
2971c92911eSMichael Jones       0x3;
2981c92911eSMichael Jones   switch (bit_value) {
2991c92911eSMichael Jones   case internal::RoundingControlValue::TO_NEAREST:
300c120edc7SMichael Jones     return FE_TONEAREST;
3011c92911eSMichael Jones   case internal::RoundingControlValue::DOWNWARD:
302c120edc7SMichael Jones     return FE_DOWNWARD;
3031c92911eSMichael Jones   case internal::RoundingControlValue::UPWARD:
304c120edc7SMichael Jones     return FE_UPWARD;
3051c92911eSMichael Jones   case internal::RoundingControlValue::TOWARD_ZERO:
306c120edc7SMichael Jones     return FE_TOWARDZERO;
307c120edc7SMichael Jones   default:
308c120edc7SMichael Jones     return -1; // Error value.
309c120edc7SMichael Jones   }
310c120edc7SMichael Jones }
311c120edc7SMichael Jones 
31259c809cdSSiva Chandra Reddy LIBC_INLINE int set_round(int mode) {
3131c92911eSMichael Jones   uint16_t bit_value;
314c120edc7SMichael Jones   switch (mode) {
315c120edc7SMichael Jones   case FE_TONEAREST:
3161c92911eSMichael Jones     bit_value = internal::RoundingControlValue::TO_NEAREST;
317c120edc7SMichael Jones     break;
318c120edc7SMichael Jones   case FE_DOWNWARD:
3191c92911eSMichael Jones     bit_value = internal::RoundingControlValue::DOWNWARD;
320c120edc7SMichael Jones     break;
321c120edc7SMichael Jones   case FE_UPWARD:
3221c92911eSMichael Jones     bit_value = internal::RoundingControlValue::UPWARD;
323c120edc7SMichael Jones     break;
324c120edc7SMichael Jones   case FE_TOWARDZERO:
3251c92911eSMichael Jones     bit_value = internal::RoundingControlValue::TOWARD_ZERO;
326c120edc7SMichael Jones     break;
327c120edc7SMichael Jones   default:
328c120edc7SMichael Jones     return 1; // To indicate failure
329c120edc7SMichael Jones   }
330c120edc7SMichael Jones 
3311c92911eSMichael Jones   uint16_t x87_value = static_cast<uint16_t>(
3321c92911eSMichael Jones       bit_value << internal::X87_ROUNDING_CONTROL_BIT_POSITION);
3331c92911eSMichael Jones   uint16_t x87_control = internal::get_x87_control_word();
3341c92911eSMichael Jones   x87_control = static_cast<uint16_t>(
3351c92911eSMichael Jones       (x87_control &
3361c92911eSMichael Jones        ~(uint16_t(0x3) << internal::X87_ROUNDING_CONTROL_BIT_POSITION)) |
3371c92911eSMichael Jones       x87_value);
3381c92911eSMichael Jones   internal::write_x87_control_word(x87_control);
339c120edc7SMichael Jones 
3401c92911eSMichael Jones   uint32_t mxcsr_value = bit_value
3411c92911eSMichael Jones                          << internal::MXCSR_ROUNDING_CONTROL_BIT_POSITION;
3421c92911eSMichael Jones   uint32_t mxcsr_control = internal::get_mxcsr();
3431c92911eSMichael Jones   mxcsr_control = (mxcsr_control &
3441c92911eSMichael Jones                    ~(0x3 << internal::MXCSR_ROUNDING_CONTROL_BIT_POSITION)) |
3451c92911eSMichael Jones                   mxcsr_value;
3461c92911eSMichael Jones   internal::write_mxcsr(mxcsr_control);
347c120edc7SMichael Jones 
348c120edc7SMichael Jones   return 0;
349c120edc7SMichael Jones }
350c120edc7SMichael Jones 
351c120edc7SMichael Jones namespace internal {
352c120edc7SMichael Jones 
// FPState is the internal view of the public fenv_t type. Its layout depends
// on the platform whose fenv_t is being matched.
#if defined(_WIN32)
// MSVC fenv.h defines a very simple representation of the floating point state
// which just consists of control and status words of the x87 unit.
struct FPState {
  uint32_t control_word;
  uint32_t status_word;
};
#elif defined(__APPLE__)
// NOTE(review): presumably mirrors the layout of the fenv_t provided by the
// Apple headers; the static_assert following this namespace only checks the
// total size -- confirm the field order against the platform header.
struct FPState {
  uint16_t control_word;
  uint16_t status_word;
  uint32_t mxcsr;
  uint8_t reserved[8];
};
#else
// All other platforms: the complete x87 environment image followed by the
// value of the MXCSR register.
struct FPState {
  X87StateDescriptor x87_status;
  uint32_t mxcsr;
};
#endif // _WIN32
373c120edc7SMichael Jones 
374c120edc7SMichael Jones } // namespace internal
375c120edc7SMichael Jones 
// fenv_t pointers are reinterpret_cast to internal::FPState pointers (see
// get_env and set_env), so the two types must at least have the same size.
static_assert(
    sizeof(fenv_t) == sizeof(internal::FPState),
    "Internal floating point state does not match the public fenv_t type.");
379c120edc7SMichael Jones 
380c120edc7SMichael Jones #ifdef _WIN32
381805899e6SMichael Jones 
// Exception bits as laid out in the Windows fenv_t struct. Compared to the
// MXCSR register, the bit positions are almost reversed.
struct WinExceptionFlags {
  static constexpr uint32_t INEXACT_WIN = 1u << 0;
  static constexpr uint32_t UNDERFLOW_WIN = 1u << 1;
  static constexpr uint32_t OVERFLOW_WIN = 1u << 2;
  static constexpr uint32_t DIV_BY_ZERO_WIN = 1u << 3;
  static constexpr uint32_t INVALID_WIN = 1u << 4;
  static constexpr uint32_t DENORMAL_WIN = 1u << 5;

  // The Windows fenv_t struct has a second copy of all of these bits in the
  // high byte of the 32 bit word. These copies are used as the source of truth
  // when calling fesetenv.
  static constexpr uint32_t HIGH_OFFSET = 24;

  static constexpr uint32_t HIGH_INEXACT = INEXACT_WIN << HIGH_OFFSET;
  static constexpr uint32_t HIGH_UNDERFLOW = UNDERFLOW_WIN << HIGH_OFFSET;
  static constexpr uint32_t HIGH_OVERFLOW = OVERFLOW_WIN << HIGH_OFFSET;
  static constexpr uint32_t HIGH_DIV_BY_ZERO = DIV_BY_ZERO_WIN << HIGH_OFFSET;
  static constexpr uint32_t HIGH_INVALID = INVALID_WIN << HIGH_OFFSET;
  static constexpr uint32_t HIGH_DENORMAL = DENORMAL_WIN << HIGH_OFFSET;
};
404805899e6SMichael Jones 
405805899e6SMichael Jones /*
406805899e6SMichael Jones     fenv_t control word format:
407805899e6SMichael Jones 
    Windows (at least for x64) uses a 4 byte fenv control word stored in a 32
    bit integer. The first (most significant) byte contains just the rounding
    mode and the exception masks, while the last two bytes contain that same
    information as well as the flush-to-zero and denormals-are-zero flags. The
    two flag bits (FF in the diagram below) encode the flags as follows:
413805899e6SMichael Jones 
414805899e6SMichael Jones     00 - No flags set
415805899e6SMichael Jones     01 - Flush-to-zero and Denormals-are-zero set
416805899e6SMichael Jones     11 - Flush-to-zero set
417805899e6SMichael Jones     10 - Denormals-are-zero set
418805899e6SMichael Jones 
419805899e6SMichael Jones     U represents unused.
420805899e6SMichael Jones 
421805899e6SMichael Jones      +-----Rounding Mode-----+
422805899e6SMichael Jones      |                       |
423805899e6SMichael Jones     ++                      ++
424805899e6SMichael Jones     ||                      ||
425805899e6SMichael Jones     RRMMMMMM UUUUUUUU UUUUFFRR UUMMMMMM
426805899e6SMichael Jones       |    |              ||     |    |
427805899e6SMichael Jones       +----+      flags---++     +----+
428805899e6SMichael Jones            |                          |
429805899e6SMichael Jones            +------Exception Masks-----+
430805899e6SMichael Jones 
431805899e6SMichael Jones 
432805899e6SMichael Jones     fenv_t status word format:
433805899e6SMichael Jones 
434805899e6SMichael Jones     The status word is a lot simpler for this conversion, since only the
435805899e6SMichael Jones     exception flags are used in the MXCSR.
436805899e6SMichael Jones 
437805899e6SMichael Jones       +----+---Exception Flags---+----+
438805899e6SMichael Jones       |    |                     |    |
439805899e6SMichael Jones     UUEEEEEE UUUUUUUU UUUUUUUU UUEEEEEE
440805899e6SMichael Jones 
441805899e6SMichael Jones 
442805899e6SMichael Jones 
443805899e6SMichael Jones     MXCSR Format:
444805899e6SMichael Jones 
445805899e6SMichael Jones     The MXCSR format is the same information, just organized differently. Since
446805899e6SMichael Jones     the fenv_t struct for windows doesn't include the mxcsr bits, they must be
447805899e6SMichael Jones     generated from the control word bits.
448805899e6SMichael Jones 
449805899e6SMichael Jones       Exception Masks---+           +---Exception Flags
450805899e6SMichael Jones                         |           |
451805899e6SMichael Jones      Flush-to-zero---+  +----+ +----+
452805899e6SMichael Jones                      |  |    | |    |
453805899e6SMichael Jones                      FRRMMMMMMDEEEEEE
454805899e6SMichael Jones                       ||      |
455805899e6SMichael Jones                       ++      +---Denormals-are-zero
456805899e6SMichael Jones                       |
457805899e6SMichael Jones                       +---Rounding Mode
458805899e6SMichael Jones 
459805899e6SMichael Jones 
460805899e6SMichael Jones     The mask and flag order is as follows:
461805899e6SMichael Jones 
462805899e6SMichael Jones     fenv_t      mxcsr
463805899e6SMichael Jones 
464805899e6SMichael Jones     denormal    inexact
465805899e6SMichael Jones     invalid     underflow
466805899e6SMichael Jones     div by 0    overflow
467805899e6SMichael Jones     overflow    div by 0
468805899e6SMichael Jones     underflow   denormal
469805899e6SMichael Jones     inexact     invalid
470805899e6SMichael Jones 
471805899e6SMichael Jones     This is almost reverse, except for denormal and invalid which are in the
472805899e6SMichael Jones     same order in both.
473805899e6SMichael Jones   */
474805899e6SMichael Jones 
47559c809cdSSiva Chandra Reddy LIBC_INLINE int get_env(fenv_t *envp) {
476c120edc7SMichael Jones   internal::FPState *state = reinterpret_cast<internal::FPState *>(envp);
477805899e6SMichael Jones 
478805899e6SMichael Jones   uint32_t status_word = 0;
479805899e6SMichael Jones   uint32_t control_word = 0;
480805899e6SMichael Jones 
481805899e6SMichael Jones   uint32_t mxcsr = internal::get_mxcsr();
482805899e6SMichael Jones 
483805899e6SMichael Jones   // Set exception flags in the status word
484073534cbSMichael Jones   status_word |= (mxcsr & (internal::ExceptionFlags::INVALID_F |
485073534cbSMichael Jones                            internal::ExceptionFlags::DENORMAL_F))
486805899e6SMichael Jones                  << 4;
487073534cbSMichael Jones   status_word |= (mxcsr & internal::ExceptionFlags::DIV_BY_ZERO_F) << 1;
488073534cbSMichael Jones   status_word |= (mxcsr & internal::ExceptionFlags::OVERFLOW_F) >> 1;
489073534cbSMichael Jones   status_word |= (mxcsr & internal::ExceptionFlags::UNDERFLOW_F) >> 3;
490073534cbSMichael Jones   status_word |= (mxcsr & internal::ExceptionFlags::INEXACT_F) >> 5;
491805899e6SMichael Jones   status_word |= status_word << WinExceptionFlags::HIGH_OFFSET;
492805899e6SMichael Jones 
493805899e6SMichael Jones   // Set exception masks in bits 0-5 and 24-29
494073534cbSMichael Jones   control_word |= (mxcsr & ((internal::ExceptionFlags::INVALID_F |
495073534cbSMichael Jones                              internal::ExceptionFlags::DENORMAL_F)
496805899e6SMichael Jones                             << 7)) >>
497805899e6SMichael Jones                   3;
498073534cbSMichael Jones   control_word |= (mxcsr & (internal::ExceptionFlags::DIV_BY_ZERO_F << 7)) >> 6;
499073534cbSMichael Jones   control_word |= (mxcsr & (internal::ExceptionFlags::OVERFLOW_F << 7)) >> 8;
500073534cbSMichael Jones   control_word |= (mxcsr & (internal::ExceptionFlags::UNDERFLOW_F << 7)) >> 10;
501073534cbSMichael Jones   control_word |= (mxcsr & (internal::ExceptionFlags::INEXACT_F << 7)) >> 12;
502805899e6SMichael Jones   control_word |= control_word << WinExceptionFlags::HIGH_OFFSET;
503805899e6SMichael Jones 
504805899e6SMichael Jones   // Set rounding in bits 8-9 and 30-31
505805899e6SMichael Jones   control_word |= (mxcsr & 0x6000) >> 5;
506805899e6SMichael Jones   control_word |= (mxcsr & 0x6000) << 17;
507805899e6SMichael Jones 
508805899e6SMichael Jones   // Set flush-to-zero in bit 10
509805899e6SMichael Jones   control_word |= (mxcsr & 0x8000) >> 5;
510805899e6SMichael Jones 
511805899e6SMichael Jones   // Set denormals-are-zero xor flush-to-zero in bit 11
512805899e6SMichael Jones   control_word |= (((mxcsr & 0x8000) >> 9) ^ (mxcsr & 0x0040)) << 5;
513805899e6SMichael Jones 
514805899e6SMichael Jones   state->control_word = control_word;
515805899e6SMichael Jones   state->status_word = status_word;
516c120edc7SMichael Jones   return 0;
517c120edc7SMichael Jones }
518c120edc7SMichael Jones 
51959c809cdSSiva Chandra Reddy LIBC_INLINE int set_env(const fenv_t *envp) {
520c120edc7SMichael Jones   const internal::FPState *state =
521c120edc7SMichael Jones       reinterpret_cast<const internal::FPState *>(envp);
522805899e6SMichael Jones 
523805899e6SMichael Jones   uint32_t mxcsr = 0;
524805899e6SMichael Jones 
525805899e6SMichael Jones   // Set exception flags from the status word
526805899e6SMichael Jones   mxcsr |= static_cast<uint16_t>(
527805899e6SMichael Jones       (state->status_word &
528805899e6SMichael Jones        (WinExceptionFlags::HIGH_DENORMAL | WinExceptionFlags::HIGH_INVALID)) >>
529805899e6SMichael Jones       28);
530805899e6SMichael Jones   mxcsr |= static_cast<uint16_t>(
531805899e6SMichael Jones       (state->status_word & WinExceptionFlags::HIGH_DIV_BY_ZERO) >> 25);
532805899e6SMichael Jones   mxcsr |= static_cast<uint16_t>(
533805899e6SMichael Jones       (state->status_word & WinExceptionFlags::HIGH_OVERFLOW) >> 23);
534805899e6SMichael Jones   mxcsr |= static_cast<uint16_t>(
535805899e6SMichael Jones       (state->status_word & WinExceptionFlags::HIGH_UNDERFLOW) >> 21);
536805899e6SMichael Jones   mxcsr |= static_cast<uint16_t>(
537805899e6SMichael Jones       (state->status_word & WinExceptionFlags::HIGH_INEXACT) >> 19);
538805899e6SMichael Jones 
539805899e6SMichael Jones   // Set denormals-are-zero from bit 10 xor bit 11
540805899e6SMichael Jones   mxcsr |= static_cast<uint16_t>(
541805899e6SMichael Jones       (((state->control_word & 0x800) >> 1) ^ (state->control_word & 0x400)) >>
542805899e6SMichael Jones       4);
543805899e6SMichael Jones 
544805899e6SMichael Jones   // Set exception masks from bits 24-29
545805899e6SMichael Jones   mxcsr |= static_cast<uint16_t>(
546805899e6SMichael Jones       (state->control_word &
547805899e6SMichael Jones        (WinExceptionFlags::HIGH_DENORMAL | WinExceptionFlags::HIGH_INVALID)) >>
548805899e6SMichael Jones       21);
549805899e6SMichael Jones   mxcsr |= static_cast<uint16_t>(
550805899e6SMichael Jones       (state->control_word & WinExceptionFlags::HIGH_DIV_BY_ZERO) >> 18);
551805899e6SMichael Jones   mxcsr |= static_cast<uint16_t>(
552805899e6SMichael Jones       (state->control_word & WinExceptionFlags::HIGH_OVERFLOW) >> 16);
553805899e6SMichael Jones   mxcsr |= static_cast<uint16_t>(
554805899e6SMichael Jones       (state->control_word & WinExceptionFlags::HIGH_UNDERFLOW) >> 14);
555805899e6SMichael Jones   mxcsr |= static_cast<uint16_t>(
556805899e6SMichael Jones       (state->control_word & WinExceptionFlags::HIGH_INEXACT) >> 12);
557805899e6SMichael Jones 
558805899e6SMichael Jones   // Set rounding from bits 30-31
559805899e6SMichael Jones   mxcsr |= static_cast<uint16_t>((state->control_word & 0xc0000000) >> 17);
560805899e6SMichael Jones 
561805899e6SMichael Jones   // Set flush-to-zero from bit 10
562805899e6SMichael Jones   mxcsr |= static_cast<uint16_t>((state->control_word & 0x400) << 5);
563805899e6SMichael Jones 
564805899e6SMichael Jones   internal::write_mxcsr(mxcsr);
565c120edc7SMichael Jones   return 0;
566c120edc7SMichael Jones }
567c120edc7SMichael Jones #else
56859c809cdSSiva Chandra Reddy LIBC_INLINE int get_env(fenv_t *envp) {
569c120edc7SMichael Jones   internal::FPState *state = reinterpret_cast<internal::FPState *>(envp);
57050414422SDominic Chen #ifdef __APPLE__
57150414422SDominic Chen   internal::X87StateDescriptor x87_status;
57250414422SDominic Chen   internal::get_x87_state_descriptor(x87_status);
57350414422SDominic Chen   state->control_word = x87_status.control_word;
57450414422SDominic Chen   state->status_word = x87_status.status_word;
57550414422SDominic Chen #else
5761c92911eSMichael Jones   internal::get_x87_state_descriptor(state->x87_status);
57750414422SDominic Chen #endif // __APPLE__
5781c92911eSMichael Jones   state->mxcsr = internal::get_mxcsr();
579c120edc7SMichael Jones   return 0;
580c120edc7SMichael Jones }
581c120edc7SMichael Jones 
58259c809cdSSiva Chandra Reddy LIBC_INLINE int set_env(const fenv_t *envp) {
5835c3c716bSSiva Chandra Reddy   // envp contains everything including pieces like the current
5845c3c716bSSiva Chandra Reddy   // top of FPU stack. We cannot arbitrarily change them. So, we first
5855c3c716bSSiva Chandra Reddy   // read the current status and update only those pieces which are
5865c3c716bSSiva Chandra Reddy   // not disruptive.
5871c92911eSMichael Jones   internal::X87StateDescriptor x87_status;
5881c92911eSMichael Jones   internal::get_x87_state_descriptor(x87_status);
5895c3c716bSSiva Chandra Reddy 
5905c3c716bSSiva Chandra Reddy   if (envp == FE_DFL_ENV) {
5915c3c716bSSiva Chandra Reddy     // Reset the exception flags in the status word.
5921c92911eSMichael Jones     x87_status.status_word &= ~uint16_t(0x3F);
5935c3c716bSSiva Chandra Reddy     // Reset other non-sensitive parts of the status word.
5945c3c716bSSiva Chandra Reddy     for (int i = 0; i < 5; i++)
5951c92911eSMichael Jones       x87_status._[i] = 0;
5965c3c716bSSiva Chandra Reddy     // In the control word, we do the following:
5975c3c716bSSiva Chandra Reddy     // 1. Mask all exceptions
5985c3c716bSSiva Chandra Reddy     // 2. Set rounding mode to round-to-nearest
5995c3c716bSSiva Chandra Reddy     // 3. Set the internal precision to double extended precision.
6001c92911eSMichael Jones     x87_status.control_word |= uint16_t(0x3F);         // Mask all exceptions.
6011c92911eSMichael Jones     x87_status.control_word &= ~(uint16_t(0x3) << 10); // Round to nearest.
6021c92911eSMichael Jones     x87_status.control_word |= (uint16_t(0x3) << 8);   // Extended precision.
6031c92911eSMichael Jones     internal::write_x87_state_descriptor(x87_status);
6045c3c716bSSiva Chandra Reddy 
6055c3c716bSSiva Chandra Reddy     // We take the exact same approach MXCSR register as well.
6065c3c716bSSiva Chandra Reddy     // MXCSR has two additional fields, "flush-to-zero" and
6075c3c716bSSiva Chandra Reddy     // "denormals-are-zero". We reset those bits. Also, MXCSR does not
6085c3c716bSSiva Chandra Reddy     // have a field which controls the precision of internal operations.
6091c92911eSMichael Jones     uint32_t mxcsr = internal::get_mxcsr();
6105c3c716bSSiva Chandra Reddy     mxcsr &= ~uint16_t(0x3F);        // Clear exception flags.
6115c3c716bSSiva Chandra Reddy     mxcsr &= ~(uint16_t(0x1) << 6);  // Reset denormals-are-zero
6125c3c716bSSiva Chandra Reddy     mxcsr |= (uint16_t(0x3F) << 7);  // Mask exceptions
6135c3c716bSSiva Chandra Reddy     mxcsr &= ~(uint16_t(0x3) << 13); // Round to nearest.
6145c3c716bSSiva Chandra Reddy     mxcsr &= ~(uint16_t(0x1) << 15); // Reset flush-to-zero
6151c92911eSMichael Jones     internal::write_mxcsr(mxcsr);
6165c3c716bSSiva Chandra Reddy 
6175c3c716bSSiva Chandra Reddy     return 0;
6185c3c716bSSiva Chandra Reddy   }
6195c3c716bSSiva Chandra Reddy 
6205c3c716bSSiva Chandra Reddy   const internal::FPState *fpstate =
621c120edc7SMichael Jones       reinterpret_cast<const internal::FPState *>(envp);
6225c3c716bSSiva Chandra Reddy 
6235c3c716bSSiva Chandra Reddy   // Copy the exception status flags from envp.
6241c92911eSMichael Jones   x87_status.status_word &= ~uint16_t(0x3F);
62550414422SDominic Chen #ifdef __APPLE__
62650414422SDominic Chen   x87_status.status_word |= (fpstate->status_word & 0x3F);
62750414422SDominic Chen   // We can set the x87 control word as is as there no sensitive bits.
62850414422SDominic Chen   x87_status.control_word = fpstate->control_word;
62950414422SDominic Chen #else
6301c92911eSMichael Jones   x87_status.status_word |= (fpstate->x87_status.status_word & 0x3F);
6315c3c716bSSiva Chandra Reddy   // Copy other non-sensitive parts of the status word.
6325c3c716bSSiva Chandra Reddy   for (int i = 0; i < 5; i++)
6331c92911eSMichael Jones     x87_status._[i] = fpstate->x87_status._[i];
6345c3c716bSSiva Chandra Reddy   // We can set the x87 control word as is as there no sensitive bits.
6351c92911eSMichael Jones   x87_status.control_word = fpstate->x87_status.control_word;
63650414422SDominic Chen #endif // __APPLE__
6371c92911eSMichael Jones   internal::write_x87_state_descriptor(x87_status);
6385c3c716bSSiva Chandra Reddy 
6395c3c716bSSiva Chandra Reddy   // We can write the MXCSR state as is as there are no sensitive bits.
6401c92911eSMichael Jones   internal::write_mxcsr(fpstate->mxcsr);
641c120edc7SMichael Jones   return 0;
642c120edc7SMichael Jones }
643c120edc7SMichael Jones #endif
644c120edc7SMichael Jones 
645c120edc7SMichael Jones } // namespace fputil
646*5ff3ff33SPetr Hosek } // namespace LIBC_NAMESPACE_DECL
647c120edc7SMichael Jones 
648270547f3SGuillaume Chatelet #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_X86_64_FENVIMPL_H
649