xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
106c3fb27SDimitry Andric //===-- SIModeRegisterDefaults.h --------------------------------*- C++ -*-===//
206c3fb27SDimitry Andric //
306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
606c3fb27SDimitry Andric //
706c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
806c3fb27SDimitry Andric 
906c3fb27SDimitry Andric #ifndef LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H
1006c3fb27SDimitry Andric #define LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H
1106c3fb27SDimitry Andric 
1206c3fb27SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
1306c3fb27SDimitry Andric #include "llvm/ADT/FloatingPointMode.h"
1406c3fb27SDimitry Andric 
1506c3fb27SDimitry Andric namespace llvm {
1606c3fb27SDimitry Andric 
17*5f757f3fSDimitry Andric class GCNSubtarget;
18*5f757f3fSDimitry Andric 
1906c3fb27SDimitry Andric // Track defaults for fields in the MODE register.
2006c3fb27SDimitry Andric struct SIModeRegisterDefaults {
2106c3fb27SDimitry Andric   /// Floating point opcodes that support exception flag gathering quiet and
2206c3fb27SDimitry Andric   /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
2306c3fb27SDimitry Andric   /// become IEEE 754- 2008 compliant due to signaling NaN propagation and
2406c3fb27SDimitry Andric   /// quieting.
2506c3fb27SDimitry Andric   bool IEEE : 1;
2606c3fb27SDimitry Andric 
2706c3fb27SDimitry Andric   /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
2806c3fb27SDimitry Andric   /// clamp NaN to zero; otherwise, pass NaN through.
2906c3fb27SDimitry Andric   bool DX10Clamp : 1;
3006c3fb27SDimitry Andric 
3106c3fb27SDimitry Andric   /// If this is set, neither input or output denormals are flushed for most f32
3206c3fb27SDimitry Andric   /// instructions.
3306c3fb27SDimitry Andric   DenormalMode FP32Denormals;
3406c3fb27SDimitry Andric 
3506c3fb27SDimitry Andric   /// If this is set, neither input or output denormals are flushed for both f64
3606c3fb27SDimitry Andric   /// and f16/v2f16 instructions.
3706c3fb27SDimitry Andric   DenormalMode FP64FP16Denormals;
3806c3fb27SDimitry Andric 
3906c3fb27SDimitry Andric   SIModeRegisterDefaults() :
4006c3fb27SDimitry Andric     IEEE(true),
4106c3fb27SDimitry Andric     DX10Clamp(true),
4206c3fb27SDimitry Andric     FP32Denormals(DenormalMode::getIEEE()),
4306c3fb27SDimitry Andric     FP64FP16Denormals(DenormalMode::getIEEE()) {}
4406c3fb27SDimitry Andric 
45*5f757f3fSDimitry Andric   SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);
4606c3fb27SDimitry Andric 
4706c3fb27SDimitry Andric   static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
4806c3fb27SDimitry Andric     SIModeRegisterDefaults Mode;
4906c3fb27SDimitry Andric     Mode.IEEE = !AMDGPU::isShader(CC);
5006c3fb27SDimitry Andric     return Mode;
5106c3fb27SDimitry Andric   }
5206c3fb27SDimitry Andric 
5306c3fb27SDimitry Andric   bool operator==(const SIModeRegisterDefaults Other) const {
5406c3fb27SDimitry Andric     return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
5506c3fb27SDimitry Andric            FP32Denormals == Other.FP32Denormals &&
5606c3fb27SDimitry Andric            FP64FP16Denormals == Other.FP64FP16Denormals;
5706c3fb27SDimitry Andric   }
5806c3fb27SDimitry Andric 
5906c3fb27SDimitry Andric   /// Get the encoding value for the FP_DENORM bits of the mode register for the
6006c3fb27SDimitry Andric   /// FP32 denormal mode.
6106c3fb27SDimitry Andric   uint32_t fpDenormModeSPValue() const {
6206c3fb27SDimitry Andric     if (FP32Denormals == DenormalMode::getPreserveSign())
6306c3fb27SDimitry Andric       return FP_DENORM_FLUSH_IN_FLUSH_OUT;
6406c3fb27SDimitry Andric     if (FP32Denormals.Output == DenormalMode::PreserveSign)
6506c3fb27SDimitry Andric       return FP_DENORM_FLUSH_OUT;
6606c3fb27SDimitry Andric     if (FP32Denormals.Input == DenormalMode::PreserveSign)
6706c3fb27SDimitry Andric       return FP_DENORM_FLUSH_IN;
6806c3fb27SDimitry Andric     return FP_DENORM_FLUSH_NONE;
6906c3fb27SDimitry Andric   }
7006c3fb27SDimitry Andric 
7106c3fb27SDimitry Andric   /// Get the encoding value for the FP_DENORM bits of the mode register for the
7206c3fb27SDimitry Andric   /// FP64/FP16 denormal mode.
7306c3fb27SDimitry Andric   uint32_t fpDenormModeDPValue() const {
7406c3fb27SDimitry Andric     if (FP64FP16Denormals == DenormalMode::getPreserveSign())
7506c3fb27SDimitry Andric       return FP_DENORM_FLUSH_IN_FLUSH_OUT;
7606c3fb27SDimitry Andric     if (FP64FP16Denormals.Output == DenormalMode::PreserveSign)
7706c3fb27SDimitry Andric       return FP_DENORM_FLUSH_OUT;
7806c3fb27SDimitry Andric     if (FP64FP16Denormals.Input == DenormalMode::PreserveSign)
7906c3fb27SDimitry Andric       return FP_DENORM_FLUSH_IN;
8006c3fb27SDimitry Andric     return FP_DENORM_FLUSH_NONE;
8106c3fb27SDimitry Andric   }
8206c3fb27SDimitry Andric 
8306c3fb27SDimitry Andric   // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
8406c3fb27SDimitry Andric   // be able to override.
8506c3fb27SDimitry Andric   bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
8606c3fb27SDimitry Andric     return DX10Clamp == CalleeMode.DX10Clamp && IEEE == CalleeMode.IEEE;
8706c3fb27SDimitry Andric   }
8806c3fb27SDimitry Andric };
8906c3fb27SDimitry Andric 
90*5f757f3fSDimitry Andric namespace AMDGPU {
91*5f757f3fSDimitry Andric 
92*5f757f3fSDimitry Andric /// Return values used for llvm.get.rounding
93*5f757f3fSDimitry Andric ///
94*5f757f3fSDimitry Andric /// When both the F32 and F64/F16 modes are the same, returns the standard
95*5f757f3fSDimitry Andric /// values. If they differ, returns an extended mode starting at 8.
96*5f757f3fSDimitry Andric enum AMDGPUFltRounds : int8_t {
97*5f757f3fSDimitry Andric   // Inherit everything from RoundingMode
98*5f757f3fSDimitry Andric   TowardZero = static_cast<int8_t>(RoundingMode::TowardZero),
99*5f757f3fSDimitry Andric   NearestTiesToEven = static_cast<int8_t>(RoundingMode::NearestTiesToEven),
100*5f757f3fSDimitry Andric   TowardPositive = static_cast<int8_t>(RoundingMode::TowardPositive),
101*5f757f3fSDimitry Andric   TowardNegative = static_cast<int8_t>(RoundingMode::TowardNegative),
102*5f757f3fSDimitry Andric   NearestTiesToAwayUnsupported =
103*5f757f3fSDimitry Andric       static_cast<int8_t>(RoundingMode::NearestTiesToAway),
104*5f757f3fSDimitry Andric 
105*5f757f3fSDimitry Andric   Dynamic = static_cast<int8_t>(RoundingMode::Dynamic),
106*5f757f3fSDimitry Andric 
107*5f757f3fSDimitry Andric   // Permute the mismatched rounding mode cases.  If the modes are the same, use
108*5f757f3fSDimitry Andric   // the standard values, otherwise, these values are sorted such that higher
109*5f757f3fSDimitry Andric   // hardware encoded values have higher enum values.
110*5f757f3fSDimitry Andric   NearestTiesToEvenF32_NearestTiesToEvenF64 = NearestTiesToEven,
111*5f757f3fSDimitry Andric   NearestTiesToEvenF32_TowardPositiveF64 = 8,
112*5f757f3fSDimitry Andric   NearestTiesToEvenF32_TowardNegativeF64 = 9,
113*5f757f3fSDimitry Andric   NearestTiesToEvenF32_TowardZeroF64 = 10,
114*5f757f3fSDimitry Andric 
115*5f757f3fSDimitry Andric   TowardPositiveF32_NearestTiesToEvenF64 = 11,
116*5f757f3fSDimitry Andric   TowardPositiveF32_TowardPositiveF64 = TowardPositive,
117*5f757f3fSDimitry Andric   TowardPositiveF32_TowardNegativeF64 = 12,
118*5f757f3fSDimitry Andric   TowardPositiveF32_TowardZeroF64 = 13,
119*5f757f3fSDimitry Andric 
120*5f757f3fSDimitry Andric   TowardNegativeF32_NearestTiesToEvenF64 = 14,
121*5f757f3fSDimitry Andric   TowardNegativeF32_TowardPositiveF64 = 15,
122*5f757f3fSDimitry Andric   TowardNegativeF32_TowardNegativeF64 = TowardNegative,
123*5f757f3fSDimitry Andric   TowardNegativeF32_TowardZeroF64 = 16,
124*5f757f3fSDimitry Andric 
125*5f757f3fSDimitry Andric   TowardZeroF32_NearestTiesToEvenF64 = 17,
126*5f757f3fSDimitry Andric   TowardZeroF32_TowardPositiveF64 = 18,
127*5f757f3fSDimitry Andric   TowardZeroF32_TowardNegativeF64 = 19,
128*5f757f3fSDimitry Andric   TowardZeroF32_TowardZeroF64 = TowardZero,
129*5f757f3fSDimitry Andric 
130*5f757f3fSDimitry Andric   Invalid = static_cast<int8_t>(RoundingMode::Invalid)
131*5f757f3fSDimitry Andric };
132*5f757f3fSDimitry Andric 
/// Distance from the largest supported standard mode to the first nonstandard
/// value returned for llvm.get.rounding.
constexpr uint32_t ExtendedFltRoundOffset = 4;

/// Bit position of the f32 rounding mode within the mode register.
constexpr uint32_t F32FltRoundOffset = 0;

/// Bit position of the f64/f16 rounding mode within the mode register.
constexpr uint32_t F64FltRoundOffset = 2;

/// Bit-indexed lookup table that maps hardware rounding mode values to
/// FLT_ROUNDS values. Declared here only; the definition lives elsewhere.
extern const uint64_t FltRoundConversionTable;
146*5f757f3fSDimitry Andric 
147*5f757f3fSDimitry Andric } // end namespace AMDGPU
148*5f757f3fSDimitry Andric 
14906c3fb27SDimitry Andric } // end namespace llvm
15006c3fb27SDimitry Andric 
15106c3fb27SDimitry Andric #endif // LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H
152