xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
106c3fb27SDimitry Andric //===-- SIModeRegisterDefaults.h --------------------------------*- C++ -*-===//
206c3fb27SDimitry Andric //
306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
606c3fb27SDimitry Andric //
706c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
806c3fb27SDimitry Andric 
906c3fb27SDimitry Andric #ifndef LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H
1006c3fb27SDimitry Andric #define LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H
1106c3fb27SDimitry Andric 
1206c3fb27SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
1306c3fb27SDimitry Andric #include "llvm/ADT/FloatingPointMode.h"
1406c3fb27SDimitry Andric 
1506c3fb27SDimitry Andric namespace llvm {
1606c3fb27SDimitry Andric 
175f757f3fSDimitry Andric class GCNSubtarget;
185f757f3fSDimitry Andric 
1906c3fb27SDimitry Andric // Track defaults for fields in the MODE register.
2006c3fb27SDimitry Andric struct SIModeRegisterDefaults {
2106c3fb27SDimitry Andric   /// Floating point opcodes that support exception flag gathering quiet and
2206c3fb27SDimitry Andric   /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
2306c3fb27SDimitry Andric   /// become IEEE 754- 2008 compliant due to signaling NaN propagation and
2406c3fb27SDimitry Andric   /// quieting.
2506c3fb27SDimitry Andric   bool IEEE : 1;
2606c3fb27SDimitry Andric 
2706c3fb27SDimitry Andric   /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
2806c3fb27SDimitry Andric   /// clamp NaN to zero; otherwise, pass NaN through.
2906c3fb27SDimitry Andric   bool DX10Clamp : 1;
3006c3fb27SDimitry Andric 
3106c3fb27SDimitry Andric   /// If this is set, neither input or output denormals are flushed for most f32
3206c3fb27SDimitry Andric   /// instructions.
3306c3fb27SDimitry Andric   DenormalMode FP32Denormals;
3406c3fb27SDimitry Andric 
3506c3fb27SDimitry Andric   /// If this is set, neither input or output denormals are flushed for both f64
3606c3fb27SDimitry Andric   /// and f16/v2f16 instructions.
3706c3fb27SDimitry Andric   DenormalMode FP64FP16Denormals;
3806c3fb27SDimitry Andric 
3906c3fb27SDimitry Andric   SIModeRegisterDefaults() :
4006c3fb27SDimitry Andric     IEEE(true),
4106c3fb27SDimitry Andric     DX10Clamp(true),
4206c3fb27SDimitry Andric     FP32Denormals(DenormalMode::getIEEE()),
4306c3fb27SDimitry Andric     FP64FP16Denormals(DenormalMode::getIEEE()) {}
4406c3fb27SDimitry Andric 
455f757f3fSDimitry Andric   SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST);
4606c3fb27SDimitry Andric 
4706c3fb27SDimitry Andric   static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
4806c3fb27SDimitry Andric     SIModeRegisterDefaults Mode;
4906c3fb27SDimitry Andric     Mode.IEEE = !AMDGPU::isShader(CC);
5006c3fb27SDimitry Andric     return Mode;
5106c3fb27SDimitry Andric   }
5206c3fb27SDimitry Andric 
5306c3fb27SDimitry Andric   bool operator==(const SIModeRegisterDefaults Other) const {
5406c3fb27SDimitry Andric     return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
5506c3fb27SDimitry Andric            FP32Denormals == Other.FP32Denormals &&
5606c3fb27SDimitry Andric            FP64FP16Denormals == Other.FP64FP16Denormals;
5706c3fb27SDimitry Andric   }
5806c3fb27SDimitry Andric 
5906c3fb27SDimitry Andric   /// Get the encoding value for the FP_DENORM bits of the mode register for the
6006c3fb27SDimitry Andric   /// FP32 denormal mode.
6106c3fb27SDimitry Andric   uint32_t fpDenormModeSPValue() const {
6206c3fb27SDimitry Andric     if (FP32Denormals == DenormalMode::getPreserveSign())
6306c3fb27SDimitry Andric       return FP_DENORM_FLUSH_IN_FLUSH_OUT;
6406c3fb27SDimitry Andric     if (FP32Denormals.Output == DenormalMode::PreserveSign)
6506c3fb27SDimitry Andric       return FP_DENORM_FLUSH_OUT;
6606c3fb27SDimitry Andric     if (FP32Denormals.Input == DenormalMode::PreserveSign)
6706c3fb27SDimitry Andric       return FP_DENORM_FLUSH_IN;
6806c3fb27SDimitry Andric     return FP_DENORM_FLUSH_NONE;
6906c3fb27SDimitry Andric   }
7006c3fb27SDimitry Andric 
7106c3fb27SDimitry Andric   /// Get the encoding value for the FP_DENORM bits of the mode register for the
7206c3fb27SDimitry Andric   /// FP64/FP16 denormal mode.
7306c3fb27SDimitry Andric   uint32_t fpDenormModeDPValue() const {
7406c3fb27SDimitry Andric     if (FP64FP16Denormals == DenormalMode::getPreserveSign())
7506c3fb27SDimitry Andric       return FP_DENORM_FLUSH_IN_FLUSH_OUT;
7606c3fb27SDimitry Andric     if (FP64FP16Denormals.Output == DenormalMode::PreserveSign)
7706c3fb27SDimitry Andric       return FP_DENORM_FLUSH_OUT;
7806c3fb27SDimitry Andric     if (FP64FP16Denormals.Input == DenormalMode::PreserveSign)
7906c3fb27SDimitry Andric       return FP_DENORM_FLUSH_IN;
8006c3fb27SDimitry Andric     return FP_DENORM_FLUSH_NONE;
8106c3fb27SDimitry Andric   }
8206c3fb27SDimitry Andric 
8306c3fb27SDimitry Andric   // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
8406c3fb27SDimitry Andric   // be able to override.
8506c3fb27SDimitry Andric   bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
8606c3fb27SDimitry Andric     return DX10Clamp == CalleeMode.DX10Clamp && IEEE == CalleeMode.IEEE;
8706c3fb27SDimitry Andric   }
8806c3fb27SDimitry Andric };
8906c3fb27SDimitry Andric 
905f757f3fSDimitry Andric namespace AMDGPU {
915f757f3fSDimitry Andric 
925f757f3fSDimitry Andric /// Return values used for llvm.get.rounding
935f757f3fSDimitry Andric ///
945f757f3fSDimitry Andric /// When both the F32 and F64/F16 modes are the same, returns the standard
955f757f3fSDimitry Andric /// values. If they differ, returns an extended mode starting at 8.
965f757f3fSDimitry Andric enum AMDGPUFltRounds : int8_t {
975f757f3fSDimitry Andric   // Inherit everything from RoundingMode
985f757f3fSDimitry Andric   TowardZero = static_cast<int8_t>(RoundingMode::TowardZero),
995f757f3fSDimitry Andric   NearestTiesToEven = static_cast<int8_t>(RoundingMode::NearestTiesToEven),
1005f757f3fSDimitry Andric   TowardPositive = static_cast<int8_t>(RoundingMode::TowardPositive),
1015f757f3fSDimitry Andric   TowardNegative = static_cast<int8_t>(RoundingMode::TowardNegative),
1025f757f3fSDimitry Andric   NearestTiesToAwayUnsupported =
1035f757f3fSDimitry Andric       static_cast<int8_t>(RoundingMode::NearestTiesToAway),
1045f757f3fSDimitry Andric 
1055f757f3fSDimitry Andric   Dynamic = static_cast<int8_t>(RoundingMode::Dynamic),
1065f757f3fSDimitry Andric 
1075f757f3fSDimitry Andric   // Permute the mismatched rounding mode cases.  If the modes are the same, use
1085f757f3fSDimitry Andric   // the standard values, otherwise, these values are sorted such that higher
1095f757f3fSDimitry Andric   // hardware encoded values have higher enum values.
1105f757f3fSDimitry Andric   NearestTiesToEvenF32_NearestTiesToEvenF64 = NearestTiesToEven,
1115f757f3fSDimitry Andric   NearestTiesToEvenF32_TowardPositiveF64 = 8,
1125f757f3fSDimitry Andric   NearestTiesToEvenF32_TowardNegativeF64 = 9,
1135f757f3fSDimitry Andric   NearestTiesToEvenF32_TowardZeroF64 = 10,
1145f757f3fSDimitry Andric 
1155f757f3fSDimitry Andric   TowardPositiveF32_NearestTiesToEvenF64 = 11,
1165f757f3fSDimitry Andric   TowardPositiveF32_TowardPositiveF64 = TowardPositive,
1175f757f3fSDimitry Andric   TowardPositiveF32_TowardNegativeF64 = 12,
1185f757f3fSDimitry Andric   TowardPositiveF32_TowardZeroF64 = 13,
1195f757f3fSDimitry Andric 
1205f757f3fSDimitry Andric   TowardNegativeF32_NearestTiesToEvenF64 = 14,
1215f757f3fSDimitry Andric   TowardNegativeF32_TowardPositiveF64 = 15,
1225f757f3fSDimitry Andric   TowardNegativeF32_TowardNegativeF64 = TowardNegative,
1235f757f3fSDimitry Andric   TowardNegativeF32_TowardZeroF64 = 16,
1245f757f3fSDimitry Andric 
1255f757f3fSDimitry Andric   TowardZeroF32_NearestTiesToEvenF64 = 17,
1265f757f3fSDimitry Andric   TowardZeroF32_TowardPositiveF64 = 18,
1275f757f3fSDimitry Andric   TowardZeroF32_TowardNegativeF64 = 19,
1285f757f3fSDimitry Andric   TowardZeroF32_TowardZeroF64 = TowardZero,
1295f757f3fSDimitry Andric 
1305f757f3fSDimitry Andric   Invalid = static_cast<int8_t>(RoundingMode::Invalid)
1315f757f3fSDimitry Andric };
1325f757f3fSDimitry Andric 
/// Offset in mode register of f32 rounding mode.
static constexpr uint32_t F32FltRoundOffset = 0;

/// Offset in mode register of f64/f16 rounding mode.
static constexpr uint32_t F64FltRoundOffset = 2;

/// Offset, measured from the largest supported mode, at which the nonstandard
/// llvm.get.rounding result values start.
static constexpr uint32_t ExtendedFltRoundOffset = 4;
1425f757f3fSDimitry Andric 
// Bit-indexed table to convert from hardware rounding mode values to FLT_ROUNDS
// values.
1455f757f3fSDimitry Andric extern const uint64_t FltRoundConversionTable;
1465f757f3fSDimitry Andric 
// Bit-indexed table to convert from FLT_ROUNDS values to hardware rounding mode
// values.
149*0fca6ea1SDimitry Andric extern const uint64_t FltRoundToHWConversionTable;
150*0fca6ea1SDimitry Andric 
/// Read the hardware rounding mode equivalent of an AMDGPUFltRounds value.
152*0fca6ea1SDimitry Andric uint32_t decodeFltRoundToHWConversionTable(uint32_t FltRounds);
153*0fca6ea1SDimitry Andric 
1545f757f3fSDimitry Andric } // end namespace AMDGPU
1555f757f3fSDimitry Andric 
15606c3fb27SDimitry Andric } // end namespace llvm
15706c3fb27SDimitry Andric 
15806c3fb27SDimitry Andric #endif // LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H
159