106c3fb27SDimitry Andric //===-- SIModeRegisterDefaults.h --------------------------------*- C++ -*-===// 206c3fb27SDimitry Andric // 306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 606c3fb27SDimitry Andric // 706c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 806c3fb27SDimitry Andric 906c3fb27SDimitry Andric #ifndef LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H 1006c3fb27SDimitry Andric #define LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H 1106c3fb27SDimitry Andric 1206c3fb27SDimitry Andric #include "Utils/AMDGPUBaseInfo.h" 1306c3fb27SDimitry Andric #include "llvm/ADT/FloatingPointMode.h" 1406c3fb27SDimitry Andric 1506c3fb27SDimitry Andric namespace llvm { 1606c3fb27SDimitry Andric 175f757f3fSDimitry Andric class GCNSubtarget; 185f757f3fSDimitry Andric 1906c3fb27SDimitry Andric // Track defaults for fields in the MODE register. 2006c3fb27SDimitry Andric struct SIModeRegisterDefaults { 2106c3fb27SDimitry Andric /// Floating point opcodes that support exception flag gathering quiet and 2206c3fb27SDimitry Andric /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10 2306c3fb27SDimitry Andric /// become IEEE 754- 2008 compliant due to signaling NaN propagation and 2406c3fb27SDimitry Andric /// quieting. 2506c3fb27SDimitry Andric bool IEEE : 1; 2606c3fb27SDimitry Andric 2706c3fb27SDimitry Andric /// Used by the vector ALU to force DX10-style treatment of NaNs: when set, 2806c3fb27SDimitry Andric /// clamp NaN to zero; otherwise, pass NaN through. 2906c3fb27SDimitry Andric bool DX10Clamp : 1; 3006c3fb27SDimitry Andric 3106c3fb27SDimitry Andric /// If this is set, neither input or output denormals are flushed for most f32 3206c3fb27SDimitry Andric /// instructions. 3306c3fb27SDimitry Andric DenormalMode FP32Denormals; 3406c3fb27SDimitry Andric 3506c3fb27SDimitry Andric /// If this is set, neither input or output denormals are flushed for both f64 3606c3fb27SDimitry Andric /// and f16/v2f16 instructions. 3706c3fb27SDimitry Andric DenormalMode FP64FP16Denormals; 3806c3fb27SDimitry Andric 3906c3fb27SDimitry Andric SIModeRegisterDefaults() : 4006c3fb27SDimitry Andric IEEE(true), 4106c3fb27SDimitry Andric DX10Clamp(true), 4206c3fb27SDimitry Andric FP32Denormals(DenormalMode::getIEEE()), 4306c3fb27SDimitry Andric FP64FP16Denormals(DenormalMode::getIEEE()) {} 4406c3fb27SDimitry Andric 455f757f3fSDimitry Andric SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST); 4606c3fb27SDimitry Andric 4706c3fb27SDimitry Andric static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) { 4806c3fb27SDimitry Andric SIModeRegisterDefaults Mode; 4906c3fb27SDimitry Andric Mode.IEEE = !AMDGPU::isShader(CC); 5006c3fb27SDimitry Andric return Mode; 5106c3fb27SDimitry Andric } 5206c3fb27SDimitry Andric 5306c3fb27SDimitry Andric bool operator==(const SIModeRegisterDefaults Other) const { 5406c3fb27SDimitry Andric return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp && 5506c3fb27SDimitry Andric FP32Denormals == Other.FP32Denormals && 5606c3fb27SDimitry Andric FP64FP16Denormals == Other.FP64FP16Denormals; 5706c3fb27SDimitry Andric } 5806c3fb27SDimitry Andric 5906c3fb27SDimitry Andric /// Get the encoding value for the FP_DENORM bits of the mode register for the 6006c3fb27SDimitry Andric /// FP32 denormal mode. 6106c3fb27SDimitry Andric uint32_t fpDenormModeSPValue() const { 6206c3fb27SDimitry Andric if (FP32Denormals == DenormalMode::getPreserveSign()) 6306c3fb27SDimitry Andric return FP_DENORM_FLUSH_IN_FLUSH_OUT; 6406c3fb27SDimitry Andric if (FP32Denormals.Output == DenormalMode::PreserveSign) 6506c3fb27SDimitry Andric return FP_DENORM_FLUSH_OUT; 6606c3fb27SDimitry Andric if (FP32Denormals.Input == DenormalMode::PreserveSign) 6706c3fb27SDimitry Andric return FP_DENORM_FLUSH_IN; 6806c3fb27SDimitry Andric return FP_DENORM_FLUSH_NONE; 6906c3fb27SDimitry Andric } 7006c3fb27SDimitry Andric 7106c3fb27SDimitry Andric /// Get the encoding value for the FP_DENORM bits of the mode register for the 7206c3fb27SDimitry Andric /// FP64/FP16 denormal mode. 7306c3fb27SDimitry Andric uint32_t fpDenormModeDPValue() const { 7406c3fb27SDimitry Andric if (FP64FP16Denormals == DenormalMode::getPreserveSign()) 7506c3fb27SDimitry Andric return FP_DENORM_FLUSH_IN_FLUSH_OUT; 7606c3fb27SDimitry Andric if (FP64FP16Denormals.Output == DenormalMode::PreserveSign) 7706c3fb27SDimitry Andric return FP_DENORM_FLUSH_OUT; 7806c3fb27SDimitry Andric if (FP64FP16Denormals.Input == DenormalMode::PreserveSign) 7906c3fb27SDimitry Andric return FP_DENORM_FLUSH_IN; 8006c3fb27SDimitry Andric return FP_DENORM_FLUSH_NONE; 8106c3fb27SDimitry Andric } 8206c3fb27SDimitry Andric 8306c3fb27SDimitry Andric // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should 8406c3fb27SDimitry Andric // be able to override. 8506c3fb27SDimitry Andric bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const { 8606c3fb27SDimitry Andric return DX10Clamp == CalleeMode.DX10Clamp && IEEE == CalleeMode.IEEE; 8706c3fb27SDimitry Andric } 8806c3fb27SDimitry Andric }; 8906c3fb27SDimitry Andric 905f757f3fSDimitry Andric namespace AMDGPU { 915f757f3fSDimitry Andric 925f757f3fSDimitry Andric /// Return values used for llvm.get.rounding 935f757f3fSDimitry Andric /// 945f757f3fSDimitry Andric /// When both the F32 and F64/F16 modes are the same, returns the standard 955f757f3fSDimitry Andric /// values. If they differ, returns an extended mode starting at 8. 965f757f3fSDimitry Andric enum AMDGPUFltRounds : int8_t { 975f757f3fSDimitry Andric // Inherit everything from RoundingMode 985f757f3fSDimitry Andric TowardZero = static_cast<int8_t>(RoundingMode::TowardZero), 995f757f3fSDimitry Andric NearestTiesToEven = static_cast<int8_t>(RoundingMode::NearestTiesToEven), 1005f757f3fSDimitry Andric TowardPositive = static_cast<int8_t>(RoundingMode::TowardPositive), 1015f757f3fSDimitry Andric TowardNegative = static_cast<int8_t>(RoundingMode::TowardNegative), 1025f757f3fSDimitry Andric NearestTiesToAwayUnsupported = 1035f757f3fSDimitry Andric static_cast<int8_t>(RoundingMode::NearestTiesToAway), 1045f757f3fSDimitry Andric 1055f757f3fSDimitry Andric Dynamic = static_cast<int8_t>(RoundingMode::Dynamic), 1065f757f3fSDimitry Andric 1075f757f3fSDimitry Andric // Permute the mismatched rounding mode cases. If the modes are the same, use 1085f757f3fSDimitry Andric // the standard values, otherwise, these values are sorted such that higher 1095f757f3fSDimitry Andric // hardware encoded values have higher enum values. 1105f757f3fSDimitry Andric NearestTiesToEvenF32_NearestTiesToEvenF64 = NearestTiesToEven, 1115f757f3fSDimitry Andric NearestTiesToEvenF32_TowardPositiveF64 = 8, 1125f757f3fSDimitry Andric NearestTiesToEvenF32_TowardNegativeF64 = 9, 1135f757f3fSDimitry Andric NearestTiesToEvenF32_TowardZeroF64 = 10, 1145f757f3fSDimitry Andric 1155f757f3fSDimitry Andric TowardPositiveF32_NearestTiesToEvenF64 = 11, 1165f757f3fSDimitry Andric TowardPositiveF32_TowardPositiveF64 = TowardPositive, 1175f757f3fSDimitry Andric TowardPositiveF32_TowardNegativeF64 = 12, 1185f757f3fSDimitry Andric TowardPositiveF32_TowardZeroF64 = 13, 1195f757f3fSDimitry Andric 1205f757f3fSDimitry Andric TowardNegativeF32_NearestTiesToEvenF64 = 14, 1215f757f3fSDimitry Andric TowardNegativeF32_TowardPositiveF64 = 15, 1225f757f3fSDimitry Andric TowardNegativeF32_TowardNegativeF64 = TowardNegative, 1235f757f3fSDimitry Andric TowardNegativeF32_TowardZeroF64 = 16, 1245f757f3fSDimitry Andric 1255f757f3fSDimitry Andric TowardZeroF32_NearestTiesToEvenF64 = 17, 1265f757f3fSDimitry Andric TowardZeroF32_TowardPositiveF64 = 18, 1275f757f3fSDimitry Andric TowardZeroF32_TowardNegativeF64 = 19, 1285f757f3fSDimitry Andric TowardZeroF32_TowardZeroF64 = TowardZero, 1295f757f3fSDimitry Andric 1305f757f3fSDimitry Andric Invalid = static_cast<int8_t>(RoundingMode::Invalid) 1315f757f3fSDimitry Andric }; 1325f757f3fSDimitry Andric 1335f757f3fSDimitry Andric /// Offset of nonstandard values for llvm.get.rounding results from the largest 1345f757f3fSDimitry Andric /// supported mode. 1355f757f3fSDimitry Andric static constexpr uint32_t ExtendedFltRoundOffset = 4; 1365f757f3fSDimitry Andric 1375f757f3fSDimitry Andric /// Offset in mode register of f32 rounding mode. 1385f757f3fSDimitry Andric static constexpr uint32_t F32FltRoundOffset = 0; 1395f757f3fSDimitry Andric 1405f757f3fSDimitry Andric /// Offset in mode register of f64/f16 rounding mode. 1415f757f3fSDimitry Andric static constexpr uint32_t F64FltRoundOffset = 2; 1425f757f3fSDimitry Andric 1435f757f3fSDimitry Andric // Bit indexed table to convert from hardware rounding mode values to FLT_ROUNDS 1445f757f3fSDimitry Andric // values. 1455f757f3fSDimitry Andric extern const uint64_t FltRoundConversionTable; 1465f757f3fSDimitry Andric 147*0fca6ea1SDimitry Andric // Bit indexed table to convert from FLT_ROUNDS values to hardware rounding mode 148*0fca6ea1SDimitry Andric // values 149*0fca6ea1SDimitry Andric extern const uint64_t FltRoundToHWConversionTable; 150*0fca6ea1SDimitry Andric 151*0fca6ea1SDimitry Andric /// Read the hardware rounding mode equivalent of a AMDGPUFltRounds value. 152*0fca6ea1SDimitry Andric uint32_t decodeFltRoundToHWConversionTable(uint32_t FltRounds); 153*0fca6ea1SDimitry Andric 1545f757f3fSDimitry Andric } // end namespace AMDGPU 1555f757f3fSDimitry Andric 15606c3fb27SDimitry Andric } // end namespace llvm 15706c3fb27SDimitry Andric 15806c3fb27SDimitry Andric #endif // LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H 159