106c3fb27SDimitry Andric //===-- SIModeRegisterDefaults.h --------------------------------*- C++ -*-===// 206c3fb27SDimitry Andric // 306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 606c3fb27SDimitry Andric // 706c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 806c3fb27SDimitry Andric 906c3fb27SDimitry Andric #ifndef LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H 1006c3fb27SDimitry Andric #define LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H 1106c3fb27SDimitry Andric 1206c3fb27SDimitry Andric #include "Utils/AMDGPUBaseInfo.h" 1306c3fb27SDimitry Andric #include "llvm/ADT/FloatingPointMode.h" 1406c3fb27SDimitry Andric 1506c3fb27SDimitry Andric namespace llvm { 1606c3fb27SDimitry Andric 17*5f757f3fSDimitry Andric class GCNSubtarget; 18*5f757f3fSDimitry Andric 1906c3fb27SDimitry Andric // Track defaults for fields in the MODE register. 2006c3fb27SDimitry Andric struct SIModeRegisterDefaults { 2106c3fb27SDimitry Andric /// Floating point opcodes that support exception flag gathering quiet and 2206c3fb27SDimitry Andric /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10 2306c3fb27SDimitry Andric /// become IEEE 754- 2008 compliant due to signaling NaN propagation and 2406c3fb27SDimitry Andric /// quieting. 2506c3fb27SDimitry Andric bool IEEE : 1; 2606c3fb27SDimitry Andric 2706c3fb27SDimitry Andric /// Used by the vector ALU to force DX10-style treatment of NaNs: when set, 2806c3fb27SDimitry Andric /// clamp NaN to zero; otherwise, pass NaN through. 2906c3fb27SDimitry Andric bool DX10Clamp : 1; 3006c3fb27SDimitry Andric 3106c3fb27SDimitry Andric /// If this is set, neither input or output denormals are flushed for most f32 3206c3fb27SDimitry Andric /// instructions. 3306c3fb27SDimitry Andric DenormalMode FP32Denormals; 3406c3fb27SDimitry Andric 3506c3fb27SDimitry Andric /// If this is set, neither input or output denormals are flushed for both f64 3606c3fb27SDimitry Andric /// and f16/v2f16 instructions. 3706c3fb27SDimitry Andric DenormalMode FP64FP16Denormals; 3806c3fb27SDimitry Andric 3906c3fb27SDimitry Andric SIModeRegisterDefaults() : 4006c3fb27SDimitry Andric IEEE(true), 4106c3fb27SDimitry Andric DX10Clamp(true), 4206c3fb27SDimitry Andric FP32Denormals(DenormalMode::getIEEE()), 4306c3fb27SDimitry Andric FP64FP16Denormals(DenormalMode::getIEEE()) {} 4406c3fb27SDimitry Andric 45*5f757f3fSDimitry Andric SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST); 4606c3fb27SDimitry Andric 4706c3fb27SDimitry Andric static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) { 4806c3fb27SDimitry Andric SIModeRegisterDefaults Mode; 4906c3fb27SDimitry Andric Mode.IEEE = !AMDGPU::isShader(CC); 5006c3fb27SDimitry Andric return Mode; 5106c3fb27SDimitry Andric } 5206c3fb27SDimitry Andric 5306c3fb27SDimitry Andric bool operator==(const SIModeRegisterDefaults Other) const { 5406c3fb27SDimitry Andric return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp && 5506c3fb27SDimitry Andric FP32Denormals == Other.FP32Denormals && 5606c3fb27SDimitry Andric FP64FP16Denormals == Other.FP64FP16Denormals; 5706c3fb27SDimitry Andric } 5806c3fb27SDimitry Andric 5906c3fb27SDimitry Andric /// Get the encoding value for the FP_DENORM bits of the mode register for the 6006c3fb27SDimitry Andric /// FP32 denormal mode. 6106c3fb27SDimitry Andric uint32_t fpDenormModeSPValue() const { 6206c3fb27SDimitry Andric if (FP32Denormals == DenormalMode::getPreserveSign()) 6306c3fb27SDimitry Andric return FP_DENORM_FLUSH_IN_FLUSH_OUT; 6406c3fb27SDimitry Andric if (FP32Denormals.Output == DenormalMode::PreserveSign) 6506c3fb27SDimitry Andric return FP_DENORM_FLUSH_OUT; 6606c3fb27SDimitry Andric if (FP32Denormals.Input == DenormalMode::PreserveSign) 6706c3fb27SDimitry Andric return FP_DENORM_FLUSH_IN; 6806c3fb27SDimitry Andric return FP_DENORM_FLUSH_NONE; 6906c3fb27SDimitry Andric } 7006c3fb27SDimitry Andric 7106c3fb27SDimitry Andric /// Get the encoding value for the FP_DENORM bits of the mode register for the 7206c3fb27SDimitry Andric /// FP64/FP16 denormal mode. 7306c3fb27SDimitry Andric uint32_t fpDenormModeDPValue() const { 7406c3fb27SDimitry Andric if (FP64FP16Denormals == DenormalMode::getPreserveSign()) 7506c3fb27SDimitry Andric return FP_DENORM_FLUSH_IN_FLUSH_OUT; 7606c3fb27SDimitry Andric if (FP64FP16Denormals.Output == DenormalMode::PreserveSign) 7706c3fb27SDimitry Andric return FP_DENORM_FLUSH_OUT; 7806c3fb27SDimitry Andric if (FP64FP16Denormals.Input == DenormalMode::PreserveSign) 7906c3fb27SDimitry Andric return FP_DENORM_FLUSH_IN; 8006c3fb27SDimitry Andric return FP_DENORM_FLUSH_NONE; 8106c3fb27SDimitry Andric } 8206c3fb27SDimitry Andric 8306c3fb27SDimitry Andric // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should 8406c3fb27SDimitry Andric // be able to override. 8506c3fb27SDimitry Andric bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const { 8606c3fb27SDimitry Andric return DX10Clamp == CalleeMode.DX10Clamp && IEEE == CalleeMode.IEEE; 8706c3fb27SDimitry Andric } 8806c3fb27SDimitry Andric }; 8906c3fb27SDimitry Andric 90*5f757f3fSDimitry Andric namespace AMDGPU { 91*5f757f3fSDimitry Andric 92*5f757f3fSDimitry Andric /// Return values used for llvm.get.rounding 93*5f757f3fSDimitry Andric /// 94*5f757f3fSDimitry Andric /// When both the F32 and F64/F16 modes are the same, returns the standard 95*5f757f3fSDimitry Andric /// values. If they differ, returns an extended mode starting at 8. 96*5f757f3fSDimitry Andric enum AMDGPUFltRounds : int8_t { 97*5f757f3fSDimitry Andric // Inherit everything from RoundingMode 98*5f757f3fSDimitry Andric TowardZero = static_cast<int8_t>(RoundingMode::TowardZero), 99*5f757f3fSDimitry Andric NearestTiesToEven = static_cast<int8_t>(RoundingMode::NearestTiesToEven), 100*5f757f3fSDimitry Andric TowardPositive = static_cast<int8_t>(RoundingMode::TowardPositive), 101*5f757f3fSDimitry Andric TowardNegative = static_cast<int8_t>(RoundingMode::TowardNegative), 102*5f757f3fSDimitry Andric NearestTiesToAwayUnsupported = 103*5f757f3fSDimitry Andric static_cast<int8_t>(RoundingMode::NearestTiesToAway), 104*5f757f3fSDimitry Andric 105*5f757f3fSDimitry Andric Dynamic = static_cast<int8_t>(RoundingMode::Dynamic), 106*5f757f3fSDimitry Andric 107*5f757f3fSDimitry Andric // Permute the mismatched rounding mode cases. If the modes are the same, use 108*5f757f3fSDimitry Andric // the standard values, otherwise, these values are sorted such that higher 109*5f757f3fSDimitry Andric // hardware encoded values have higher enum values. 110*5f757f3fSDimitry Andric NearestTiesToEvenF32_NearestTiesToEvenF64 = NearestTiesToEven, 111*5f757f3fSDimitry Andric NearestTiesToEvenF32_TowardPositiveF64 = 8, 112*5f757f3fSDimitry Andric NearestTiesToEvenF32_TowardNegativeF64 = 9, 113*5f757f3fSDimitry Andric NearestTiesToEvenF32_TowardZeroF64 = 10, 114*5f757f3fSDimitry Andric 115*5f757f3fSDimitry Andric TowardPositiveF32_NearestTiesToEvenF64 = 11, 116*5f757f3fSDimitry Andric TowardPositiveF32_TowardPositiveF64 = TowardPositive, 117*5f757f3fSDimitry Andric TowardPositiveF32_TowardNegativeF64 = 12, 118*5f757f3fSDimitry Andric TowardPositiveF32_TowardZeroF64 = 13, 119*5f757f3fSDimitry Andric 120*5f757f3fSDimitry Andric TowardNegativeF32_NearestTiesToEvenF64 = 14, 121*5f757f3fSDimitry Andric TowardNegativeF32_TowardPositiveF64 = 15, 122*5f757f3fSDimitry Andric TowardNegativeF32_TowardNegativeF64 = TowardNegative, 123*5f757f3fSDimitry Andric TowardNegativeF32_TowardZeroF64 = 16, 124*5f757f3fSDimitry Andric 125*5f757f3fSDimitry Andric TowardZeroF32_NearestTiesToEvenF64 = 17, 126*5f757f3fSDimitry Andric TowardZeroF32_TowardPositiveF64 = 18, 127*5f757f3fSDimitry Andric TowardZeroF32_TowardNegativeF64 = 19, 128*5f757f3fSDimitry Andric TowardZeroF32_TowardZeroF64 = TowardZero, 129*5f757f3fSDimitry Andric 130*5f757f3fSDimitry Andric Invalid = static_cast<int8_t>(RoundingMode::Invalid) 131*5f757f3fSDimitry Andric }; 132*5f757f3fSDimitry Andric 133*5f757f3fSDimitry Andric /// Offset of nonstandard values for llvm.get.rounding results from the largest 134*5f757f3fSDimitry Andric /// supported mode. 135*5f757f3fSDimitry Andric static constexpr uint32_t ExtendedFltRoundOffset = 4; 136*5f757f3fSDimitry Andric 137*5f757f3fSDimitry Andric /// Offset in mode register of f32 rounding mode. 138*5f757f3fSDimitry Andric static constexpr uint32_t F32FltRoundOffset = 0; 139*5f757f3fSDimitry Andric 140*5f757f3fSDimitry Andric /// Offset in mode register of f64/f16 rounding mode. 141*5f757f3fSDimitry Andric static constexpr uint32_t F64FltRoundOffset = 2; 142*5f757f3fSDimitry Andric 143*5f757f3fSDimitry Andric // Bit indexed table to convert from hardware rounding mode values to FLT_ROUNDS 144*5f757f3fSDimitry Andric // values. 145*5f757f3fSDimitry Andric extern const uint64_t FltRoundConversionTable; 146*5f757f3fSDimitry Andric 147*5f757f3fSDimitry Andric } // end namespace AMDGPU 148*5f757f3fSDimitry Andric 14906c3fb27SDimitry Andric } // end namespace llvm 15006c3fb27SDimitry Andric 15106c3fb27SDimitry Andric #endif // LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H 152