1*06c3fb27SDimitry Andric //===-- SIModeRegisterDefaults.h --------------------------------*- C++ -*-===// 2*06c3fb27SDimitry Andric // 3*06c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*06c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*06c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*06c3fb27SDimitry Andric // 7*06c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 8*06c3fb27SDimitry Andric 9*06c3fb27SDimitry Andric #ifndef LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H 10*06c3fb27SDimitry Andric #define LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H 11*06c3fb27SDimitry Andric 12*06c3fb27SDimitry Andric #include "Utils/AMDGPUBaseInfo.h" 13*06c3fb27SDimitry Andric #include "llvm/ADT/FloatingPointMode.h" 14*06c3fb27SDimitry Andric 15*06c3fb27SDimitry Andric namespace llvm { 16*06c3fb27SDimitry Andric 17*06c3fb27SDimitry Andric // Track defaults for fields in the MODE register. 18*06c3fb27SDimitry Andric struct SIModeRegisterDefaults { 19*06c3fb27SDimitry Andric /// Floating point opcodes that support exception flag gathering quiet and 20*06c3fb27SDimitry Andric /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10 21*06c3fb27SDimitry Andric /// become IEEE 754- 2008 compliant due to signaling NaN propagation and 22*06c3fb27SDimitry Andric /// quieting. 23*06c3fb27SDimitry Andric bool IEEE : 1; 24*06c3fb27SDimitry Andric 25*06c3fb27SDimitry Andric /// Used by the vector ALU to force DX10-style treatment of NaNs: when set, 26*06c3fb27SDimitry Andric /// clamp NaN to zero; otherwise, pass NaN through. 27*06c3fb27SDimitry Andric bool DX10Clamp : 1; 28*06c3fb27SDimitry Andric 29*06c3fb27SDimitry Andric /// If this is set, neither input or output denormals are flushed for most f32 30*06c3fb27SDimitry Andric /// instructions. 31*06c3fb27SDimitry Andric DenormalMode FP32Denormals; 32*06c3fb27SDimitry Andric 33*06c3fb27SDimitry Andric /// If this is set, neither input or output denormals are flushed for both f64 34*06c3fb27SDimitry Andric /// and f16/v2f16 instructions. 35*06c3fb27SDimitry Andric DenormalMode FP64FP16Denormals; 36*06c3fb27SDimitry Andric 37*06c3fb27SDimitry Andric SIModeRegisterDefaults() : 38*06c3fb27SDimitry Andric IEEE(true), 39*06c3fb27SDimitry Andric DX10Clamp(true), 40*06c3fb27SDimitry Andric FP32Denormals(DenormalMode::getIEEE()), 41*06c3fb27SDimitry Andric FP64FP16Denormals(DenormalMode::getIEEE()) {} 42*06c3fb27SDimitry Andric 43*06c3fb27SDimitry Andric SIModeRegisterDefaults(const Function &F); 44*06c3fb27SDimitry Andric 45*06c3fb27SDimitry Andric static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) { 46*06c3fb27SDimitry Andric SIModeRegisterDefaults Mode; 47*06c3fb27SDimitry Andric Mode.IEEE = !AMDGPU::isShader(CC); 48*06c3fb27SDimitry Andric return Mode; 49*06c3fb27SDimitry Andric } 50*06c3fb27SDimitry Andric 51*06c3fb27SDimitry Andric bool operator==(const SIModeRegisterDefaults Other) const { 52*06c3fb27SDimitry Andric return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp && 53*06c3fb27SDimitry Andric FP32Denormals == Other.FP32Denormals && 54*06c3fb27SDimitry Andric FP64FP16Denormals == Other.FP64FP16Denormals; 55*06c3fb27SDimitry Andric } 56*06c3fb27SDimitry Andric 57*06c3fb27SDimitry Andric /// Get the encoding value for the FP_DENORM bits of the mode register for the 58*06c3fb27SDimitry Andric /// FP32 denormal mode. 59*06c3fb27SDimitry Andric uint32_t fpDenormModeSPValue() const { 60*06c3fb27SDimitry Andric if (FP32Denormals == DenormalMode::getPreserveSign()) 61*06c3fb27SDimitry Andric return FP_DENORM_FLUSH_IN_FLUSH_OUT; 62*06c3fb27SDimitry Andric if (FP32Denormals.Output == DenormalMode::PreserveSign) 63*06c3fb27SDimitry Andric return FP_DENORM_FLUSH_OUT; 64*06c3fb27SDimitry Andric if (FP32Denormals.Input == DenormalMode::PreserveSign) 65*06c3fb27SDimitry Andric return FP_DENORM_FLUSH_IN; 66*06c3fb27SDimitry Andric return FP_DENORM_FLUSH_NONE; 67*06c3fb27SDimitry Andric } 68*06c3fb27SDimitry Andric 69*06c3fb27SDimitry Andric /// Get the encoding value for the FP_DENORM bits of the mode register for the 70*06c3fb27SDimitry Andric /// FP64/FP16 denormal mode. 71*06c3fb27SDimitry Andric uint32_t fpDenormModeDPValue() const { 72*06c3fb27SDimitry Andric if (FP64FP16Denormals == DenormalMode::getPreserveSign()) 73*06c3fb27SDimitry Andric return FP_DENORM_FLUSH_IN_FLUSH_OUT; 74*06c3fb27SDimitry Andric if (FP64FP16Denormals.Output == DenormalMode::PreserveSign) 75*06c3fb27SDimitry Andric return FP_DENORM_FLUSH_OUT; 76*06c3fb27SDimitry Andric if (FP64FP16Denormals.Input == DenormalMode::PreserveSign) 77*06c3fb27SDimitry Andric return FP_DENORM_FLUSH_IN; 78*06c3fb27SDimitry Andric return FP_DENORM_FLUSH_NONE; 79*06c3fb27SDimitry Andric } 80*06c3fb27SDimitry Andric 81*06c3fb27SDimitry Andric // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should 82*06c3fb27SDimitry Andric // be able to override. 83*06c3fb27SDimitry Andric bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const { 84*06c3fb27SDimitry Andric return DX10Clamp == CalleeMode.DX10Clamp && IEEE == CalleeMode.IEEE; 85*06c3fb27SDimitry Andric } 86*06c3fb27SDimitry Andric }; 87*06c3fb27SDimitry Andric 88*06c3fb27SDimitry Andric } // end namespace llvm 89*06c3fb27SDimitry Andric 90*06c3fb27SDimitry Andric #endif // LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H 91