106c3fb27SDimitry Andric //===-- SIModeRegisterDefaults.cpp ------------------------------*- C++ -*-===// 206c3fb27SDimitry Andric // 306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 606c3fb27SDimitry Andric // 706c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 806c3fb27SDimitry Andric 906c3fb27SDimitry Andric #include "SIModeRegisterDefaults.h" 10*5f757f3fSDimitry Andric #include "GCNSubtarget.h" 1106c3fb27SDimitry Andric 1206c3fb27SDimitry Andric using namespace llvm; 1306c3fb27SDimitry Andric 14*5f757f3fSDimitry Andric SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F, 15*5f757f3fSDimitry Andric const GCNSubtarget &ST) { 1606c3fb27SDimitry Andric *this = getDefaultForCallingConv(F.getCallingConv()); 1706c3fb27SDimitry Andric 18*5f757f3fSDimitry Andric if (ST.hasIEEEMode()) { 1906c3fb27SDimitry Andric StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString(); 2006c3fb27SDimitry Andric if (!IEEEAttr.empty()) 2106c3fb27SDimitry Andric IEEE = IEEEAttr == "true"; 22*5f757f3fSDimitry Andric } 2306c3fb27SDimitry Andric 24*5f757f3fSDimitry Andric if (ST.hasDX10ClampMode()) { 2506c3fb27SDimitry Andric StringRef DX10ClampAttr = 2606c3fb27SDimitry Andric F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString(); 2706c3fb27SDimitry Andric if (!DX10ClampAttr.empty()) 2806c3fb27SDimitry Andric DX10Clamp = DX10ClampAttr == "true"; 29*5f757f3fSDimitry Andric } 3006c3fb27SDimitry Andric 3106c3fb27SDimitry Andric StringRef DenormF32Attr = 3206c3fb27SDimitry Andric F.getFnAttribute("denormal-fp-math-f32").getValueAsString(); 3306c3fb27SDimitry Andric if (!DenormF32Attr.empty()) 3406c3fb27SDimitry Andric FP32Denormals = parseDenormalFPAttribute(DenormF32Attr); 3506c3fb27SDimitry Andric 3606c3fb27SDimitry Andric StringRef DenormAttr = 3706c3fb27SDimitry Andric F.getFnAttribute("denormal-fp-math").getValueAsString(); 3806c3fb27SDimitry Andric if (!DenormAttr.empty()) { 3906c3fb27SDimitry Andric DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr); 4006c3fb27SDimitry Andric if (DenormF32Attr.empty()) 4106c3fb27SDimitry Andric FP32Denormals = DenormMode; 4206c3fb27SDimitry Andric FP64FP16Denormals = DenormMode; 4306c3fb27SDimitry Andric } 4406c3fb27SDimitry Andric } 45*5f757f3fSDimitry Andric 46*5f757f3fSDimitry Andric using namespace AMDGPU; 47*5f757f3fSDimitry Andric 48*5f757f3fSDimitry Andric /// Combine f32 and f64 rounding modes into a combined rounding mode value. 49*5f757f3fSDimitry Andric static constexpr uint32_t getModeRegisterRoundMode(uint32_t HWFP32Val, 50*5f757f3fSDimitry Andric uint32_t HWFP64Val) { 51*5f757f3fSDimitry Andric return HWFP32Val << F32FltRoundOffset | HWFP64Val << F64FltRoundOffset; 52*5f757f3fSDimitry Andric } 53*5f757f3fSDimitry Andric 54*5f757f3fSDimitry Andric static constexpr uint64_t encodeFltRoundsTable(uint32_t FltRoundsVal, 55*5f757f3fSDimitry Andric uint32_t HWF32Val, 56*5f757f3fSDimitry Andric uint32_t HWF64Val) { 57*5f757f3fSDimitry Andric uint32_t ModeVal = getModeRegisterRoundMode(HWF32Val, HWF64Val); 58*5f757f3fSDimitry Andric if (FltRoundsVal > TowardNegative) 59*5f757f3fSDimitry Andric FltRoundsVal -= ExtendedFltRoundOffset; 60*5f757f3fSDimitry Andric 61*5f757f3fSDimitry Andric uint32_t BitIndex = ModeVal << 2; 62*5f757f3fSDimitry Andric return static_cast<uint64_t>(FltRoundsVal) << BitIndex; 63*5f757f3fSDimitry Andric } 64*5f757f3fSDimitry Andric 65*5f757f3fSDimitry Andric // Encode FLT_ROUNDS value where the two rounding modes are the same and use a 66*5f757f3fSDimitry Andric // standard value 67*5f757f3fSDimitry Andric static constexpr uint64_t 68*5f757f3fSDimitry Andric encodeFltRoundsTableSame(AMDGPUFltRounds FltRoundsMode, uint32_t HWVal) { 69*5f757f3fSDimitry Andric return encodeFltRoundsTable(FltRoundsMode, HWVal, HWVal); 70*5f757f3fSDimitry Andric } 71*5f757f3fSDimitry Andric 72*5f757f3fSDimitry Andric // Convert mode register encoded rounding mode to AMDGPUFltRounds 73*5f757f3fSDimitry Andric static constexpr AMDGPUFltRounds 74*5f757f3fSDimitry Andric decodeIndexFltRoundConversionTable(uint32_t HWMode) { 75*5f757f3fSDimitry Andric uint32_t TableRead = (FltRoundConversionTable >> (HWMode << 2)) & 0xf; 76*5f757f3fSDimitry Andric if (TableRead > TowardNegative) 77*5f757f3fSDimitry Andric TableRead += ExtendedFltRoundOffset; 78*5f757f3fSDimitry Andric return static_cast<AMDGPUFltRounds>(TableRead); 79*5f757f3fSDimitry Andric } 80*5f757f3fSDimitry Andric 81*5f757f3fSDimitry Andric static constexpr uint32_t HWTowardZero = FP_ROUND_ROUND_TO_ZERO; 82*5f757f3fSDimitry Andric static constexpr uint32_t HWNearestTiesToEven = FP_ROUND_ROUND_TO_NEAREST; 83*5f757f3fSDimitry Andric static constexpr uint32_t HWTowardPositive = FP_ROUND_ROUND_TO_INF; 84*5f757f3fSDimitry Andric static constexpr uint32_t HWTowardNegative = FP_ROUND_ROUND_TO_NEGINF; 85*5f757f3fSDimitry Andric 86*5f757f3fSDimitry Andric const uint64_t AMDGPU::FltRoundConversionTable = 87*5f757f3fSDimitry Andric encodeFltRoundsTableSame(TowardZeroF32_TowardZeroF64, HWTowardZero) | 88*5f757f3fSDimitry Andric encodeFltRoundsTableSame(NearestTiesToEvenF32_NearestTiesToEvenF64, 89*5f757f3fSDimitry Andric HWNearestTiesToEven) | 90*5f757f3fSDimitry Andric encodeFltRoundsTableSame(TowardPositiveF32_TowardPositiveF64, 91*5f757f3fSDimitry Andric HWTowardPositive) | 92*5f757f3fSDimitry Andric encodeFltRoundsTableSame(TowardNegativeF32_TowardNegativeF64, 93*5f757f3fSDimitry Andric HWTowardNegative) | 94*5f757f3fSDimitry Andric 95*5f757f3fSDimitry Andric encodeFltRoundsTable(TowardZeroF32_NearestTiesToEvenF64, HWTowardZero, 96*5f757f3fSDimitry Andric HWNearestTiesToEven) | 97*5f757f3fSDimitry Andric encodeFltRoundsTable(TowardZeroF32_TowardPositiveF64, HWTowardZero, 98*5f757f3fSDimitry Andric HWTowardPositive) | 99*5f757f3fSDimitry Andric encodeFltRoundsTable(TowardZeroF32_TowardNegativeF64, HWTowardZero, 100*5f757f3fSDimitry Andric HWTowardNegative) | 101*5f757f3fSDimitry Andric 102*5f757f3fSDimitry Andric encodeFltRoundsTable(NearestTiesToEvenF32_TowardZeroF64, 103*5f757f3fSDimitry Andric HWNearestTiesToEven, HWTowardZero) | 104*5f757f3fSDimitry Andric encodeFltRoundsTable(NearestTiesToEvenF32_TowardPositiveF64, 105*5f757f3fSDimitry Andric HWNearestTiesToEven, HWTowardPositive) | 106*5f757f3fSDimitry Andric encodeFltRoundsTable(NearestTiesToEvenF32_TowardNegativeF64, 107*5f757f3fSDimitry Andric HWNearestTiesToEven, HWTowardNegative) | 108*5f757f3fSDimitry Andric 109*5f757f3fSDimitry Andric encodeFltRoundsTable(TowardPositiveF32_TowardZeroF64, HWTowardPositive, 110*5f757f3fSDimitry Andric HWTowardZero) | 111*5f757f3fSDimitry Andric encodeFltRoundsTable(TowardPositiveF32_NearestTiesToEvenF64, 112*5f757f3fSDimitry Andric HWTowardPositive, HWNearestTiesToEven) | 113*5f757f3fSDimitry Andric encodeFltRoundsTable(TowardPositiveF32_TowardNegativeF64, HWTowardPositive, 114*5f757f3fSDimitry Andric HWTowardNegative) | 115*5f757f3fSDimitry Andric 116*5f757f3fSDimitry Andric encodeFltRoundsTable(TowardNegativeF32_TowardZeroF64, HWTowardNegative, 117*5f757f3fSDimitry Andric HWTowardZero) | 118*5f757f3fSDimitry Andric encodeFltRoundsTable(TowardNegativeF32_NearestTiesToEvenF64, 119*5f757f3fSDimitry Andric HWTowardNegative, HWNearestTiesToEven) | 120*5f757f3fSDimitry Andric encodeFltRoundsTable(TowardNegativeF32_TowardPositiveF64, HWTowardNegative, 121*5f757f3fSDimitry Andric HWTowardPositive); 122*5f757f3fSDimitry Andric 123*5f757f3fSDimitry Andric // Verify evaluation of FltRoundConversionTable 124*5f757f3fSDimitry Andric 125*5f757f3fSDimitry Andric // If both modes are the same, should return the standard values. 126*5f757f3fSDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode( 127*5f757f3fSDimitry Andric HWTowardZero, HWTowardZero)) == AMDGPUFltRounds::TowardZero); 128*5f757f3fSDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode( 129*5f757f3fSDimitry Andric HWNearestTiesToEven, HWNearestTiesToEven)) == 130*5f757f3fSDimitry Andric AMDGPUFltRounds::NearestTiesToEven); 131*5f757f3fSDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode( 132*5f757f3fSDimitry Andric HWTowardPositive, HWTowardPositive)) == 133*5f757f3fSDimitry Andric AMDGPUFltRounds::TowardPositive); 134*5f757f3fSDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode( 135*5f757f3fSDimitry Andric HWTowardNegative, HWTowardNegative)) == 136*5f757f3fSDimitry Andric AMDGPUFltRounds::TowardNegative); 137*5f757f3fSDimitry Andric 138*5f757f3fSDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode( 139*5f757f3fSDimitry Andric HWTowardZero, HWNearestTiesToEven)) == 140*5f757f3fSDimitry Andric TowardZeroF32_NearestTiesToEvenF64); 141*5f757f3fSDimitry Andric static_assert(decodeIndexFltRoundConversionTable( 142*5f757f3fSDimitry Andric getModeRegisterRoundMode(HWTowardZero, HWTowardPositive)) == 143*5f757f3fSDimitry Andric TowardZeroF32_TowardPositiveF64); 144*5f757f3fSDimitry Andric static_assert(decodeIndexFltRoundConversionTable( 145*5f757f3fSDimitry Andric getModeRegisterRoundMode(HWTowardZero, HWTowardNegative)) == 146*5f757f3fSDimitry Andric TowardZeroF32_TowardNegativeF64); 147*5f757f3fSDimitry Andric 148*5f757f3fSDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode( 149*5f757f3fSDimitry Andric HWNearestTiesToEven, HWTowardZero)) == 150*5f757f3fSDimitry Andric NearestTiesToEvenF32_TowardZeroF64); 151*5f757f3fSDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode( 152*5f757f3fSDimitry Andric HWNearestTiesToEven, HWTowardPositive)) == 153*5f757f3fSDimitry Andric NearestTiesToEvenF32_TowardPositiveF64); 154*5f757f3fSDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode( 155*5f757f3fSDimitry Andric HWNearestTiesToEven, HWTowardNegative)) == 156*5f757f3fSDimitry Andric NearestTiesToEvenF32_TowardNegativeF64); 157*5f757f3fSDimitry Andric 158*5f757f3fSDimitry Andric static_assert(decodeIndexFltRoundConversionTable( 159*5f757f3fSDimitry Andric getModeRegisterRoundMode(HWTowardPositive, HWTowardZero)) == 160*5f757f3fSDimitry Andric TowardPositiveF32_TowardZeroF64); 161*5f757f3fSDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode( 162*5f757f3fSDimitry Andric HWTowardPositive, HWNearestTiesToEven)) == 163*5f757f3fSDimitry Andric TowardPositiveF32_NearestTiesToEvenF64); 164*5f757f3fSDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode( 165*5f757f3fSDimitry Andric HWTowardPositive, HWTowardNegative)) == 166*5f757f3fSDimitry Andric TowardPositiveF32_TowardNegativeF64); 167*5f757f3fSDimitry Andric 168*5f757f3fSDimitry Andric static_assert(decodeIndexFltRoundConversionTable( 169*5f757f3fSDimitry Andric getModeRegisterRoundMode(HWTowardNegative, HWTowardZero)) == 170*5f757f3fSDimitry Andric TowardNegativeF32_TowardZeroF64); 171*5f757f3fSDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode( 172*5f757f3fSDimitry Andric HWTowardNegative, HWNearestTiesToEven)) == 173*5f757f3fSDimitry Andric TowardNegativeF32_NearestTiesToEvenF64); 174*5f757f3fSDimitry Andric static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode( 175*5f757f3fSDimitry Andric HWTowardNegative, HWTowardPositive)) == 176*5f757f3fSDimitry Andric TowardNegativeF32_TowardPositiveF64); 177