1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 2; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=ieee %s | FileCheck -check-prefixes=CHECK,IEEE %s 3; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=dynamic %s | FileCheck -check-prefixes=CHECK,IEEE %s 4; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=preserve-sign %s | FileCheck -check-prefixes=CHECK,DAZ %s 5 6define amdgpu_kernel void @noop_sqrt_fpmath(ptr addrspace(1) %out, float %x) #0 { 7; CHECK-LABEL: define amdgpu_kernel void @noop_sqrt_fpmath 8; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR0:[0-9]+]] { 9; CHECK-NEXT: [[MD_25ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !0 10; CHECK-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 11; CHECK-NEXT: ret void 12; 13 %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3 14 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 15 ret void 16} 17 18define amdgpu_kernel void @sqrt_fpmath_f32(ptr addrspace(1) %out, float %x) { 19; IEEE-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32 20; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1:[0-9]+]] { 21; IEEE-NEXT: [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]]) 22; IEEE-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 23; IEEE-NEXT: [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1 24; IEEE-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 25; IEEE-NEXT: [[MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2 26; IEEE-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 27; IEEE-NEXT: [[TMP1:%.*]] = fcmp olt float [[X]], 0x3810000000000000 28; IEEE-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0 29; IEEE-NEXT: 
[[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP2]]) 30; IEEE-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) 31; IEEE-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0 32; IEEE-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]]) 33; IEEE-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 34; IEEE-NEXT: [[TMP6:%.*]] = fcmp olt float [[X]], 0x3810000000000000 35; IEEE-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 32, i32 0 36; IEEE-NEXT: [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP7]]) 37; IEEE-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP8]]) 38; IEEE-NEXT: [[TMP10:%.*]] = select i1 [[TMP6]], i32 -16, i32 0 39; IEEE-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP10]]) 40; IEEE-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 41; IEEE-NEXT: [[TMP11:%.*]] = fcmp olt float [[X]], 0x3810000000000000 42; IEEE-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 32, i32 0 43; IEEE-NEXT: [[TMP13:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP12]]) 44; IEEE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP13]]) 45; IEEE-NEXT: [[TMP15:%.*]] = select i1 [[TMP11]], i32 -16, i32 0 46; IEEE-NEXT: [[MD_2ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP14]], i32 [[TMP15]]) 47; IEEE-NEXT: store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4 48; IEEE-NEXT: ret void 49; 50; DAZ-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32 51; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1:[0-9]+]] { 52; DAZ-NEXT: [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]]) 53; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 54; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1 55; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 56; 
DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 57; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 58; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 59; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 60; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 61; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 62; DAZ-NEXT: [[MD_2ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 63; DAZ-NEXT: store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4 64; DAZ-NEXT: ret void 65; 66 %no.md = call float @llvm.sqrt.f32(float %x) 67 store volatile float %no.md, ptr addrspace(1) %out, align 4 68 69 %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1 70 store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4 71 72 %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2 73 store volatile float %md.1ulp, ptr addrspace(1) %out, align 4 74 75 %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0 76 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 77 78 %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3 79 store volatile float %md.3ulp, ptr addrspace(1) %out, align 4 80 81 %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4 82 store volatile float %md.2ulp, ptr addrspace(1) %out, align 4 83 ret void 84} 85 86define amdgpu_kernel void @sqrt_fpmath_v2f32(ptr addrspace(1) %out, <2 x float> %x) { 87; IEEE-LABEL: define amdgpu_kernel void @sqrt_fpmath_v2f32 88; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { 89; IEEE-NEXT: [[NO_MD:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) 90; IEEE-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 91; IEEE-NEXT: [[MD_HALF_ULP:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !1 92; IEEE-NEXT: store volatile <2 x 
float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 93; IEEE-NEXT: [[MD_1ULP:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !2 94; IEEE-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 95; IEEE-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0 96; IEEE-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1 97; IEEE-NEXT: [[TMP3:%.*]] = fcmp olt float [[TMP1]], 0x3810000000000000 98; IEEE-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 32, i32 0 99; IEEE-NEXT: [[TMP5:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP1]], i32 [[TMP4]]) 100; IEEE-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP5]]) 101; IEEE-NEXT: [[TMP7:%.*]] = select i1 [[TMP3]], i32 -16, i32 0 102; IEEE-NEXT: [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP6]], i32 [[TMP7]]) 103; IEEE-NEXT: [[TMP9:%.*]] = fcmp olt float [[TMP2]], 0x3810000000000000 104; IEEE-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 32, i32 0 105; IEEE-NEXT: [[TMP11:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP2]], i32 [[TMP10]]) 106; IEEE-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP11]]) 107; IEEE-NEXT: [[TMP13:%.*]] = select i1 [[TMP9]], i32 -16, i32 0 108; IEEE-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]]) 109; IEEE-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0 110; IEEE-NEXT: [[MD_25ULP:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1 111; IEEE-NEXT: store volatile <2 x float> [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 112; IEEE-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0 113; IEEE-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1 114; IEEE-NEXT: [[TMP18:%.*]] = fcmp olt float [[TMP16]], 0x3810000000000000 115; IEEE-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], i32 32, i32 0 116; IEEE-NEXT: [[TMP20:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP19]]) 117; 
IEEE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP20]]) 118; IEEE-NEXT: [[TMP22:%.*]] = select i1 [[TMP18]], i32 -16, i32 0 119; IEEE-NEXT: [[TMP23:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP21]], i32 [[TMP22]]) 120; IEEE-NEXT: [[TMP24:%.*]] = fcmp olt float [[TMP17]], 0x3810000000000000 121; IEEE-NEXT: [[TMP25:%.*]] = select i1 [[TMP24]], i32 32, i32 0 122; IEEE-NEXT: [[TMP26:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP25]]) 123; IEEE-NEXT: [[TMP27:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP26]]) 124; IEEE-NEXT: [[TMP28:%.*]] = select i1 [[TMP24]], i32 -16, i32 0 125; IEEE-NEXT: [[TMP29:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP27]], i32 [[TMP28]]) 126; IEEE-NEXT: [[TMP30:%.*]] = insertelement <2 x float> poison, float [[TMP23]], i64 0 127; IEEE-NEXT: [[MD_3ULP:%.*]] = insertelement <2 x float> [[TMP30]], float [[TMP29]], i64 1 128; IEEE-NEXT: store volatile <2 x float> [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 129; IEEE-NEXT: [[TMP31:%.*]] = extractelement <2 x float> [[X]], i64 0 130; IEEE-NEXT: [[TMP32:%.*]] = extractelement <2 x float> [[X]], i64 1 131; IEEE-NEXT: [[TMP33:%.*]] = fcmp olt float [[TMP31]], 0x3810000000000000 132; IEEE-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 32, i32 0 133; IEEE-NEXT: [[TMP35:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP31]], i32 [[TMP34]]) 134; IEEE-NEXT: [[TMP36:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP35]]) 135; IEEE-NEXT: [[TMP37:%.*]] = select i1 [[TMP33]], i32 -16, i32 0 136; IEEE-NEXT: [[TMP38:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP36]], i32 [[TMP37]]) 137; IEEE-NEXT: [[TMP39:%.*]] = fcmp olt float [[TMP32]], 0x3810000000000000 138; IEEE-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 32, i32 0 139; IEEE-NEXT: [[TMP41:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP40]]) 140; IEEE-NEXT: [[TMP42:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP41]]) 141; IEEE-NEXT: [[TMP43:%.*]] = select i1 [[TMP39]], 
i32 -16, i32 0 142; IEEE-NEXT: [[TMP44:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP42]], i32 [[TMP43]]) 143; IEEE-NEXT: [[TMP45:%.*]] = insertelement <2 x float> poison, float [[TMP38]], i64 0 144; IEEE-NEXT: [[MD_2ULP:%.*]] = insertelement <2 x float> [[TMP45]], float [[TMP44]], i64 1 145; IEEE-NEXT: store volatile <2 x float> [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4 146; IEEE-NEXT: ret void 147; 148; DAZ-LABEL: define amdgpu_kernel void @sqrt_fpmath_v2f32 149; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { 150; DAZ-NEXT: [[NO_MD:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) 151; DAZ-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 152; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath !1 153; DAZ-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 154; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0 155; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1 156; DAZ-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]]) 157; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]]) 158; DAZ-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 159; DAZ-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i64 1 160; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 161; DAZ-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[X]], i64 0 162; DAZ-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[X]], i64 1 163; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP6]]) 164; DAZ-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP7]]) 165; DAZ-NEXT: [[TMP10:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0 166; DAZ-NEXT: [[MD_25ULP:%.*]] = insertelement <2 x float> [[TMP10]], float [[TMP9]], i64 1 167; DAZ-NEXT: store volatile <2 x 
float> [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 168; DAZ-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[X]], i64 0 169; DAZ-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[X]], i64 1 170; DAZ-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP11]]) 171; DAZ-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP12]]) 172; DAZ-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i64 0 173; DAZ-NEXT: [[MD_3ULP:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1 174; DAZ-NEXT: store volatile <2 x float> [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 175; DAZ-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0 176; DAZ-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1 177; DAZ-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP16]]) 178; DAZ-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP17]]) 179; DAZ-NEXT: [[TMP20:%.*]] = insertelement <2 x float> poison, float [[TMP18]], i64 0 180; DAZ-NEXT: [[MD_2ULP:%.*]] = insertelement <2 x float> [[TMP20]], float [[TMP19]], i64 1 181; DAZ-NEXT: store volatile <2 x float> [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4 182; DAZ-NEXT: ret void 183; 184 %no.md = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x) 185 store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 4 186 187 %md.half.ulp = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !1 188 store volatile <2 x float> %md.half.ulp, ptr addrspace(1) %out, align 4 189 190 %md.1ulp = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2 191 store volatile <2 x float> %md.1ulp, ptr addrspace(1) %out, align 4 192 193 %md.25ulp = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !0 194 store volatile <2 x float> %md.25ulp, ptr addrspace(1) %out, align 4 195 196 %md.3ulp = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !3 197 store volatile <2 x float> %md.3ulp, ptr addrspace(1) %out, align 4 198 199 
%md.2ulp = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !4 200 store volatile <2 x float> %md.2ulp, ptr addrspace(1) %out, align 4 201 ret void 202} 203 204define amdgpu_kernel void @sqrt_fpmath_f32_known_nosub(ptr addrspace(1) %out, float nofpclass(sub) %x) { 205; CHECK-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nosub 206; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[X:%.*]]) #[[ATTR1:[0-9]+]] { 207; CHECK-NEXT: [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]]) 208; CHECK-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 209; CHECK-NEXT: [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1 210; CHECK-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 211; CHECK-NEXT: [[MD_1ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 212; CHECK-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 213; CHECK-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 214; CHECK-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 215; CHECK-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 216; CHECK-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 217; CHECK-NEXT: [[MD_2ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 218; CHECK-NEXT: store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4 219; CHECK-NEXT: ret void 220; 221 %no.md = call float @llvm.sqrt.f32(float %x) 222 store volatile float %no.md, ptr addrspace(1) %out, align 4 223 224 %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1 225 store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4 226 227 %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2 228 store volatile float %md.1ulp, ptr addrspace(1) %out, align 4 229 230 %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0 231 store volatile float %md.25ulp, ptr addrspace(1) %out, align 
4 232 233 %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3 234 store volatile float %md.3ulp, ptr addrspace(1) %out, align 4 235 236 %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4 237 store volatile float %md.2ulp, ptr addrspace(1) %out, align 4 238 ret void 239} 240 241define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero(ptr addrspace(1) %out, float nofpclass(nzero) %x) { 242; IEEE-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero 243; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nzero) [[X:%.*]]) #[[ATTR1]] { 244; IEEE-NEXT: [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]]) 245; IEEE-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 246; IEEE-NEXT: [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1 247; IEEE-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 248; IEEE-NEXT: [[MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2 249; IEEE-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 250; IEEE-NEXT: [[TMP1:%.*]] = fcmp olt float [[X]], 0x3810000000000000 251; IEEE-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0 252; IEEE-NEXT: [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP2]]) 253; IEEE-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) 254; IEEE-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0 255; IEEE-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]]) 256; IEEE-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 257; IEEE-NEXT: [[TMP6:%.*]] = fcmp olt float [[X]], 0x3810000000000000 258; IEEE-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 32, i32 0 259; IEEE-NEXT: [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP7]]) 260; IEEE-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP8]]) 261; IEEE-NEXT: [[TMP10:%.*]] = select i1 [[TMP6]], i32 -16, i32 0 262; 
IEEE-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP10]]) 263; IEEE-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 264; IEEE-NEXT: [[TMP11:%.*]] = fcmp olt float [[X]], 0x3810000000000000 265; IEEE-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 32, i32 0 266; IEEE-NEXT: [[TMP13:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP12]]) 267; IEEE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP13]]) 268; IEEE-NEXT: [[TMP15:%.*]] = select i1 [[TMP11]], i32 -16, i32 0 269; IEEE-NEXT: [[MD_2ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP14]], i32 [[TMP15]]) 270; IEEE-NEXT: store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4 271; IEEE-NEXT: ret void 272; 273; DAZ-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero 274; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nzero) [[X:%.*]]) #[[ATTR1]] { 275; DAZ-NEXT: [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]]) 276; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 277; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1 278; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 279; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 280; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 281; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 282; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 283; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 284; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 285; DAZ-NEXT: [[MD_2ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 286; DAZ-NEXT: store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4 287; DAZ-NEXT: ret void 288; 289 %no.md = call float @llvm.sqrt.f32(float %x) 290 store 
volatile float %no.md, ptr addrspace(1) %out, align 4 291 292 %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1 293 store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4 294 295 %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2 296 store volatile float %md.1ulp, ptr addrspace(1) %out, align 4 297 298 %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0 299 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 300 301 %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3 302 store volatile float %md.3ulp, ptr addrspace(1) %out, align 4 303 304 %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4 305 store volatile float %md.2ulp, ptr addrspace(1) %out, align 4 306 ret void 307} 308 309define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero_nonsub(ptr addrspace(1) %out, float nofpclass(nzero nsub) %x) { 310; IEEE-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero_nonsub 311; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nzero nsub) [[X:%.*]]) #[[ATTR1]] { 312; IEEE-NEXT: [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]]) 313; IEEE-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 314; IEEE-NEXT: [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1 315; IEEE-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 316; IEEE-NEXT: [[MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2 317; IEEE-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 318; IEEE-NEXT: [[TMP1:%.*]] = fcmp olt float [[X]], 0x3810000000000000 319; IEEE-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0 320; IEEE-NEXT: [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP2]]) 321; IEEE-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) 322; IEEE-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0 323; IEEE-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float 
[[TMP4]], i32 [[TMP5]]) 324; IEEE-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 325; IEEE-NEXT: [[TMP6:%.*]] = fcmp olt float [[X]], 0x3810000000000000 326; IEEE-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 32, i32 0 327; IEEE-NEXT: [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP7]]) 328; IEEE-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP8]]) 329; IEEE-NEXT: [[TMP10:%.*]] = select i1 [[TMP6]], i32 -16, i32 0 330; IEEE-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP10]]) 331; IEEE-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 332; IEEE-NEXT: [[TMP11:%.*]] = fcmp olt float [[X]], 0x3810000000000000 333; IEEE-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 32, i32 0 334; IEEE-NEXT: [[TMP13:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP12]]) 335; IEEE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP13]]) 336; IEEE-NEXT: [[TMP15:%.*]] = select i1 [[TMP11]], i32 -16, i32 0 337; IEEE-NEXT: [[MD_2ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP14]], i32 [[TMP15]]) 338; IEEE-NEXT: store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4 339; IEEE-NEXT: ret void 340; 341; DAZ-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero_nonsub 342; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(nzero nsub) [[X:%.*]]) #[[ATTR1]] { 343; DAZ-NEXT: [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]]) 344; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 345; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1 346; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 347; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 348; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 349; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 
350; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 351; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 352; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 353; DAZ-NEXT: [[MD_2ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 354; DAZ-NEXT: store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4 355; DAZ-NEXT: ret void 356; 357 %no.md = call float @llvm.sqrt.f32(float %x) 358 store volatile float %no.md, ptr addrspace(1) %out, align 4 359 360 %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1 361 store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4 362 363 %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2 364 store volatile float %md.1ulp, ptr addrspace(1) %out, align 4 365 366 %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0 367 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 368 369 %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3 370 store volatile float %md.3ulp, ptr addrspace(1) %out, align 4 371 372 %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4 373 store volatile float %md.2ulp, ptr addrspace(1) %out, align 4 374 ret void 375} 376 377define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero_nonsub_noinf(ptr addrspace(1) %out, float nofpclass(nzero nsub inf) %x) { 378; IEEE-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero_nonsub_noinf 379; IEEE-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(inf nzero nsub) [[X:%.*]]) #[[ATTR1]] { 380; IEEE-NEXT: [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]]) 381; IEEE-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 382; IEEE-NEXT: [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1 383; IEEE-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 384; IEEE-NEXT: [[MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2 385; 
IEEE-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 386; IEEE-NEXT: [[TMP1:%.*]] = fcmp olt float [[X]], 0x3810000000000000 387; IEEE-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0 388; IEEE-NEXT: [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP2]]) 389; IEEE-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) 390; IEEE-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0 391; IEEE-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]]) 392; IEEE-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 393; IEEE-NEXT: [[TMP6:%.*]] = fcmp olt float [[X]], 0x3810000000000000 394; IEEE-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 32, i32 0 395; IEEE-NEXT: [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP7]]) 396; IEEE-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP8]]) 397; IEEE-NEXT: [[TMP10:%.*]] = select i1 [[TMP6]], i32 -16, i32 0 398; IEEE-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP10]]) 399; IEEE-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 400; IEEE-NEXT: [[TMP11:%.*]] = fcmp olt float [[X]], 0x3810000000000000 401; IEEE-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 32, i32 0 402; IEEE-NEXT: [[TMP13:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP12]]) 403; IEEE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP13]]) 404; IEEE-NEXT: [[TMP15:%.*]] = select i1 [[TMP11]], i32 -16, i32 0 405; IEEE-NEXT: [[MD_2ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP14]], i32 [[TMP15]]) 406; IEEE-NEXT: store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4 407; IEEE-NEXT: ret void 408; 409; DAZ-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nonzero_nonsub_noinf 410; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(inf nzero nsub) [[X:%.*]]) #[[ATTR1]] { 411; DAZ-NEXT: [[NO_MD:%.*]] = 
call float @llvm.sqrt.f32(float [[X]]) 412; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 413; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1 414; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 415; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 416; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 417; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 418; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 419; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 420; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 421; DAZ-NEXT: [[MD_2ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 422; DAZ-NEXT: store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4 423; DAZ-NEXT: ret void 424; 425 %no.md = call float @llvm.sqrt.f32(float %x) 426 store volatile float %no.md, ptr addrspace(1) %out, align 4 427 428 %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1 429 store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4 430 431 %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2 432 store volatile float %md.1ulp, ptr addrspace(1) %out, align 4 433 434 %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0 435 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 436 437 %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3 438 store volatile float %md.3ulp, ptr addrspace(1) %out, align 4 439 440 %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4 441 store volatile float %md.2ulp, ptr addrspace(1) %out, align 4 442 ret void 443} 444 445define amdgpu_kernel void @sqrt_fpmath_f32_known_nopsub(ptr addrspace(1) %out, float nofpclass(psub) %x) { 446; IEEE-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nopsub 447; IEEE-SAME: (ptr 
addrspace(1) [[OUT:%.*]], float nofpclass(psub) [[X:%.*]]) #[[ATTR1]] { 448; IEEE-NEXT: [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]]) 449; IEEE-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 450; IEEE-NEXT: [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1 451; IEEE-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 452; IEEE-NEXT: [[MD_1ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !2 453; IEEE-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 454; IEEE-NEXT: [[TMP1:%.*]] = fcmp olt float [[X]], 0x3810000000000000 455; IEEE-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0 456; IEEE-NEXT: [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP2]]) 457; IEEE-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) 458; IEEE-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0 459; IEEE-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]]) 460; IEEE-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 461; IEEE-NEXT: [[TMP6:%.*]] = fcmp olt float [[X]], 0x3810000000000000 462; IEEE-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 32, i32 0 463; IEEE-NEXT: [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP7]]) 464; IEEE-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP8]]) 465; IEEE-NEXT: [[TMP10:%.*]] = select i1 [[TMP6]], i32 -16, i32 0 466; IEEE-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP10]]) 467; IEEE-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 468; IEEE-NEXT: [[TMP11:%.*]] = fcmp olt float [[X]], 0x3810000000000000 469; IEEE-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 32, i32 0 470; IEEE-NEXT: [[TMP13:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP12]]) 471; IEEE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP13]]) 472; 
IEEE-NEXT: [[TMP15:%.*]] = select i1 [[TMP11]], i32 -16, i32 0 473; IEEE-NEXT: [[MD_2ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP14]], i32 [[TMP15]]) 474; IEEE-NEXT: store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4 475; IEEE-NEXT: ret void 476; 477; DAZ-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_known_nopsub 478; DAZ-SAME: (ptr addrspace(1) [[OUT:%.*]], float nofpclass(psub) [[X:%.*]]) #[[ATTR1]] { 479; DAZ-NEXT: [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]]) 480; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 481; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1 482; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 483; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 484; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 485; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 486; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 487; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 488; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 489; DAZ-NEXT: [[MD_2ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 490; DAZ-NEXT: store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4 491; DAZ-NEXT: ret void 492; 493 %no.md = call float @llvm.sqrt.f32(float %x) 494 store volatile float %no.md, ptr addrspace(1) %out, align 4 495 496 %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1 497 store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4 498 499 %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2 500 store volatile float %md.1ulp, ptr addrspace(1) %out, align 4 501 502 %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0 503 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 504 505 %md.3ulp = call float 
@llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

; Every sqrt call in this function carries the afn fast-math flag. The shared
; CHECK prefix (all three RUN lines) expects each call to be left as
; llvm.sqrt.f32, whether or not it has !fpmath metadata attached.
define amdgpu_kernel void @sqrt_fpmath_f32_afn(ptr addrspace(1) %out, float %x) {
; CHECK-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_afn
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:    [[NO_MD:%.*]] = call afn float @llvm.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_HALF_ULP:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; CHECK-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_1ULP:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !2
; CHECK-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_25ULP:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !3
; CHECK-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_3ULP:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !0
; CHECK-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_2ULP:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !4
; CHECK-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
  %no.md = call afn float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call afn float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call afn float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call afn float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call afn float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call afn float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4
  ret void
}

; %x is constrained up front by llvm.assume(fabs(%x) >= 0x3810000000000000).
; The shared CHECK prefix (all three RUN lines) expects the non-afn calls whose
; !fpmath is 1.0 ulp or more to become a bare llvm.amdgcn.sqrt.f32 with no
; ldexp scaling around it, while the calls with no metadata, with 0.5 ulp
; metadata, or with the afn flag stay as llvm.sqrt.f32.
define amdgpu_kernel void @sqrt_fpmath_f32_assume_nosub(ptr addrspace(1) %out, float %x) {
; CHECK-LABEL: define amdgpu_kernel void @sqrt_fpmath_f32_assume_nosub
; CHECK-SAME: (ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:    [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT:    [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
; CHECK-NEXT:    call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
; CHECK-NEXT:    [[NO_MD:%.*]] = call float @llvm.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_HALF_ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath !1
; CHECK-NEXT:    store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_1ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_25ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_2ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[MD_2ULP]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[MD_3ULP_AFN:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]), !fpmath !0
; CHECK-NEXT:    store volatile float [[MD_3ULP_AFN]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    [[NO_MD_AFN:%.*]] = call afn float @llvm.sqrt.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[NO_MD_AFN]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %is.not.subnormal = fcmp oge float %fabs.x, 0x3810000000000000
  call void @llvm.assume(i1 %is.not.subnormal)

  %no.md = call float @llvm.sqrt.f32(float %x)
  store volatile float %no.md, ptr addrspace(1) %out, align 4

  %md.half.ulp = call float @llvm.sqrt.f32(float %x), !fpmath !1
  store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4

  %md.1ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
  store volatile float %md.1ulp, ptr addrspace(1) %out, align 4

  %md.25ulp = call float @llvm.sqrt.f32(float %x), !fpmath !0
  store volatile float %md.25ulp, ptr addrspace(1) %out, align 4

  %md.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp, ptr addrspace(1) %out, align 4

  %md.2ulp = call float @llvm.sqrt.f32(float %x), !fpmath !4
  store volatile float %md.2ulp, ptr addrspace(1) %out, align 4

  ; Store the afn result itself (not %md.3ulp again) so that this call's value
  ; has a use, matching every other case in this function.
  %md.3ulp.afn = call afn float @llvm.sqrt.f32(float %x), !fpmath !3
  store volatile float %md.3ulp.afn, ptr addrspace(1) %out, align 4

  %no.md.afn = call afn float @llvm.sqrt.f32(float %x)
  store volatile float %no.md.afn, ptr addrspace(1) %out, align 4

  ret void
}

declare float @llvm.sqrt.f32(float)
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
declare float @llvm.fabs.f32(float)
declare void @llvm.assume(i1 noundef)

attributes #0 = { optnone noinline }

; Operands for the !fpmath attachments used by the calls in this file.
!0 = !{float 2.500000e+00}
!1 = !{float 5.000000e-01}
!2 = !{float 1.000000e+00}
!3 = !{float 3.000000e+00}
!4 = !{float 2.000000e+00}
