1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 2; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=ieee %s | FileCheck -check-prefixes=CHECK,IEEE,IEEE-GOODFREXP %s 3; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -passes=amdgpu-codegenprepare -denormal-fp-math-f32=ieee %s | FileCheck -check-prefixes=CHECK,IEEE,IEEE-BADFREXP %s 4; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=dynamic %s | FileCheck -check-prefixes=CHECK,IEEE,IEEE-GOODFREXP %s 5; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=preserve-sign %s | FileCheck -check-prefixes=CHECK,DAZ %s 6 7; Make sure this doesn't crash with no triple 8; TODO: Delete when old PM deleted 9; RUN: opt -amdgpu-codegenprepare -disable-output %s 10 11 12define amdgpu_kernel void @noop_fdiv_fpmath(ptr addrspace(1) %out, float %a, float %b) #0 { 13; CHECK-LABEL: define amdgpu_kernel void @noop_fdiv_fpmath( 14; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR0:[0-9]+]] { 15; CHECK-NEXT: [[MD_25ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META0:![0-9]+]] 16; CHECK-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 17; CHECK-NEXT: ret void 18; 19 %md.25ulp = fdiv float %a, %b, !fpmath !0 20 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 21 ret void 22} 23 24define amdgpu_kernel void @fdiv_fpmath_f32(ptr addrspace(1) %out, float %a, float %b) { 25; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32( 26; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1:[0-9]+]] { 27; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] 28; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 29; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1:![0-9]+]] 
30; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 31; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 32; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 33; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 34; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 35; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 36; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 37; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 38; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] 39; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] 40; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) 41; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 42; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 43; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 44; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1 45; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]]) 46; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 47; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 48; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1 49; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]] 50; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] 51; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) 52; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 53; IEEE-GOODFREXP-NEXT: 
[[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 54; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 55; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 56; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]]) 57; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 58; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 59; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1 60; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]] 61; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] 62; IEEE-GOODFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) 63; IEEE-GOODFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 64; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] 65; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 66; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] 67; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 68; IEEE-GOODFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] 69; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 70; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 71; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 72; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1 73; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]] 74; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]]) 75; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 
[[TMP31]]) 76; IEEE-GOODFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]] 77; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 78; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 79; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0 80; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP34]], 1 81; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]] 82; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]]) 83; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]]) 84; IEEE-GOODFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]] 85; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 86; IEEE-GOODFREXP-NEXT: ret void 87; 88; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32( 89; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1:[0-9]+]] { 90; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] 91; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 92; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1:![0-9]+]] 93; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 94; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 95; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 96; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 97; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 98; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 99; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 100; 
IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) 101; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] 102; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] 103; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) 104; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 105; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 106; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 107; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 108; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]]) 109; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 110; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 111; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) 112; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]] 113; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] 114; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) 115; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 116; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 117; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 118; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 119; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]]) 120; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 121; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 122; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 
@llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) 123; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]] 124; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] 125; IEEE-BADFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) 126; IEEE-BADFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 127; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] 128; IEEE-BADFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 129; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] 130; IEEE-BADFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 131; IEEE-BADFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] 132; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 133; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 134; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 135; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 136; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]] 137; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]]) 138; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]]) 139; IEEE-BADFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]] 140; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 141; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 142; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0 143; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 144; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]] 145; 
IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]]) 146; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]]) 147; IEEE-BADFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]] 148; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 149; IEEE-BADFREXP-NEXT: ret void 150; 151; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32( 152; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1:[0-9]+]] { 153; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] 154; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 155; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1:![0-9]+]] 156; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 157; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 158; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 159; DAZ-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 160; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 161; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 162; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 163; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 164; DAZ-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] 165; DAZ-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] 166; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) 167; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 168; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) 169; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 170; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) 171; DAZ-NEXT: store 
volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 172; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] 173; DAZ-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 174; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] 175; DAZ-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 176; DAZ-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] 177; DAZ-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 178; DAZ-NEXT: [[TMP10:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]]) 179; DAZ-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP10]] 180; DAZ-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 181; DAZ-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]]) 182; DAZ-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP11]] 183; DAZ-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 184; DAZ-NEXT: ret void 185; 186 %no.md = fdiv float %a, %b 187 store volatile float %no.md, ptr addrspace(1) %out, align 4 188 %md.half.ulp = fdiv float %a, %b, !fpmath !1 189 store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4 190 %md.1ulp = fdiv float %a, %b, !fpmath !2 191 store volatile float %md.1ulp, ptr addrspace(1) %out, align 4 192 %md.25ulp = fdiv float %a, %b, !fpmath !0 193 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 194 %md.3ulp = fdiv float %a, %b, !fpmath !3 195 store volatile float %md.3ulp, ptr addrspace(1) %out, align 4 196 %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0 197 store volatile float %fast.md.25ulp, ptr addrspace(1) %out, align 4 198 %afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0 199 store volatile float %afn.md.25ulp, ptr addrspace(1) %out, align 4 200 %no.md.arcp = fdiv arcp float %a, %b 201 store volatile float %no.md.arcp, ptr addrspace(1) %out, align 4 202 
%arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0 203 store volatile float %arcp.md.25ulp, ptr addrspace(1) %out, align 4 204 %arcp.md.1ulp = fdiv arcp float %a, %b, !fpmath !2 205 store volatile float %arcp.md.1ulp, ptr addrspace(1) %out, align 4 206 ret void 207} 208 209define amdgpu_kernel void @fdiv_fpmath_f32_flags(ptr addrspace(1) %out, float %a, float %b) { 210; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_flags( 211; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] { 212; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 213; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 214; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 215; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 216; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 217; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 218; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 219; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = fmul nnan ninf float [[TMP6]], [[TMP4]] 220; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] 221; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) 222; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 223; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 224; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 225; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1 226; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP11]]) 227; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 228; 
IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 229; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1 230; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul nnan ninf float [[TMP15]], [[TMP13]] 231; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] 232; IEEE-GOODFREXP-NEXT: [[MD_25ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) 233; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 234; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 235; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 236; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 237; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP20]]) 238; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 239; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 240; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1 241; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul ninf float [[TMP24]], [[TMP22]] 242; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] 243; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) 244; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 245; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 246; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 247; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1 248; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP29]]) 249; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 250; 
IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0 251; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP32]], 1 252; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = fmul ninf float [[TMP33]], [[TMP31]] 253; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]] 254; IEEE-GOODFREXP-NEXT: [[MD_25ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]]) 255; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 256; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 257; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = extractvalue { float, i32 } [[TMP37]], 0 258; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = extractvalue { float, i32 } [[TMP37]], 1 259; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP38]]) 260; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 261; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP41]], 0 262; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = extractvalue { float, i32 } [[TMP41]], 1 263; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = fmul nnan float [[TMP42]], [[TMP40]] 264; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = sub i32 [[TMP43]], [[TMP39]] 265; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP44]], i32 [[TMP45]]) 266; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 267; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 268; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = extractvalue { float, i32 } [[TMP46]], 0 269; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = extractvalue { float, i32 } [[TMP46]], 1 270; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP47]]) 271; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 272; IEEE-GOODFREXP-NEXT: 
[[TMP51:%.*]] = extractvalue { float, i32 } [[TMP50]], 0 273; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = extractvalue { float, i32 } [[TMP50]], 1 274; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = fmul nnan float [[TMP51]], [[TMP49]] 275; IEEE-GOODFREXP-NEXT: [[TMP54:%.*]] = sub i32 [[TMP52]], [[TMP48]] 276; IEEE-GOODFREXP-NEXT: [[MD_25ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP53]], i32 [[TMP54]]) 277; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 278; IEEE-GOODFREXP-NEXT: ret void 279; 280; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_flags( 281; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] { 282; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 283; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 284; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 285; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 286; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 287; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 288; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) 289; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = fmul nnan ninf float [[TMP6]], [[TMP4]] 290; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] 291; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) 292; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 293; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 294; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 295; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 
296; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP11]]) 297; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 298; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 299; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) 300; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul nnan ninf float [[TMP15]], [[TMP13]] 301; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] 302; IEEE-BADFREXP-NEXT: [[MD_25ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) 303; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 304; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 305; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 306; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 307; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP20]]) 308; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 309; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 310; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) 311; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul ninf float [[TMP24]], [[TMP22]] 312; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] 313; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) 314; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 315; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 316; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 317; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 
@llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 318; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP29]]) 319; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 320; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0 321; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) 322; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = fmul ninf float [[TMP33]], [[TMP31]] 323; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]] 324; IEEE-BADFREXP-NEXT: [[MD_25ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]]) 325; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 326; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 327; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = extractvalue { float, i32 } [[TMP37]], 0 328; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 329; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP38]]) 330; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 331; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP41]], 0 332; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) 333; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = fmul nnan float [[TMP42]], [[TMP40]] 334; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = sub i32 [[TMP43]], [[TMP39]] 335; IEEE-BADFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP44]], i32 [[TMP45]]) 336; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 337; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 338; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = extractvalue { float, i32 } [[TMP46]], 0 339; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = call i32 
@llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 340; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP47]]) 341; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 342; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = extractvalue { float, i32 } [[TMP50]], 0 343; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) 344; IEEE-BADFREXP-NEXT: [[TMP53:%.*]] = fmul nnan float [[TMP51]], [[TMP49]] 345; IEEE-BADFREXP-NEXT: [[TMP54:%.*]] = sub i32 [[TMP52]], [[TMP48]] 346; IEEE-BADFREXP-NEXT: [[MD_25ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP53]], i32 [[TMP54]]) 347; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 348; IEEE-BADFREXP-NEXT: ret void 349; 350; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_flags( 351; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] { 352; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 353; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 354; DAZ-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 355; DAZ-NEXT: [[TMP4:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 356; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 357; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 358; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 359; DAZ-NEXT: [[TMP8:%.*]] = fmul nnan ninf float [[TMP6]], [[TMP4]] 360; DAZ-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] 361; DAZ-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) 362; DAZ-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 363; DAZ-NEXT: [[MD_25ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) 364; DAZ-NEXT: store volatile float 
[[MD_25ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 365; DAZ-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 366; DAZ-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 367; DAZ-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1 368; DAZ-NEXT: [[TMP13:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP11]]) 369; DAZ-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 370; DAZ-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 371; DAZ-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1 372; DAZ-NEXT: [[TMP17:%.*]] = fmul ninf float [[TMP15]], [[TMP13]] 373; DAZ-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] 374; DAZ-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) 375; DAZ-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 376; DAZ-NEXT: [[MD_25ULP_NINF:%.*]] = call ninf float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) 377; DAZ-NEXT: store volatile float [[MD_25ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 378; DAZ-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 379; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 380; DAZ-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 381; DAZ-NEXT: [[TMP22:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP20]]) 382; DAZ-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 383; DAZ-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 384; DAZ-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1 385; DAZ-NEXT: [[TMP26:%.*]] = fmul nnan float [[TMP24]], [[TMP22]] 386; DAZ-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] 387; DAZ-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) 388; DAZ-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 389; DAZ-NEXT: 
[[MD_25ULP_NNAN:%.*]] = call nnan float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) 390; DAZ-NEXT: store volatile float [[MD_25ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 391; DAZ-NEXT: ret void 392; 393 %md.1ulp.ninf.nnan = fdiv ninf nnan float %a, %b, !fpmath !2 394 store volatile float %md.1ulp.ninf.nnan, ptr addrspace(1) %out, align 4 395 396 %md.25ulp.ninf.nnan = fdiv ninf nnan float %a, %b, !fpmath !0 397 store volatile float %md.25ulp.ninf.nnan, ptr addrspace(1) %out, align 4 398 399 %md.1ulp.ninf = fdiv ninf float %a, %b, !fpmath !2 400 store volatile float %md.1ulp.ninf, ptr addrspace(1) %out, align 4 401 402 %md.25ulp.ninf = fdiv ninf float %a, %b, !fpmath !0 403 store volatile float %md.25ulp.ninf, ptr addrspace(1) %out, align 4 404 405 %md.1ulp.nnan = fdiv nnan float %a, %b, !fpmath !2 406 store volatile float %md.1ulp.nnan, ptr addrspace(1) %out, align 4 407 408 %md.25ulp.nnan = fdiv nnan float %a, %b, !fpmath !0 409 store volatile float %md.25ulp.nnan, ptr addrspace(1) %out, align 4 410 411 ret void 412} 413; Reciprocal forms 1.0 / x and -1.0 / x with no metadata and with !fpmath !0, !1 and !2, with and without afn/fast. Per the checks below, only the flag-free !fpmath !0 and !2 divides are rewritten (frexp + llvm.amdgcn.rcp + llvm.ldexp for the IEEE prefixes, a bare llvm.amdgcn.rcp for DAZ, with an fneg of x first for -1.0); every other fdiv is expected to be left unchanged. 414define amdgpu_kernel void @rcp_fdiv_f32_fpmath(ptr addrspace(1) %out, float %x) { 415; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath( 416; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { 417; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float 1.000000e+00, [[X]] 418; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 419; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) 420; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 421; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 422; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 423; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 424; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 425; IEEE-GOODFREXP-NEXT: store
volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 426; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) 427; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 428; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1 429; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] 430; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) 431; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) 432; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 433; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath [[META1]] 434; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 435; IEEE-GOODFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn float 1.000000e+00, [[X]] 436; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 437; IEEE-GOODFREXP-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath [[META0]] 438; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 439; IEEE-GOODFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[X]] 440; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 441; IEEE-GOODFREXP-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath [[META0]] 442; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4 443; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fneg float [[X]] 444; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) 445; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 446; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 447; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, 
[[TMP14]] 448; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 449; IEEE-GOODFREXP-NEXT: [[NEG_MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) 450; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 451; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fneg float [[X]] 452; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP17]]) 453; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 454; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1 455; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]] 456; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP19]]) 457; IEEE-GOODFREXP-NEXT: [[NEG_MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]]) 458; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 459; IEEE-GOODFREXP-NEXT: [[NEG_AFN_NO_MD:%.*]] = fdiv afn float -1.000000e+00, [[X]] 460; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 461; IEEE-GOODFREXP-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath [[META0]] 462; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 463; IEEE-GOODFREXP-NEXT: [[NEG_FAST_NO_MD:%.*]] = fdiv fast float -1.000000e+00, [[X]] 464; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 465; IEEE-GOODFREXP-NEXT: ret void 466; 467; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath( 468; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { 469; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float 1.000000e+00, [[X]] 470; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 471; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 
} @llvm.frexp.f32.i32(float [[X]]) 472; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 473; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]]) 474; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 475; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 476; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 477; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 478; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) 479; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 480; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]]) 481; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] 482; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) 483; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) 484; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 485; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath [[META1]] 486; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 487; IEEE-BADFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn float 1.000000e+00, [[X]] 488; IEEE-BADFREXP-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 489; IEEE-BADFREXP-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath [[META0]] 490; IEEE-BADFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 491; IEEE-BADFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[X]] 492; IEEE-BADFREXP-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 493; IEEE-BADFREXP-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath 
[[META0]] 494; IEEE-BADFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4 495; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fneg float [[X]] 496; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) 497; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 498; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP11]]) 499; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] 500; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 501; IEEE-BADFREXP-NEXT: [[NEG_MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) 502; IEEE-BADFREXP-NEXT: store volatile float [[NEG_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 503; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fneg float [[X]] 504; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP17]]) 505; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 506; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP17]]) 507; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]] 508; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP19]]) 509; IEEE-BADFREXP-NEXT: [[NEG_MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]]) 510; IEEE-BADFREXP-NEXT: store volatile float [[NEG_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 511; IEEE-BADFREXP-NEXT: [[NEG_AFN_NO_MD:%.*]] = fdiv afn float -1.000000e+00, [[X]] 512; IEEE-BADFREXP-NEXT: store volatile float [[NEG_AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 513; IEEE-BADFREXP-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath [[META0]] 514; IEEE-BADFREXP-NEXT: store volatile float [[NEG_AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 515; IEEE-BADFREXP-NEXT: [[NEG_FAST_NO_MD:%.*]] = fdiv fast float -1.000000e+00, [[X]] 516; IEEE-BADFREXP-NEXT: 
store volatile float [[NEG_FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 517; IEEE-BADFREXP-NEXT: ret void 518; 519; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath( 520; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { 521; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float 1.000000e+00, [[X]] 522; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 523; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[X]]) 524; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 525; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[X]]) 526; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 527; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath [[META1]] 528; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 529; DAZ-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn float 1.000000e+00, [[X]] 530; DAZ-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 531; DAZ-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath [[META0]] 532; DAZ-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 533; DAZ-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[X]] 534; DAZ-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 535; DAZ-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath [[META0]] 536; DAZ-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4 537; DAZ-NEXT: [[TMP1:%.*]] = fneg float [[X]] 538; DAZ-NEXT: [[NEG_MD_1ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP1]]) 539; DAZ-NEXT: store volatile float [[NEG_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 540; DAZ-NEXT: [[TMP2:%.*]] = fneg float [[X]] 541; DAZ-NEXT: [[NEG_MD_25ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 542; DAZ-NEXT: store volatile float [[NEG_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 
543; DAZ-NEXT: [[NEG_AFN_NO_MD:%.*]] = fdiv afn float -1.000000e+00, [[X]] 544; DAZ-NEXT: store volatile float [[NEG_AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 545; DAZ-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath [[META0]] 546; DAZ-NEXT: store volatile float [[NEG_AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 547; DAZ-NEXT: [[NEG_FAST_NO_MD:%.*]] = fdiv fast float -1.000000e+00, [[X]] 548; DAZ-NEXT: store volatile float [[NEG_FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 549; DAZ-NEXT: ret void 550; 551 %no.md = fdiv float 1.000000e+00, %x 552 store volatile float %no.md, ptr addrspace(1) %out, align 4 553 %md.1ulp = fdiv float 1.000000e+00, %x, !fpmath !2 554 store volatile float %md.1ulp, ptr addrspace(1) %out, align 4 555 %md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !0 556 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 557 %md.half.ulp = fdiv float 1.000000e+00, %x, !fpmath !1 558 store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4 559 %afn.no.md = fdiv afn float 1.000000e+00, %x 560 store volatile float %afn.no.md, ptr addrspace(1) %out, align 4 561 %afn.25ulp = fdiv afn float 1.000000e+00, %x, !fpmath !0 562 store volatile float %afn.25ulp, ptr addrspace(1) %out, align 4 563 %fast.no.md = fdiv fast float 1.000000e+00, %x 564 store volatile float %fast.no.md, ptr addrspace(1) %out, align 4 565 %fast.25ulp = fdiv fast float 1.000000e+00, %x, !fpmath !0 566 store volatile float %fast.25ulp, ptr addrspace(1) %out, align 4 567 %neg.md.1ulp = fdiv float -1.000000e+00, %x, !fpmath !2 568 store volatile float %neg.md.1ulp, ptr addrspace(1) %out, align 4 569 %neg.md.25ulp = fdiv float -1.000000e+00, %x, !fpmath !0 570 store volatile float %neg.md.25ulp, ptr addrspace(1) %out, align 4 571 %neg.afn.no.md = fdiv afn float -1.000000e+00, %x 572 store volatile float %neg.afn.no.md, ptr addrspace(1) %out, align 4 573 %neg.afn.25ulp = fdiv afn float -1.000000e+00, %x, !fpmath !0 574 store volatile float 
%neg.afn.25ulp, ptr addrspace(1) %out, align 4 575 %neg.fast.no.md = fdiv fast float -1.000000e+00, %x 576 store volatile float %neg.fast.no.md, ptr addrspace(1) %out, align 4 577 ret void 578} 579; Reciprocal 1.0 / x with !fpmath !2 carrying nnan, ninf and nsz fast-math flags. The checks expect the flags of each fdiv to be copied onto the llvm.amdgcn.rcp call that replaces it and, for the IEEE prefixes, onto the trailing llvm.ldexp call as well. 580define amdgpu_kernel void @rcp_fdiv_f32_fpmath_flags(ptr addrspace(1) %out, float %x) { 581; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath_flags( 582; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { 583; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) 584; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 585; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 586; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 587; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 588; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 589; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 590; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) 591; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 592; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1 593; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] 594; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP7]]) 595; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) 596; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 597; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) 598; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0 599; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP11]], 1 600;
IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]] 601; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP12]]) 602; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]]) 603; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 604; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) 605; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0 606; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP16]], 1 607; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = sub i32 0, [[TMP18]] 608; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP17]]) 609; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NSZ:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP19]]) 610; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4 611; IEEE-GOODFREXP-NEXT: ret void 612; 613; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath_flags( 614; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { 615; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) 616; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 617; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]]) 618; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 619; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 620; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 621; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 622; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) 623; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = 
extractvalue { float, i32 } [[TMP6]], 0 624; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]]) 625; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] 626; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP7]]) 627; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) 628; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 629; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) 630; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0 631; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]]) 632; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]] 633; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP12]]) 634; IEEE-BADFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]]) 635; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 636; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) 637; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0 638; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]]) 639; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = sub i32 0, [[TMP18]] 640; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP17]]) 641; IEEE-BADFREXP-NEXT: [[MD_1ULP_NSZ:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP19]]) 642; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4 643; IEEE-BADFREXP-NEXT: ret void 644; 645; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath_flags( 646; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { 647; DAZ-NEXT: 
[[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[X]]) 648; DAZ-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 649; DAZ-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[X]]) 650; DAZ-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 651; DAZ-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[X]]) 652; DAZ-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 653; DAZ-NEXT: [[MD_1ULP_NSZ:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[X]]) 654; DAZ-NEXT: store volatile float [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4 655; DAZ-NEXT: ret void 656; 657 %md.1ulp.ninf.nnan = fdiv ninf nnan float 1.000000e+00, %x, !fpmath !2 658 store volatile float %md.1ulp.ninf.nnan, ptr addrspace(1) %out, align 4 659 660 %md.1ulp.ninf = fdiv ninf float 1.000000e+00, %x, !fpmath !2 661 store volatile float %md.1ulp.ninf, ptr addrspace(1) %out, align 4 662 663 %md.1ulp.nnan = fdiv nnan float 1.000000e+00, %x, !fpmath !2 664 store volatile float %md.1ulp.nnan, ptr addrspace(1) %out, align 4 665 666 %md.1ulp.nsz = fdiv nsz float 1.000000e+00, %x, !fpmath !2 667 store volatile float %md.1ulp.nsz, ptr addrspace(1) %out, align 4 668 669 ret void 670} 671; Reciprocal 1.0 / x with !fpmath !2 for operands marked nofpclass(nan), nofpclass(inf) and nofpclass(inf nan), one parameter per case; the checks expect the same expansion for all three. 672define amdgpu_kernel void @rcp_fdiv_f32_knownfinite(ptr addrspace(1) %out, 673; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_knownfinite( 674; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(inf) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] { 675; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_NAN]]) 676; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 677; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 678; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 679;
IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 680; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_NAN:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 681; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_NAN]], ptr addrspace(1) [[OUT]], align 4 682; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_INF]]) 683; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 684; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1 685; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] 686; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) 687; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_INF:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) 688; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_INF]], ptr addrspace(1) [[OUT]], align 4 689; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_INF_NAN]]) 690; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0 691; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP11]], 1 692; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]] 693; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP12]]) 694; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_INF_NAN:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]]) 695; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4 696; IEEE-GOODFREXP-NEXT: ret void 697; 698; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_knownfinite( 699; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(inf) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] { 700; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_NAN]]) 701; IEEE-BADFREXP-NEXT:
[[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 702; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_NAN]]) 703; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 704; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 705; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_NAN:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 706; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_NAN]], ptr addrspace(1) [[OUT]], align 4 707; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_INF]]) 708; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 709; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_INF]]) 710; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] 711; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) 712; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_INF:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) 713; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_INF]], ptr addrspace(1) [[OUT]], align 4 714; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_INF_NAN]]) 715; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0 716; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_INF_NAN]]) 717; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]] 718; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP12]]) 719; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_INF_NAN:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]]) 720; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4 721; IEEE-BADFREXP-NEXT: ret void 722; 723; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_knownfinite( 724; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]],
float nofpclass(inf) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] { 725; DAZ-NEXT: [[MD_1ULP_NO_NAN:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_NAN]]) 726; DAZ-NEXT: store volatile float [[MD_1ULP_NO_NAN]], ptr addrspace(1) [[OUT]], align 4 727; DAZ-NEXT: [[MD_1ULP_NO_INF:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_INF]]) 728; DAZ-NEXT: store volatile float [[MD_1ULP_NO_INF]], ptr addrspace(1) [[OUT]], align 4 729; DAZ-NEXT: [[MD_1ULP_NO_INF_NAN:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_INF_NAN]]) 730; DAZ-NEXT: store volatile float [[MD_1ULP_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4 731; DAZ-NEXT: ret void 732; 733 float nofpclass(nan) %no.nan, 734 float nofpclass(inf) %no.inf, 735 float nofpclass(inf nan) %no.inf.nan) { 736 %md.1ulp.no.nan = fdiv float 1.000000e+00, %no.nan, !fpmath !2 737 store volatile float %md.1ulp.no.nan, ptr addrspace(1) %out, align 4 738 739 %md.1ulp.no.inf = fdiv float 1.000000e+00, %no.inf, !fpmath !2 740 store volatile float %md.1ulp.no.inf, ptr addrspace(1) %out, align 4 741 742 %md.1ulp.no.inf.nan = fdiv float 1.000000e+00, %no.inf.nan, !fpmath !2 743 store volatile float %md.1ulp.no.inf.nan, ptr addrspace(1) %out, align 4 744 745 ret void 746} 747; Reciprocal 1.0 / x with !fpmath !2 for operands marked nofpclass(zero) and nofpclass(zero sub); the checks expect the same frexp/rcp/ldexp (IEEE) or bare rcp (DAZ) expansion for both. 748define amdgpu_kernel void @rcp_fdiv_f32_nozero(ptr addrspace(1) %out, 749; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nozero( 750; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] { 751; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_ZERO]]) 752; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 753; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 754; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 755; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 756; IEEE-GOODFREXP-NEXT:
[[MD_1ULP_NO_ZERO:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 757; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4 758; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_ZERO_SUB]]) 759; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 760; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1 761; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] 762; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) 763; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_ZERO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) 764; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4 765; IEEE-GOODFREXP-NEXT: ret void 766; 767; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nozero( 768; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] { 769; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_ZERO]]) 770; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 771; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_ZERO]]) 772; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 773; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 774; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_ZERO:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 775; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4 776; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_ZERO_SUB]]) 777; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 778; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 
@llvm.amdgcn.frexp.exp.i32.f32(float [[NO_ZERO_SUB]]) 779; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] 780; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) 781; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_ZERO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) 782; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4 783; IEEE-BADFREXP-NEXT: ret void 784; 785; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nozero( 786; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] { 787; DAZ-NEXT: [[MD_1ULP_NO_ZERO:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_ZERO]]) 788; DAZ-NEXT: store volatile float [[MD_1ULP_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4 789; DAZ-NEXT: [[MD_1ULP_NO_ZERO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_ZERO_SUB]]) 790; DAZ-NEXT: store volatile float [[MD_1ULP_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4 791; DAZ-NEXT: ret void 792; 793 float nofpclass(zero) %no.zero, 794 float nofpclass(zero sub) %no.zero.sub) { 795 %md.1ulp.no.zero = fdiv float 1.000000e+00, %no.zero, !fpmath !2 796 store volatile float %md.1ulp.no.zero, ptr addrspace(1) %out, align 4 797 798 %md.1ulp.no.zero.sub = fdiv float 1.000000e+00, %no.zero.sub, !fpmath !2 799 store volatile float %md.1ulp.no.zero.sub, ptr addrspace(1) %out, align 4 800 ret void 801} 802 803define amdgpu_kernel void @rcp_fdiv_f32_nosub(ptr addrspace(1) %out, 804; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nosub( 805; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]]) #[[ATTR1]] { 806; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_SUB]]) 807; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 808; 
IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 809; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 810; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 811; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 812; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 813; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_NSUB]]) 814; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 815; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1 816; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] 817; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) 818; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_NSUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) 819; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4 820; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_PSUB]]) 821; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0 822; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP11]], 1 823; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]] 824; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP12]]) 825; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_PSUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]]) 826; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4 827; IEEE-GOODFREXP-NEXT: ret void 828; 829; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nosub( 830; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]]) #[[ATTR1]] 
{ 831; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_SUB]]) 832; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 833; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_SUB]]) 834; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 835; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 836; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 837; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 838; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_NSUB]]) 839; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 840; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_NSUB]]) 841; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] 842; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) 843; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_NSUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) 844; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4 845; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_PSUB]]) 846; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0 847; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_PSUB]]) 848; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]] 849; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP12]]) 850; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_PSUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]]) 851; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4 852; IEEE-BADFREXP-NEXT: ret void 853; 854; DAZ-LABEL: define 
amdgpu_kernel void @rcp_fdiv_f32_nosub( 855; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]]) #[[ATTR1]] { 856; DAZ-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_SUB]]) 857; DAZ-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 858; DAZ-NEXT: [[MD_1ULP_NO_NSUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_NSUB]]) 859; DAZ-NEXT: store volatile float [[MD_1ULP_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4 860; DAZ-NEXT: [[MD_1ULP_NO_PSUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_PSUB]]) 861; DAZ-NEXT: store volatile float [[MD_1ULP_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4 862; DAZ-NEXT: ret void 863; 864 float nofpclass(sub) %no.sub, 865 float nofpclass(nsub) %no.nsub, 866 float nofpclass(psub) %no.psub) { 867 %md.1ulp.no.sub = fdiv float 1.000000e+00, %no.sub, !fpmath !2 868 store volatile float %md.1ulp.no.sub, ptr addrspace(1) %out, align 4 869 870 %md.1ulp.no.nsub = fdiv float 1.000000e+00, %no.nsub, !fpmath !2 871 store volatile float %md.1ulp.no.nsub, ptr addrspace(1) %out, align 4 872 873 %md.1ulp.no.psub = fdiv float 1.000000e+00, %no.psub, !fpmath !2 874 store volatile float %md.1ulp.no.psub, ptr addrspace(1) %out, align 4 875 876 ret void 877} 878 879define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub(ptr addrspace(1) %out, float %x) { 880; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub( 881; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { 882; IEEE-GOODFREXP-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]]) 883; IEEE-GOODFREXP-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000 884; IEEE-GOODFREXP-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]]) 885; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) 886; IEEE-GOODFREXP-NEXT: 
[[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 887; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 888; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 889; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 890; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 891; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 892; IEEE-GOODFREXP-NEXT: ret void 893; 894; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub( 895; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { 896; IEEE-BADFREXP-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]]) 897; IEEE-BADFREXP-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000 898; IEEE-BADFREXP-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]]) 899; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) 900; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 901; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]]) 902; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 903; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 904; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 905; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 906; IEEE-BADFREXP-NEXT: ret void 907; 908; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub( 909; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { 910; DAZ-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]]) 911; DAZ-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000 912; DAZ-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]]) 913; 
DAZ-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[X]]) 914; DAZ-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 915; DAZ-NEXT: ret void 916; 917 %fabs.x = call float @llvm.fabs.f32(float %x) 918 %is.not.subnormal = fcmp oge float %fabs.x, 0x3810000000000000 919 call void @llvm.assume(i1 %is.not.subnormal) 920 %md.1ulp.no.sub = fdiv float 1.000000e+00, %x, !fpmath !2 921 store volatile float %md.1ulp.no.sub, ptr addrspace(1) %out, align 4 922 ret void 923} 924 925; Test the case where, in addition to the input, the result of the fdiv is also assumed to not be denormal. 926define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub_assume_result_nosub(ptr addrspace(1) %out, float %x) { 927; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub_assume_result_nosub( 928; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { 929; IEEE-GOODFREXP-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]]) 930; IEEE-GOODFREXP-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000 931; IEEE-GOODFREXP-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]]) 932; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) 933; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 934; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 935; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 936; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 937; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 938; IEEE-GOODFREXP-NEXT: [[FABS_RESULT:%.*]] = call float @llvm.fabs.f32(float [[MD_1ULP_NO_SUB]]) 939; IEEE-GOODFREXP-NEXT: [[RESULT_IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_RESULT]], 0x3810000000000000 940; IEEE-GOODFREXP-NEXT: call void @llvm.assume(i1 [[RESULT_IS_NOT_SUBNORMAL]]) 941; IEEE-GOODFREXP-NEXT: store volatile 
float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 942; IEEE-GOODFREXP-NEXT: ret void 943; 944; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub_assume_result_nosub( 945; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { 946; IEEE-BADFREXP-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]]) 947; IEEE-BADFREXP-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000 948; IEEE-BADFREXP-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]]) 949; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) 950; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 951; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]]) 952; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 953; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 954; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 955; IEEE-BADFREXP-NEXT: [[FABS_RESULT:%.*]] = call float @llvm.fabs.f32(float [[MD_1ULP_NO_SUB]]) 956; IEEE-BADFREXP-NEXT: [[RESULT_IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_RESULT]], 0x3810000000000000 957; IEEE-BADFREXP-NEXT: call void @llvm.assume(i1 [[RESULT_IS_NOT_SUBNORMAL]]) 958; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 959; IEEE-BADFREXP-NEXT: ret void 960; 961; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub_assume_result_nosub( 962; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { 963; DAZ-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]]) 964; DAZ-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000 965; DAZ-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]]) 966; DAZ-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[X]]) 967; DAZ-NEXT: [[FABS_RESULT:%.*]] = 
call float @llvm.fabs.f32(float [[MD_1ULP_NO_SUB]]) 968; DAZ-NEXT: [[RESULT_IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_RESULT]], 0x3810000000000000 969; DAZ-NEXT: call void @llvm.assume(i1 [[RESULT_IS_NOT_SUBNORMAL]]) 970; DAZ-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 971; DAZ-NEXT: ret void 972; 973 %fabs.x = call float @llvm.fabs.f32(float %x) 974 %is.not.subnormal = fcmp oge float %fabs.x, 0x3810000000000000 975 call void @llvm.assume(i1 %is.not.subnormal) 976 %md.1ulp.no.sub = fdiv float 1.000000e+00, %x, !fpmath !2 977 978 %fabs.result = call float @llvm.fabs.f32(float %md.1ulp.no.sub) 979 %result.is.not.subnormal = fcmp oge float %fabs.result, 0x3810000000000000 980 call void @llvm.assume(i1 %result.is.not.subnormal) 981 store volatile float %md.1ulp.no.sub, ptr addrspace(1) %out, align 4 982 ret void 983} 984 985define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_flags(ptr addrspace(1) %out, <2 x float> %x) { 986; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_flags( 987; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { 988; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0 989; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1 990; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) 991; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractvalue { float, i32 } [[TMP3]], 0 992; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = extractvalue { float, i32 } [[TMP3]], 1 993; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = sub i32 0, [[TMP5]] 994; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP4]]) 995; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP7]], i32 [[TMP6]]) 996; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]]) 997; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = extractvalue 
{ float, i32 } [[TMP9]], 0 998; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP9]], 1 999; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = sub i32 0, [[TMP11]] 1000; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP10]]) 1001; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP13]], i32 [[TMP12]]) 1002; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0 1003; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1 1004; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 1005; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0 1006; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1 1007; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP16]]) 1008; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 1009; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1 1010; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]] 1011; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP19]]) 1012; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]]) 1013; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP17]]) 1014; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0 1015; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP24]], 1 1016; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]] 1017; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP25]]) 1018; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]]) 1019; IEEE-GOODFREXP-NEXT: 
[[TMP30:%.*]] = insertelement <2 x float> poison, float [[TMP23]], i64 0 1020; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = insertelement <2 x float> [[TMP30]], float [[TMP29]], i64 1 1021; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 1022; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = extractelement <2 x float> [[X]], i64 0 1023; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractelement <2 x float> [[X]], i64 1 1024; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP31]]) 1025; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP33]], 0 1026; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP33]], 1 1027; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = sub i32 0, [[TMP35]] 1028; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP34]]) 1029; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP37]], i32 [[TMP36]]) 1030; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP32]]) 1031; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0 1032; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP39]], 1 1033; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = sub i32 0, [[TMP41]] 1034; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP40]]) 1035; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP43]], i32 [[TMP42]]) 1036; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = insertelement <2 x float> poison, float [[TMP38]], i64 0 1037; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = insertelement <2 x float> [[TMP45]], float [[TMP44]], i64 1 1038; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 1039; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = extractelement <2 x float> [[X]], i64 0 1040; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = extractelement <2 x 
float> [[X]], i64 1 1041; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP46]]) 1042; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = extractvalue { float, i32 } [[TMP48]], 0 1043; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = extractvalue { float, i32 } [[TMP48]], 1 1044; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = sub i32 0, [[TMP50]] 1045; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP49]]) 1046; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP52]], i32 [[TMP51]]) 1047; IEEE-GOODFREXP-NEXT: [[TMP54:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP47]]) 1048; IEEE-GOODFREXP-NEXT: [[TMP55:%.*]] = extractvalue { float, i32 } [[TMP54]], 0 1049; IEEE-GOODFREXP-NEXT: [[TMP56:%.*]] = extractvalue { float, i32 } [[TMP54]], 1 1050; IEEE-GOODFREXP-NEXT: [[TMP57:%.*]] = sub i32 0, [[TMP56]] 1051; IEEE-GOODFREXP-NEXT: [[TMP58:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP55]]) 1052; IEEE-GOODFREXP-NEXT: [[TMP59:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP58]], i32 [[TMP57]]) 1053; IEEE-GOODFREXP-NEXT: [[TMP60:%.*]] = insertelement <2 x float> poison, float [[TMP53]], i64 0 1054; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NSZ:%.*]] = insertelement <2 x float> [[TMP60]], float [[TMP59]], i64 1 1055; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4 1056; IEEE-GOODFREXP-NEXT: ret void 1057; 1058; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_flags( 1059; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { 1060; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0 1061; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1 1062; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) 1063; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractvalue { float, i32 } [[TMP3]], 0 1064; IEEE-BADFREXP-NEXT: 
[[TMP5:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP1]]) 1065; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = sub i32 0, [[TMP5]] 1066; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP4]]) 1067; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP7]], i32 [[TMP6]]) 1068; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]]) 1069; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0 1070; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP2]]) 1071; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = sub i32 0, [[TMP11]] 1072; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP10]]) 1073; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP13]], i32 [[TMP12]]) 1074; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0 1075; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1 1076; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 1077; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0 1078; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1 1079; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP16]]) 1080; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 1081; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP16]]) 1082; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]] 1083; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP19]]) 1084; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]]) 1085; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } 
@llvm.frexp.f32.i32(float [[TMP17]]) 1086; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0 1087; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP17]]) 1088; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]] 1089; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP25]]) 1090; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]]) 1091; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = insertelement <2 x float> poison, float [[TMP23]], i64 0 1092; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = insertelement <2 x float> [[TMP30]], float [[TMP29]], i64 1 1093; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 1094; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = extractelement <2 x float> [[X]], i64 0 1095; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractelement <2 x float> [[X]], i64 1 1096; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP31]]) 1097; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP33]], 0 1098; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP31]]) 1099; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = sub i32 0, [[TMP35]] 1100; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP34]]) 1101; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP37]], i32 [[TMP36]]) 1102; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP32]]) 1103; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0 1104; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP32]]) 1105; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = sub i32 0, [[TMP41]] 1106; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP40]]) 1107; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = 
call nnan float @llvm.ldexp.f32.i32(float [[TMP43]], i32 [[TMP42]]) 1108; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = insertelement <2 x float> poison, float [[TMP38]], i64 0 1109; IEEE-BADFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = insertelement <2 x float> [[TMP45]], float [[TMP44]], i64 1 1110; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 1111; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = extractelement <2 x float> [[X]], i64 0 1112; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = extractelement <2 x float> [[X]], i64 1 1113; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP46]]) 1114; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = extractvalue { float, i32 } [[TMP48]], 0 1115; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP46]]) 1116; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = sub i32 0, [[TMP50]] 1117; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP49]]) 1118; IEEE-BADFREXP-NEXT: [[TMP53:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP52]], i32 [[TMP51]]) 1119; IEEE-BADFREXP-NEXT: [[TMP54:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP47]]) 1120; IEEE-BADFREXP-NEXT: [[TMP55:%.*]] = extractvalue { float, i32 } [[TMP54]], 0 1121; IEEE-BADFREXP-NEXT: [[TMP56:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP47]]) 1122; IEEE-BADFREXP-NEXT: [[TMP57:%.*]] = sub i32 0, [[TMP56]] 1123; IEEE-BADFREXP-NEXT: [[TMP58:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP55]]) 1124; IEEE-BADFREXP-NEXT: [[TMP59:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP58]], i32 [[TMP57]]) 1125; IEEE-BADFREXP-NEXT: [[TMP60:%.*]] = insertelement <2 x float> poison, float [[TMP53]], i64 0 1126; IEEE-BADFREXP-NEXT: [[MD_1ULP_NSZ:%.*]] = insertelement <2 x float> [[TMP60]], float [[TMP59]], i64 1 1127; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4 1128; IEEE-BADFREXP-NEXT: ret void 1129; 1130; 
DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_flags( 1131; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { 1132; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0 1133; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1 1134; DAZ-NEXT: [[TMP3:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP1]]) 1135; DAZ-NEXT: [[TMP4:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 1136; DAZ-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 1137; DAZ-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i64 1 1138; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 1139; DAZ-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[X]], i64 0 1140; DAZ-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[X]], i64 1 1141; DAZ-NEXT: [[TMP8:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP6]]) 1142; DAZ-NEXT: [[TMP9:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP7]]) 1143; DAZ-NEXT: [[TMP10:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0 1144; DAZ-NEXT: [[MD_1ULP_NINF:%.*]] = insertelement <2 x float> [[TMP10]], float [[TMP9]], i64 1 1145; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 1146; DAZ-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[X]], i64 0 1147; DAZ-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[X]], i64 1 1148; DAZ-NEXT: [[TMP13:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP11]]) 1149; DAZ-NEXT: [[TMP14:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP12]]) 1150; DAZ-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i64 0 1151; DAZ-NEXT: [[MD_1ULP_NNAN:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1 1152; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 1153; DAZ-NEXT: [[TMP16:%.*]] = extractelement 
<2 x float> [[X]], i64 0 1154; DAZ-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1 1155; DAZ-NEXT: [[TMP18:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP16]]) 1156; DAZ-NEXT: [[TMP19:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP17]]) 1157; DAZ-NEXT: [[TMP20:%.*]] = insertelement <2 x float> poison, float [[TMP18]], i64 0 1158; DAZ-NEXT: [[MD_1ULP_NSZ:%.*]] = insertelement <2 x float> [[TMP20]], float [[TMP19]], i64 1 1159; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4 1160; DAZ-NEXT: ret void 1161; 1162 %md.1ulp.ninf.nnan = fdiv ninf nnan <2 x float> <float 1.0, float 1.0>, %x, !fpmath !2 1163 store volatile <2 x float> %md.1ulp.ninf.nnan, ptr addrspace(1) %out, align 4 1164 1165 %md.1ulp.ninf = fdiv ninf <2 x float> <float 1.0, float 1.0>, %x, !fpmath !2 1166 store volatile <2 x float> %md.1ulp.ninf, ptr addrspace(1) %out, align 4 1167 1168 %md.1ulp.nnan = fdiv nnan <2 x float> <float 1.0, float 1.0>, %x, !fpmath !2 1169 store volatile <2 x float> %md.1ulp.nnan, ptr addrspace(1) %out, align 4 1170 1171 %md.1ulp.nsz = fdiv nsz <2 x float> <float 1.0, float 1.0>, %x, !fpmath !2 1172 store volatile <2 x float> %md.1ulp.nsz, ptr addrspace(1) %out, align 4 1173 1174 ret void 1175} 1176; Vector fdiv with varying !fpmath: the divides without metadata and with !fpmath !1 are expected to remain fdiv; the !fpmath !2 and !fpmath !0 divides are expected to be scalarized per element (frexp/rcp/ldexp sequences, or llvm.amdgcn.fdiv.fast for !fpmath !0 under the DAZ prefix). 1177define amdgpu_kernel void @fdiv_fpmath_f32_vector(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) { 1178; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_vector( 1179; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) #[[ATTR1]] { 1180; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> [[A]], [[B]] 1181; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8 1182; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], [[B]], !fpmath [[META1]] 1183; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8 1184; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = 
extractelement <2 x float> [[A]], i64 0 1185; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[A]], i64 1 1186; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[B]], i64 0 1187; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[B]], i64 1 1188; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 1189; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 1190; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 1191; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP6]]) 1192; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) 1193; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0 1194; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP9]], 1 1195; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = fmul float [[TMP10]], [[TMP8]] 1196; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP7]] 1197; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]]) 1198; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 1199; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP15]], 0 1200; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP15]], 1 1201; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP16]]) 1202; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]]) 1203; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 1204; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 1205; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = fmul float [[TMP20]], [[TMP18]] 1206; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = sub i32 [[TMP21]], [[TMP17]] 1207; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call float 
@llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP23]]) 1208; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0 1209; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP24]], i64 1 1210; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 8 1211; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[A]], i64 0 1212; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[A]], i64 1 1213; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[B]], i64 0 1214; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[B]], i64 1 1215; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP28]]) 1216; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP30]], 0 1217; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP30]], 1 1218; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP31]]) 1219; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP26]]) 1220; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0 1221; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP34]], 1 1222; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = fmul float [[TMP35]], [[TMP33]] 1223; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = sub i32 [[TMP36]], [[TMP32]] 1224; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP37]], i32 [[TMP38]]) 1225; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP29]]) 1226; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP40]], 0 1227; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP40]], 1 1228; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP41]]) 1229; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = call { float, i32 } 
@llvm.frexp.f32.i32(float [[TMP27]]) 1230; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = extractvalue { float, i32 } [[TMP44]], 0 1231; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = extractvalue { float, i32 } [[TMP44]], 1 1232; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = fmul float [[TMP45]], [[TMP43]] 1233; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = sub i32 [[TMP46]], [[TMP42]] 1234; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP47]], i32 [[TMP48]]) 1235; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = insertelement <2 x float> poison, float [[TMP39]], i64 0 1236; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = insertelement <2 x float> [[TMP50]], float [[TMP49]], i64 1 1237; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 8 1238; IEEE-GOODFREXP-NEXT: ret void 1239; 1240; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_vector( 1241; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) #[[ATTR1]] { 1242; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> [[A]], [[B]] 1243; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8 1244; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], [[B]], !fpmath [[META1]] 1245; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8 1246; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[A]], i64 0 1247; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[A]], i64 1 1248; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[B]], i64 0 1249; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[B]], i64 1 1250; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 1251; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 1252; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]]) 1253; IEEE-BADFREXP-NEXT: 
[[TMP8:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP6]]) 1254; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) 1255; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0 1256; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP1]]) 1257; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = fmul float [[TMP10]], [[TMP8]] 1258; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP7]] 1259; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]]) 1260; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 1261; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP15]], 0 1262; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]]) 1263; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP16]]) 1264; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]]) 1265; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 1266; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP2]]) 1267; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = fmul float [[TMP20]], [[TMP18]] 1268; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = sub i32 [[TMP21]], [[TMP17]] 1269; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP23]]) 1270; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0 1271; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP24]], i64 1 1272; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 8 1273; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[A]], i64 0 1274; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[A]], i64 1 1275; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = 
extractelement <2 x float> [[B]], i64 0 1276; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[B]], i64 1 1277; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP28]]) 1278; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP30]], 0 1279; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP28]]) 1280; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP31]]) 1281; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP26]]) 1282; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0 1283; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP26]]) 1284; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = fmul float [[TMP35]], [[TMP33]] 1285; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = sub i32 [[TMP36]], [[TMP32]] 1286; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP37]], i32 [[TMP38]]) 1287; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP29]]) 1288; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP40]], 0 1289; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP29]]) 1290; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP41]]) 1291; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP27]]) 1292; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = extractvalue { float, i32 } [[TMP44]], 0 1293; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP27]]) 1294; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = fmul float [[TMP45]], [[TMP43]] 1295; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = sub i32 [[TMP46]], [[TMP42]] 1296; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP47]], i32 [[TMP48]]) 1297; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = insertelement <2 x float> 
poison, float [[TMP39]], i64 0 1298; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = insertelement <2 x float> [[TMP50]], float [[TMP49]], i64 1 1299; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 8 1300; IEEE-BADFREXP-NEXT: ret void 1301; 1302; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_vector( 1303; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) #[[ATTR1]] { 1304; DAZ-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> [[A]], [[B]] 1305; DAZ-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8 1306; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], [[B]], !fpmath [[META1]] 1307; DAZ-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8 1308; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[A]], i64 0 1309; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[A]], i64 1 1310; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[B]], i64 0 1311; DAZ-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[B]], i64 1 1312; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 1313; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 1314; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 1315; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP6]]) 1316; DAZ-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) 1317; DAZ-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0 1318; DAZ-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP9]], 1 1319; DAZ-NEXT: [[TMP12:%.*]] = fmul float [[TMP10]], [[TMP8]] 1320; DAZ-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP7]] 1321; DAZ-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]]) 1322; DAZ-NEXT: [[TMP15:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 1323; DAZ-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP15]], 0 1324; DAZ-NEXT: 
[[TMP17:%.*]] = extractvalue { float, i32 } [[TMP15]], 1 1325; DAZ-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP16]]) 1326; DAZ-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]]) 1327; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 1328; DAZ-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 1329; DAZ-NEXT: [[TMP22:%.*]] = fmul float [[TMP20]], [[TMP18]] 1330; DAZ-NEXT: [[TMP23:%.*]] = sub i32 [[TMP21]], [[TMP17]] 1331; DAZ-NEXT: [[TMP24:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP23]]) 1332; DAZ-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0 1333; DAZ-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP24]], i64 1 1334; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 8 1335; DAZ-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[A]], i64 0 1336; DAZ-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[A]], i64 1 1337; DAZ-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[B]], i64 0 1338; DAZ-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[B]], i64 1 1339; DAZ-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[TMP26]], float [[TMP28]]) 1340; DAZ-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[TMP27]], float [[TMP29]]) 1341; DAZ-NEXT: [[TMP32:%.*]] = insertelement <2 x float> poison, float [[TMP30]], i64 0 1342; DAZ-NEXT: [[MD_25ULP:%.*]] = insertelement <2 x float> [[TMP32]], float [[TMP31]], i64 1 1343; DAZ-NEXT: store volatile <2 x float> [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 8 1344; DAZ-NEXT: ret void 1345; 1346 %no.md = fdiv <2 x float> %a, %b 1347 store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 8 1348 %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1 1349 store volatile <2 x float> %md.half.ulp, ptr addrspace(1) %out, align 8 1350 %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2 1351 store volatile <2 x float> %md.1ulp, ptr 
addrspace(1) %out, align 8 1352 %md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0 1353 store volatile <2 x float> %md.25ulp, ptr addrspace(1) %out, align 8 1354 ret void 1355} 1356; Reciprocal of <2 x float> %x with a splat 1.0 numerator: every fdiv below (plain, afn, fast, with and without !fpmath) is expected to be left as an fdiv for every RUN line (single shared CHECK prefix). 1357define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath(ptr addrspace(1) %out, <2 x float> %x) { 1358; CHECK-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath( 1359; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1:[0-9]+]] { 1360; CHECK-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]] 1361; CHECK-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8 1362; CHECK-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META1:![0-9]+]] 1363; CHECK-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8 1364; CHECK-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]] 1365; CHECK-NEXT: store volatile <2 x float> [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 8 1366; CHECK-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[X]] 1367; CHECK-NEXT: store volatile <2 x float> [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 8 1368; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META0]] 1369; CHECK-NEXT: store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8 1370; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META0]] 1371; CHECK-NEXT: store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8 1372; CHECK-NEXT: ret void 1373; 1374 %no.md = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x 1375 store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 8 1376 %md.half.ulp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !1 1377 store volatile <2 x float> %md.half.ulp, ptr addrspace(1) %out, align 8 1378 %afn.no.md = fdiv 
afn <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x 1379 store volatile <2 x float> %afn.no.md, ptr addrspace(1) %out, align 8 1380 %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x 1381 store volatile <2 x float> %fast.no.md, ptr addrspace(1) %out, align 8 1382 %afn.25ulp = fdiv afn <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0 1383 store volatile <2 x float> %afn.25ulp, ptr addrspace(1) %out, align 8 1384 %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0 1385 store volatile <2 x float> %fast.25ulp, ptr addrspace(1) %out, align 8 1386 ret void 1387} 1388; Non-splat constant numerator <1.0, 2.0> over <2 x float> %x: every fdiv below (plain, afn, fast, with and without !fpmath !0) is expected to be left as an fdiv for every RUN line (single shared CHECK prefix). 1389define amdgpu_kernel void @rcp_fdiv_f32_fpmath_vector_nonsplat(ptr addrspace(1) %out, <2 x float> %x) { 1390; CHECK-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath_vector_nonsplat( 1391; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { 1392; CHECK-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]] 1393; CHECK-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8 1394; CHECK-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]] 1395; CHECK-NEXT: store volatile <2 x float> [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 8 1396; CHECK-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]] 1397; CHECK-NEXT: store volatile <2 x float> [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 8 1398; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]], !fpmath [[META0]] 1399; CHECK-NEXT: store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8 1400; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]], !fpmath [[META0]] 1401; CHECK-NEXT: store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8 1402; CHECK-NEXT: 
ret void 1403; 1404 %no.md = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x 1405 store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 8 1406 %afn.no.md = fdiv afn <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x 1407 store volatile <2 x float> %afn.no.md, ptr addrspace(1) %out, align 8 1408 %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x 1409 store volatile <2 x float> %fast.no.md, ptr addrspace(1) %out, align 8 1410 %afn.25ulp = fdiv afn <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0 1411 store volatile <2 x float> %afn.25ulp, ptr addrspace(1) %out, align 8 1412 %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0 1413 store volatile <2 x float> %fast.25ulp, ptr addrspace(1) %out, align 8 1414 ret void 1415} 1416; Numerator is %x with only lane 0 forced to 1.0 via insertelement: the afn and fast fdivs with !fpmath !0 are expected to be left as fdivs for every RUN line (single shared CHECK prefix). 1417define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y) { 1418; CHECK-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant( 1419; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] { 1420; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0 1421; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> [[X_INSERT]], [[Y]], !fpmath [[META0]] 1422; CHECK-NEXT: store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8 1423; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> [[X_INSERT]], [[Y]], !fpmath [[META0]] 1424; CHECK-NEXT: store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8 1425; CHECK-NEXT: ret void 1426; 1427 %x.insert = insertelement <2 x float> %x, float 1.000000e+00, i32 0 1428 %afn.25ulp = fdiv afn <2 x float> %x.insert, %y, !fpmath !0 1429 store volatile <2 x float> %afn.25ulp, ptr addrspace(1) %out, align 8 1430 %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0 1431 store volatile 
<2 x float> %fast.25ulp, ptr addrspace(1) %out, align 8 1432 ret void 1433} 1434; Numerator with only lane 0 forced to 1.0, divided with the arcp flag and !fpmath !0: the fdiv is expected to be scalarized into a per-element reciprocal of %y multiplied by the numerator element (frexp/ldexp-scaled llvm.amdgcn.rcp under the IEEE prefixes, a bare llvm.amdgcn.rcp under DAZ). 1435define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y) { 1436; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp( 1437; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] { 1438; IEEE-GOODFREXP-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0 1439; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 0 1440; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 1 1441; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[Y]], i64 0 1442; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[Y]], i64 1 1443; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 1444; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 1445; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 1446; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]] 1447; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]]) 1448; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]]) 1449; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul arcp float [[TMP1]], [[TMP10]] 1450; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 1451; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 1452; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 1453; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] 1454; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 1455; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp float 
@llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) 1456; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = fmul arcp float [[TMP2]], [[TMP17]] 1457; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0 1458; IEEE-GOODFREXP-NEXT: [[ARCP_25ULP:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1 1459; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP_25ULP]], ptr addrspace(1) [[OUT]], align 8 1460; IEEE-GOODFREXP-NEXT: ret void 1461; 1462; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp( 1463; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] { 1464; IEEE-BADFREXP-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0 1465; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 0 1466; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 1 1467; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[Y]], i64 0 1468; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[Y]], i64 1 1469; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 1470; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 1471; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]]) 1472; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]] 1473; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]]) 1474; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]]) 1475; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul arcp float [[TMP1]], [[TMP10]] 1476; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 1477; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 1478; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 
@llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]]) 1479; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] 1480; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 1481; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) 1482; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = fmul arcp float [[TMP2]], [[TMP17]] 1483; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0 1484; IEEE-BADFREXP-NEXT: [[ARCP_25ULP:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1 1485; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP_25ULP]], ptr addrspace(1) [[OUT]], align 8 1486; IEEE-BADFREXP-NEXT: ret void 1487; 1488; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp( 1489; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] { 1490; DAZ-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0 1491; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 0 1492; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 1 1493; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[Y]], i64 0 1494; DAZ-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[Y]], i64 1 1495; DAZ-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP3]]) 1496; DAZ-NEXT: [[TMP6:%.*]] = fmul arcp float [[TMP1]], [[TMP5]] 1497; DAZ-NEXT: [[TMP7:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP4]]) 1498; DAZ-NEXT: [[TMP8:%.*]] = fmul arcp float [[TMP2]], [[TMP7]] 1499; DAZ-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0 1500; DAZ-NEXT: [[ARCP_25ULP:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP8]], i64 1 1501; DAZ-NEXT: store volatile <2 x float> [[ARCP_25ULP]], ptr addrspace(1) [[OUT]], align 8 1502; DAZ-NEXT: ret void 1503; 1504 %x.insert = insertelement <2 x float> %x, float 
1.000000e+00, i32 0 1505 %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0 1506 store volatile <2 x float> %arcp.25ulp, ptr addrspace(1) %out, align 8 1507 ret void 1508} 1509 1510define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) { 1511; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rsq_f32_fpmath( 1512; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { 1513; IEEE-GOODFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]) 1514; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_X_NO_MD]] 1515; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 1516; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000 1517; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00 1518; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = fmul contract float [[X]], [[TMP2]] 1519; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]]) 1520; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00 1521; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = fmul contract float [[TMP4]], [[TMP5]] 1522; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 1523; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2:![0-9]+]] 1524; IEEE-GOODFREXP-NEXT: store volatile float [[SQRT_MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4 1525; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MD_1ULP_MULTI_USE]]) 1526; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 1527; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1 1528; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] 1529; IEEE-GOODFREXP-NEXT: 
[[TMP10:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP7]]) 1530; IEEE-GOODFREXP-NEXT: [[MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) 1531; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4 1532; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000 1533; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00 1534; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = fmul contract float [[X]], [[TMP12]] 1535; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]]) 1536; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00 1537; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = fmul contract float [[TMP14]], [[TMP15]] 1538; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 1539; IEEE-GOODFREXP-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] 1540; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath [[META1]] 1541; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 1542; IEEE-GOODFREXP-NEXT: [[SQRT_X_AFN_NO_MD:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]]) 1543; IEEE-GOODFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_NO_MD]] 1544; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 1545; IEEE-GOODFREXP-NEXT: [[AFN_25ULP:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[X]]) 1546; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 1547; IEEE-GOODFREXP-NEXT: [[SQRT_X_FAST_NO_MD:%.*]] = call fast float @llvm.sqrt.f32(float [[X]]) 1548; IEEE-GOODFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv 
fast float 1.000000e+00, [[SQRT_X_FAST_NO_MD]] 1549; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 1550; IEEE-GOODFREXP-NEXT: [[FAST_25ULP:%.*]] = call fast float @llvm.amdgcn.rsq.f32(float [[X]]) 1551; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4 1552; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000 1553; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = select contract i1 [[TMP16]], float 0x4170000000000000, float 1.000000e+00 1554; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = fmul contract float [[X]], [[TMP17]] 1555; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP18]]) 1556; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = select contract i1 [[TMP16]], float 4.096000e+03, float 1.000000e+00 1557; IEEE-GOODFREXP-NEXT: [[FDIV_OPENCL:%.*]] = fmul contract float [[TMP19]], [[TMP20]] 1558; IEEE-GOODFREXP-NEXT: store volatile float [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 1559; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000 1560; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = select contract i1 [[TMP21]], float 0x4170000000000000, float 1.000000e+00 1561; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = fmul contract float [[X]], [[TMP22]] 1562; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP23]]) 1563; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = select contract i1 [[TMP21]], float -4.096000e+03, float -1.000000e+00 1564; IEEE-GOODFREXP-NEXT: [[NEG_FDIV_OPENCL:%.*]] = fmul contract float [[TMP24]], [[TMP25]] 1565; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 1566; IEEE-GOODFREXP-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] 1567; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_HALF_ULP]]) 1568; IEEE-GOODFREXP-NEXT: 
[[TMP27:%.*]] = extractvalue { float, i32 } [[TMP26]], 0 1569; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP26]], 1 1570; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = sub i32 0, [[TMP28]] 1571; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP27]]) 1572; IEEE-GOODFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD0:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP30]], i32 [[TMP29]]) 1573; IEEE-GOODFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD0]], ptr addrspace(1) [[OUT]], align 4 1574; IEEE-GOODFREXP-NEXT: [[SQRT_MISMATCH_MD1:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]) 1575; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MISMATCH_MD1]]) 1576; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0 1577; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP31]], 1 1578; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]] 1579; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]]) 1580; IEEE-GOODFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD1:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]]) 1581; IEEE-GOODFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD1]], ptr addrspace(1) [[OUT]], align 4 1582; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = fcmp olt float [[X]], 0x3810000000000000 1583; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 32, i32 0 1584; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP37]]) 1585; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP38]]) 1586; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = select i1 [[TMP36]], i32 -16, i32 0 1587; IEEE-GOODFREXP-NEXT: [[SQRT_MISMATCH_MD2:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP39]], i32 [[TMP40]]) 1588; IEEE-GOODFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD2:%.*]] = fdiv contract afn float 1.000000e+00, 
[[SQRT_MISMATCH_MD2]] 1589; IEEE-GOODFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD2]], ptr addrspace(1) [[OUT]], align 4 1590; IEEE-GOODFREXP-NEXT: ret void 1591; 1592; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rsq_f32_fpmath( 1593; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { 1594; IEEE-BADFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]) 1595; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_X_NO_MD]] 1596; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 1597; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000 1598; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00 1599; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = fmul contract float [[X]], [[TMP2]] 1600; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]]) 1601; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00 1602; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = fmul contract float [[TMP4]], [[TMP5]] 1603; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 1604; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2:![0-9]+]] 1605; IEEE-BADFREXP-NEXT: store volatile float [[SQRT_MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4 1606; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MD_1ULP_MULTI_USE]]) 1607; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 1608; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_MD_1ULP_MULTI_USE]]) 1609; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] 1610; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP7]]) 1611; 
IEEE-BADFREXP-NEXT: [[MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) 1612; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4 1613; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000 1614; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00 1615; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = fmul contract float [[X]], [[TMP12]] 1616; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]]) 1617; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00 1618; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = fmul contract float [[TMP14]], [[TMP15]] 1619; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 1620; IEEE-BADFREXP-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] 1621; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath [[META1]] 1622; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 1623; IEEE-BADFREXP-NEXT: [[SQRT_X_AFN_NO_MD:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]]) 1624; IEEE-BADFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_NO_MD]] 1625; IEEE-BADFREXP-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 1626; IEEE-BADFREXP-NEXT: [[AFN_25ULP:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[X]]) 1627; IEEE-BADFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 1628; IEEE-BADFREXP-NEXT: [[SQRT_X_FAST_NO_MD:%.*]] = call fast float @llvm.sqrt.f32(float [[X]]) 1629; IEEE-BADFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[SQRT_X_FAST_NO_MD]] 1630; IEEE-BADFREXP-NEXT: store volatile float 
[[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 1631; IEEE-BADFREXP-NEXT: [[FAST_25ULP:%.*]] = call fast float @llvm.amdgcn.rsq.f32(float [[X]]) 1632; IEEE-BADFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4 1633; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000 1634; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = select contract i1 [[TMP16]], float 0x4170000000000000, float 1.000000e+00 1635; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = fmul contract float [[X]], [[TMP17]] 1636; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP18]]) 1637; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = select contract i1 [[TMP16]], float 4.096000e+03, float 1.000000e+00 1638; IEEE-BADFREXP-NEXT: [[FDIV_OPENCL:%.*]] = fmul contract float [[TMP19]], [[TMP20]] 1639; IEEE-BADFREXP-NEXT: store volatile float [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 1640; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000 1641; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = select contract i1 [[TMP21]], float 0x4170000000000000, float 1.000000e+00 1642; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = fmul contract float [[X]], [[TMP22]] 1643; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP23]]) 1644; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = select contract i1 [[TMP21]], float -4.096000e+03, float -1.000000e+00 1645; IEEE-BADFREXP-NEXT: [[NEG_FDIV_OPENCL:%.*]] = fmul contract float [[TMP24]], [[TMP25]] 1646; IEEE-BADFREXP-NEXT: store volatile float [[NEG_FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 1647; IEEE-BADFREXP-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] 1648; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_HALF_ULP]]) 1649; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP26]], 0 1650; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call i32 
@llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_X_HALF_ULP]]) 1651; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = sub i32 0, [[TMP28]] 1652; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP27]]) 1653; IEEE-BADFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD0:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP30]], i32 [[TMP29]]) 1654; IEEE-BADFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD0]], ptr addrspace(1) [[OUT]], align 4 1655; IEEE-BADFREXP-NEXT: [[SQRT_MISMATCH_MD1:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]) 1656; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MISMATCH_MD1]]) 1657; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0 1658; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_MISMATCH_MD1]]) 1659; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]] 1660; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]]) 1661; IEEE-BADFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD1:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]]) 1662; IEEE-BADFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD1]], ptr addrspace(1) [[OUT]], align 4 1663; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = fcmp olt float [[X]], 0x3810000000000000 1664; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 32, i32 0 1665; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP37]]) 1666; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP38]]) 1667; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = select i1 [[TMP36]], i32 -16, i32 0 1668; IEEE-BADFREXP-NEXT: [[SQRT_MISMATCH_MD2:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP39]], i32 [[TMP40]]) 1669; IEEE-BADFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD2:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_MISMATCH_MD2]] 1670; IEEE-BADFREXP-NEXT: store volatile float 
[[FDIV_SQRT_MISMATCH_MD2]], ptr addrspace(1) [[OUT]], align 4 1671; IEEE-BADFREXP-NEXT: ret void 1672; 1673; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_fpmath( 1674; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { 1675; DAZ-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]) 1676; DAZ-NEXT: [[NO_MD:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_X_NO_MD]] 1677; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 1678; DAZ-NEXT: [[MD_1ULP:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]]) 1679; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 1680; DAZ-NEXT: [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 1681; DAZ-NEXT: store volatile float [[SQRT_MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4 1682; DAZ-NEXT: [[MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_MD_1ULP_MULTI_USE]]) 1683; DAZ-NEXT: store volatile float [[MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4 1684; DAZ-NEXT: [[MD_25ULP:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]]) 1685; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 1686; DAZ-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] 1687; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath [[META1]] 1688; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 1689; DAZ-NEXT: [[SQRT_X_AFN_NO_MD:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]]) 1690; DAZ-NEXT: [[AFN_NO_MD:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_NO_MD]] 1691; DAZ-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 1692; DAZ-NEXT: [[AFN_25ULP:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[X]]) 1693; DAZ-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) 
[[OUT]], align 4 1694; DAZ-NEXT: [[SQRT_X_FAST_NO_MD:%.*]] = call fast float @llvm.sqrt.f32(float [[X]]) 1695; DAZ-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[SQRT_X_FAST_NO_MD]] 1696; DAZ-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 1697; DAZ-NEXT: [[FAST_25ULP:%.*]] = call fast float @llvm.amdgcn.rsq.f32(float [[X]]) 1698; DAZ-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4 1699; DAZ-NEXT: [[FDIV_OPENCL:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]]) 1700; DAZ-NEXT: store volatile float [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 1701; DAZ-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]]) 1702; DAZ-NEXT: [[NEG_FDIV_OPENCL:%.*]] = fneg contract float [[TMP1]] 1703; DAZ-NEXT: store volatile float [[NEG_FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 1704; DAZ-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] 1705; DAZ-NEXT: [[FDIV_SQRT_MISMATCH_MD0:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_HALF_ULP]]) 1706; DAZ-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD0]], ptr addrspace(1) [[OUT]], align 4 1707; DAZ-NEXT: [[SQRT_MISMATCH_MD1:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]) 1708; DAZ-NEXT: [[FDIV_SQRT_MISMATCH_MD1:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_MISMATCH_MD1]]) 1709; DAZ-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD1]], ptr addrspace(1) [[OUT]], align 4 1710; DAZ-NEXT: [[SQRT_MISMATCH_MD2:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) 1711; DAZ-NEXT: [[FDIV_SQRT_MISMATCH_MD2:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_MISMATCH_MD2]] 1712; DAZ-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD2]], ptr addrspace(1) [[OUT]], align 4 1713; DAZ-NEXT: ret void 1714; 1715 %sqrt.x.no.md = call contract float @llvm.sqrt.f32(float %x) 1716 %no.md = fdiv contract float 1.000000e+00, %sqrt.x.no.md 1717 store volatile float 
%no.md, ptr addrspace(1) %out, align 4 1718 1719 ; Matches the rsq instruction accuracy 1720 %sqrt.md.1ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !2 1721 %md.1ulp = fdiv contract float 1.000000e+00, %sqrt.md.1ulp, !fpmath !2 1722 store volatile float %md.1ulp, ptr addrspace(1) %out, align 4 1723 1724 %sqrt.md.1ulp.multi.use = call contract float @llvm.sqrt.f32(float %x), !fpmath !2 1725 store volatile float %sqrt.md.1ulp.multi.use, ptr addrspace(1) %out, align 4 1726 %md.1ulp.multi.use = fdiv contract float 1.000000e+00, %sqrt.md.1ulp.multi.use, !fpmath !2 1727 store volatile float %md.1ulp.multi.use, ptr addrspace(1) %out, align 4 1728 1729 %sqrt.md.25ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !0 1730 %md.25ulp = fdiv contract float 1.000000e+00, %sqrt.md.25ulp, !fpmath !0 1731 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 1732 1733 %sqrt.md.half.ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !1 1734 %md.half.ulp = fdiv contract float 1.000000e+00, %sqrt.md.half.ulp, !fpmath !1 1735 store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4 1736 1737 %sqrt.x.afn.no.md = call contract afn float @llvm.sqrt.f32(float %x) 1738 %afn.no.md = fdiv contract afn float 1.000000e+00, %sqrt.x.afn.no.md 1739 store volatile float %afn.no.md, ptr addrspace(1) %out, align 4 1740 1741 %sqrt.x.afn.25ulp = call contract afn float @llvm.sqrt.f32(float %x), !fpmath !0 1742 %afn.25ulp = fdiv contract afn float 1.000000e+00, %sqrt.x.afn.25ulp, !fpmath !0 1743 store volatile float %afn.25ulp, ptr addrspace(1) %out, align 4 1744 1745 %sqrt.x.fast.no.md = call fast float @llvm.sqrt.f32(float %x) 1746 %fast.no.md = fdiv fast float 1.000000e+00, %sqrt.x.fast.no.md 1747 store volatile float %fast.no.md, ptr addrspace(1) %out, align 4 1748 1749 %sqrt.x.fast.25ulp = call fast float @llvm.sqrt.f32(float %x), !fpmath !0 1750 %fast.25ulp = fdiv fast float 1.000000e+00, %sqrt.x.fast.25ulp, !fpmath !0 1751 store volatile float 
%fast.25ulp, ptr addrspace(1) %out, align 4


  ; Test mismatched metadata/flags between the sqrt and fdiv

  ; Test the expected opencl default pattern
  %sqrt.x.3ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
  %fdiv.opencl = fdiv contract float 1.0, %sqrt.x.3ulp, !fpmath !0
  store volatile float %fdiv.opencl, ptr addrspace(1) %out, align 4

  %neg.sqrt.x.3ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
  %neg.fdiv.opencl = fdiv contract float -1.0, %neg.sqrt.x.3ulp, !fpmath !0
  store volatile float %neg.fdiv.opencl, ptr addrspace(1) %out, align 4

  ; sqrt demands higher precision than fdiv
  %sqrt.x.half.ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !1
  %fdiv.sqrt.mismatch.md0 = fdiv contract float 1.0, %sqrt.x.half.ulp, !fpmath !0
  store volatile float %fdiv.sqrt.mismatch.md0, ptr addrspace(1) %out, align 4

  ; sqrt demands full precision but has afn
  %sqrt.mismatch.md1 = call afn float @llvm.sqrt.f32(float %x)
  %fdiv.sqrt.mismatch.md1 = fdiv contract float 1.0, %sqrt.mismatch.md1, !fpmath !0
  store volatile float %fdiv.sqrt.mismatch.md1, ptr addrspace(1) %out, align 4

  ; sqrt has relaxed precision fdiv has afn only
  %sqrt.mismatch.md2 = call contract float @llvm.sqrt.f32(float %x), !fpmath !3
  %fdiv.sqrt.mismatch.md2 = fdiv contract afn float 1.0, %sqrt.mismatch.md2
  store volatile float %fdiv.sqrt.mismatch.md2, ptr addrspace(1) %out, align 4

  ret void
}

; 1.0 / sqrt(x) with various fast-math flags on the sqrt call and the fdiv.
; The expected output carries the union of both instructions' flags on every
; emitted instruction (see the two "mix" cases, where each side contributes a
; different flag).
define amdgpu_kernel void @rsq_f32_fpmath_flags(ptr addrspace(1) %out, float %x) {
; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_fpmath_flags(
; IEEE-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
; IEEE-NEXT:    [[TMP1:%.*]] = fcmp nnan ninf contract olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP2:%.*]] = select nnan ninf contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
; IEEE-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract float [[X]], [[TMP2]]
; IEEE-NEXT:    [[TMP4:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
; IEEE-NEXT:    [[TMP5:%.*]] = select nnan ninf contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
; IEEE-NEXT:    [[FDIV_OPENCL_NINF_NNAN:%.*]] = fmul nnan ninf contract float [[TMP4]], [[TMP5]]
; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP6:%.*]] = fcmp ninf contract olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP7:%.*]] = select ninf contract i1 [[TMP6]], float 0x4170000000000000, float 1.000000e+00
; IEEE-NEXT:    [[TMP8:%.*]] = fmul ninf contract float [[X]], [[TMP7]]
; IEEE-NEXT:    [[TMP9:%.*]] = call ninf contract float @llvm.amdgcn.rsq.f32(float [[TMP8]])
; IEEE-NEXT:    [[TMP10:%.*]] = select ninf contract i1 [[TMP6]], float 4.096000e+03, float 1.000000e+00
; IEEE-NEXT:    [[FDIV_OPENCL_NINF:%.*]] = fmul ninf contract float [[TMP9]], [[TMP10]]
; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NINF]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP11:%.*]] = fcmp nnan contract olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP12:%.*]] = select nnan contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00
; IEEE-NEXT:    [[TMP13:%.*]] = fmul nnan contract float [[X]], [[TMP12]]
; IEEE-NEXT:    [[TMP14:%.*]] = call nnan contract float @llvm.amdgcn.rsq.f32(float [[TMP13]])
; IEEE-NEXT:    [[TMP15:%.*]] = select nnan contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00
; IEEE-NEXT:    [[FDIV_OPENCL_NNAN:%.*]] = fmul nnan contract float [[TMP14]], [[TMP15]]
; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NNAN]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP16:%.*]] = fcmp nsz contract olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP17:%.*]] = select nsz contract i1 [[TMP16]], float 0x4170000000000000, float 1.000000e+00
; IEEE-NEXT:    [[TMP18:%.*]] = fmul nsz contract float [[X]], [[TMP17]]
; IEEE-NEXT:    [[TMP19:%.*]] = call nsz contract float @llvm.amdgcn.rsq.f32(float [[TMP18]])
; IEEE-NEXT:    [[TMP20:%.*]] = select nsz contract i1 [[TMP16]], float 4.096000e+03, float 1.000000e+00
; IEEE-NEXT:    [[FDIV_OPENCL_NSZ:%.*]] = fmul nsz contract float [[TMP19]], [[TMP20]]
; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NSZ]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP21:%.*]] = fcmp nnan ninf contract olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP22:%.*]] = select nnan ninf contract i1 [[TMP21]], float 0x4170000000000000, float 1.000000e+00
; IEEE-NEXT:    [[TMP23:%.*]] = fmul nnan ninf contract float [[X]], [[TMP22]]
; IEEE-NEXT:    [[TMP24:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[TMP23]])
; IEEE-NEXT:    [[TMP25:%.*]] = select nnan ninf contract i1 [[TMP21]], float 4.096000e+03, float 1.000000e+00
; IEEE-NEXT:    [[FDIV_OPENCL_NNAN_MIX0:%.*]] = fmul nnan ninf contract float [[TMP24]], [[TMP25]]
; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NNAN_MIX0]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP26:%.*]] = fcmp nnan ninf contract olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP27:%.*]] = select nnan ninf contract i1 [[TMP26]], float 0x4170000000000000, float 1.000000e+00
; IEEE-NEXT:    [[TMP28:%.*]] = fmul nnan ninf contract float [[X]], [[TMP27]]
; IEEE-NEXT:    [[TMP29:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[TMP28]])
; IEEE-NEXT:    [[TMP30:%.*]] = select nnan ninf contract i1 [[TMP26]], float 4.096000e+03, float 1.000000e+00
; IEEE-NEXT:    [[FDIV_OPENCL_NINF_MIX1:%.*]] = fmul nnan ninf contract float [[TMP29]], [[TMP30]]
; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NINF_MIX1]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    ret void
;
; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_fpmath_flags(
; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
; DAZ-NEXT:    [[FDIV_OPENCL_NINF_NNAN:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[FDIV_OPENCL_NINF:%.*]] = call ninf contract float @llvm.amdgcn.rsq.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NINF]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[FDIV_OPENCL_NNAN:%.*]] = call nnan contract float @llvm.amdgcn.rsq.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NNAN]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[FDIV_OPENCL_NSZ:%.*]] = call nsz contract float @llvm.amdgcn.rsq.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NSZ]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[FDIV_OPENCL_NNAN_MIX0:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NNAN_MIX0]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[FDIV_OPENCL_NINF_MIX1:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[X]])
; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NINF_MIX1]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    ret void
;
  %sqrt.x.3ulp.ninf.nnan = call contract ninf nnan float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
  %fdiv.opencl.ninf.nnan = fdiv contract ninf nnan float 1.0, %sqrt.x.3ulp.ninf.nnan, !fpmath !0
  store volatile float %fdiv.opencl.ninf.nnan, ptr addrspace(1) %out, align 4

  %sqrt.x.3ulp.ninf = call contract ninf float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
  %fdiv.opencl.ninf = fdiv contract ninf float 1.0, %sqrt.x.3ulp.ninf, !fpmath !0
  store volatile float %fdiv.opencl.ninf, ptr addrspace(1) %out, align 4

  %sqrt.x.3ulp.nnan = call contract nnan float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
  %fdiv.opencl.nnan = fdiv contract nnan float 1.0, %sqrt.x.3ulp.nnan, !fpmath !0
  store volatile float %fdiv.opencl.nnan, ptr addrspace(1) %out, align 4

  %sqrt.x.3ulp.nsz = call contract nsz float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv
  %fdiv.opencl.nsz = fdiv contract nsz float 1.0, %sqrt.x.3ulp.nsz, !fpmath !0
  store volatile float %fdiv.opencl.nsz, ptr addrspace(1) %out, align 4

  ; Mixed flags: ninf only on the sqrt, nnan only on the fdiv
  %sqrt.x.3ulp.ninf.mix0 = call contract ninf float @llvm.sqrt.f32(float %x), !fpmath !3
  %fdiv.opencl.nnan.mix0 = fdiv contract nnan float 1.0, %sqrt.x.3ulp.ninf.mix0, !fpmath !0
  store volatile float %fdiv.opencl.nnan.mix0, ptr addrspace(1) %out, align 4

  ; Mixed flags, reversed: nnan only on the sqrt, ninf only on the fdiv.
  ; This case used to be a verbatim copy of mix0. The check lines for it were
  ; renamed by hand; re-run utils/update_test_checks.py to confirm them.
  %sqrt.x.3ulp.nnan.mix1 = call contract nnan float @llvm.sqrt.f32(float %x), !fpmath !3
  %fdiv.opencl.ninf.mix1 = fdiv contract ninf float 1.0, %sqrt.x.3ulp.nnan.mix1, !fpmath !0
  store volatile float %fdiv.opencl.ninf.mix1, ptr addrspace(1) %out, align 4

  ret void
}

define float @rsq_f32_missing_contract0(float %x) {
; IEEE-GOODFREXP-LABEL: define float @rsq_f32_missing_contract0(
; IEEE-GOODFREXP-SAME: float [[X:%.*]]) #[[ATTR1]] {
; IEEE-GOODFREXP-NEXT:    [[SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2]]
; IEEE-GOODFREXP-NEXT:    [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]])
; IEEE-GOODFREXP-NEXT:    [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
; IEEE-GOODFREXP-NEXT:    [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
; IEEE-GOODFREXP-NEXT:    [[TMP4:%.*]] = sub i32 0, [[TMP3]]
; IEEE-GOODFREXP-NEXT:    [[TMP5:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP2]])
; IEEE-GOODFREXP-NEXT:    [[FDIV_OPENCL:%.*]] = call contract float
@llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
; IEEE-GOODFREXP-NEXT:    ret float [[FDIV_OPENCL]]
;
; IEEE-BADFREXP-LABEL: define float @rsq_f32_missing_contract0(
; IEEE-BADFREXP-SAME: float [[X:%.*]]) #[[ATTR1]] {
; IEEE-BADFREXP-NEXT:    [[SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2]]
; IEEE-BADFREXP-NEXT:    [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]])
; IEEE-BADFREXP-NEXT:    [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
; IEEE-BADFREXP-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_X_3ULP]])
; IEEE-BADFREXP-NEXT:    [[TMP4:%.*]] = sub i32 0, [[TMP3]]
; IEEE-BADFREXP-NEXT:    [[TMP5:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP2]])
; IEEE-BADFREXP-NEXT:    [[FDIV_OPENCL:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
; IEEE-BADFREXP-NEXT:    ret float [[FDIV_OPENCL]]
;
; DAZ-LABEL: define float @rsq_f32_missing_contract0(
; DAZ-SAME: float [[X:%.*]]) #[[ATTR1]] {
; DAZ-NEXT:    [[SQRT_X_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    [[FDIV_OPENCL:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP]])
; DAZ-NEXT:    ret float [[FDIV_OPENCL]]
;
  %sqrt.x.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2
  %fdiv.opencl = fdiv contract float 1.0, %sqrt.x.3ulp, !fpmath !2
  ret float %fdiv.opencl
}

; Only the sqrt call carries the contract flag here; the fdiv has no fast-math
; flags. The checks expect no llvm.amdgcn.rsq to be formed: the square root and
; the reciprocal stay as separate operations in every configuration.
define float @rsq_f32_missing_contract1(float %x) {
; IEEE-GOODFREXP-LABEL: define float @rsq_f32_missing_contract1(
; IEEE-GOODFREXP-SAME: float [[X:%.*]]) #[[ATTR1]] {
; IEEE-GOODFREXP-NEXT:    [[SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2]]
; IEEE-GOODFREXP-NEXT:    [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]])
; IEEE-GOODFREXP-NEXT:    [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
; IEEE-GOODFREXP-NEXT:    [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1
; IEEE-GOODFREXP-NEXT:    [[TMP4:%.*]] = sub i32 0, [[TMP3]]
; IEEE-GOODFREXP-NEXT:    [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
; IEEE-GOODFREXP-NEXT:    [[FDIV_OPENCL:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
; IEEE-GOODFREXP-NEXT:    ret float [[FDIV_OPENCL]]
;
; IEEE-BADFREXP-LABEL: define float @rsq_f32_missing_contract1(
; IEEE-BADFREXP-SAME: float [[X:%.*]]) #[[ATTR1]] {
; IEEE-BADFREXP-NEXT:    [[SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2]]
; IEEE-BADFREXP-NEXT:    [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]])
; IEEE-BADFREXP-NEXT:    [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0
; IEEE-BADFREXP-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_X_3ULP]])
; IEEE-BADFREXP-NEXT:    [[TMP4:%.*]] = sub i32 0, [[TMP3]]
; IEEE-BADFREXP-NEXT:    [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]])
; IEEE-BADFREXP-NEXT:    [[FDIV_OPENCL:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]])
; IEEE-BADFREXP-NEXT:    ret float [[FDIV_OPENCL]]
;
; DAZ-LABEL: define float @rsq_f32_missing_contract1(
; DAZ-SAME: float [[X:%.*]]) #[[ATTR1]] {
; DAZ-NEXT:    [[SQRT_X_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]])
; DAZ-NEXT:    [[FDIV_OPENCL:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP]])
; DAZ-NEXT:    ret float [[FDIV_OPENCL]]
;
  %sqrt.x.3ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !2
  %fdiv.opencl = fdiv float 1.0, %sqrt.x.3ulp, !fpmath !2
  ret float %fdiv.opencl
}

define float @rsq_f32_flag_merge(float %x) {
; IEEE-LABEL: define float @rsq_f32_flag_merge(
; IEEE-SAME: float [[X:%.*]]) #[[ATTR1]] {
; IEEE-NEXT:    [[TMP1:%.*]] = fcmp ninf nsz contract olt float [[X]], 0x3810000000000000
; IEEE-NEXT:    [[TMP2:%.*]] =
select ninf nsz contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
; IEEE-NEXT:    [[TMP3:%.*]] = fmul ninf nsz contract float [[X]], [[TMP2]]
; IEEE-NEXT:    [[TMP4:%.*]] = call ninf nsz contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
; IEEE-NEXT:    [[TMP5:%.*]] = select ninf nsz contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
; IEEE-NEXT:    [[FDIV_OPENCL:%.*]] = fmul ninf nsz contract float [[TMP4]], [[TMP5]]
; IEEE-NEXT:    ret float [[FDIV_OPENCL]]
;
; DAZ-LABEL: define float @rsq_f32_flag_merge(
; DAZ-SAME: float [[X:%.*]]) #[[ATTR1]] {
; DAZ-NEXT:    [[FDIV_OPENCL:%.*]] = call ninf nsz contract float @llvm.amdgcn.rsq.f32(float [[X]])
; DAZ-NEXT:    ret float [[FDIV_OPENCL]]
;
  %sqrt.x.3ulp = call contract ninf float @llvm.sqrt.f32(float %x), !fpmath !2
  %fdiv.opencl = fdiv contract nsz float 1.0, %sqrt.x.3ulp, !fpmath !2
  ret float %fdiv.opencl
}

; 1.0 / sqrt(x) where the argument is known to exclude NaN, infinity, or both
; through nofpclass. The expected output for each argument still contains the
; input-scaling sequence around llvm.amdgcn.rsq in the IEEE configurations.
;
; NOTE(review): %no.inf was previously declared nofpclass(nan), duplicating
; %no.nan, so the inf-only case was never exercised. It is nofpclass(inf) now.
; The two -SAME lines were adjusted by hand to match and the remaining check
; lines are assumed unchanged; re-run utils/update_test_checks.py to confirm.
define amdgpu_kernel void @rsq_f32_knownfinite(ptr addrspace(1) %out, float nofpclass(nan) %no.nan,
; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_knownfinite(
; IEEE-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(inf) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] {
; IEEE-NEXT:    [[TMP1:%.*]] = fcmp contract olt float [[NO_NAN]], 0x3810000000000000
; IEEE-NEXT:    [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
; IEEE-NEXT:    [[TMP3:%.*]] = fmul contract float [[NO_NAN]], [[TMP2]]
; IEEE-NEXT:    [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
; IEEE-NEXT:    [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
; IEEE-NEXT:    [[FDIV_OPENCL_NO_NAN:%.*]] = fmul contract float [[TMP4]], [[TMP5]]
; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NO_NAN]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP6:%.*]] = fcmp contract olt float [[NO_INF]], 0x3810000000000000
; IEEE-NEXT:    [[TMP7:%.*]] = select contract i1 [[TMP6]], float 0x4170000000000000, float 1.000000e+00
; IEEE-NEXT:    [[TMP8:%.*]] = fmul contract float [[NO_INF]], [[TMP7]]
; IEEE-NEXT:    [[TMP9:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP8]])
; IEEE-NEXT:    [[TMP10:%.*]] = select contract i1 [[TMP6]], float 4.096000e+03, float 1.000000e+00
; IEEE-NEXT:    [[FDIV_OPENCL_NO_INF:%.*]] = fmul contract float [[TMP9]], [[TMP10]]
; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NO_INF]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP11:%.*]] = fcmp contract olt float [[NO_INF_NAN]], 0x3810000000000000
; IEEE-NEXT:    [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00
; IEEE-NEXT:    [[TMP13:%.*]] = fmul contract float [[NO_INF_NAN]], [[TMP12]]
; IEEE-NEXT:    [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]])
; IEEE-NEXT:    [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00
; IEEE-NEXT:    [[FDIV_OPENCL_NO_INF_NAN:%.*]] = fmul contract float [[TMP14]], [[TMP15]]
; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    ret void
;
; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_knownfinite(
; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(inf) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] {
; DAZ-NEXT:    [[FDIV_OPENCL_NO_NAN:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_NAN]])
; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NO_NAN]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[FDIV_OPENCL_NO_INF:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_INF]])
; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NO_INF]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[FDIV_OPENCL_NO_INF_NAN:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_INF_NAN]])
; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    ret void
;
                                               float nofpclass(inf) %no.inf,
                                               float nofpclass(inf nan) %no.inf.nan) {
  %sqrt.x.3ulp.no.nan = call contract float @llvm.sqrt.f32(float %no.nan), !fpmath !3
  %fdiv.opencl.no.nan = fdiv contract float 1.0, %sqrt.x.3ulp.no.nan, !fpmath !0
  store volatile float %fdiv.opencl.no.nan, ptr addrspace(1) %out, align 4

  %sqrt.x.3ulp.no.inf = call contract float @llvm.sqrt.f32(float %no.inf), !fpmath !3
  %fdiv.opencl.no.inf = fdiv contract float 1.0, %sqrt.x.3ulp.no.inf, !fpmath !0
  store volatile float %fdiv.opencl.no.inf, ptr addrspace(1) %out, align 4

  %sqrt.x.3ulp.no.inf.nan = call contract float @llvm.sqrt.f32(float %no.inf.nan), !fpmath !3
  %fdiv.opencl.no.inf.nan = fdiv contract float 1.0, %sqrt.x.3ulp.no.inf.nan, !fpmath !0
  store volatile float %fdiv.opencl.no.inf.nan, ptr addrspace(1) %out, align 4

  ret void
}

define amdgpu_kernel void @rsq_f32_known_nozero(ptr addrspace(1) %out, float nofpclass(zero) %no.zero, float nofpclass(zero sub) %no.zero.sub) {
; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_known_nozero(
; IEEE-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] {
; IEEE-NEXT:    [[TMP1:%.*]] = fcmp contract olt float [[NO_ZERO]], 0x3810000000000000
; IEEE-NEXT:    [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
; IEEE-NEXT:    [[TMP3:%.*]] = fmul contract float [[NO_ZERO]], [[TMP2]]
; IEEE-NEXT:    [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
; IEEE-NEXT:    [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
; IEEE-NEXT:    [[FDIV_OPENCL_NO_ZERO:%.*]] = fmul contract float [[TMP4]], [[TMP5]]
; IEEE-NEXT:    store
volatile float [[FDIV_OPENCL_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[FDIV_OPENCL_NO_ZERO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_ZERO_SUB]])
; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    ret void
;
; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_known_nozero(
; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] {
; DAZ-NEXT:    [[FDIV_OPENCL_NO_ZERO:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_ZERO]])
; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[FDIV_OPENCL_NO_ZERO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_ZERO_SUB]])
; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    ret void
;
  %sqrt.x.3ulp.no.zero = call contract float @llvm.sqrt.f32(float %no.zero), !fpmath !3
  %fdiv.opencl.no.zero = fdiv contract float 1.0, %sqrt.x.3ulp.no.zero, !fpmath !0
  store volatile float %fdiv.opencl.no.zero, ptr addrspace(1) %out, align 4

  %sqrt.x.3ulp.no.zero.sub = call contract float @llvm.sqrt.f32(float %no.zero.sub), !fpmath !3
  %fdiv.opencl.no.zero.sub = fdiv contract float 1.0, %sqrt.x.3ulp.no.zero.sub, !fpmath !0
  store volatile float %fdiv.opencl.no.zero.sub, ptr addrspace(1) %out, align 4

  ret void
}

; 1.0 / sqrt(x) where the argument excludes subnormals through nofpclass.
; With nofpclass(sub) the IEEE configurations are expected to emit a bare
; llvm.amdgcn.rsq; excluding only one sign of subnormal (psub or nsub) still
; gets the input-scaling sequence.
define amdgpu_kernel void @rsq_f32_known_nosub(ptr addrspace(1) %out, float nofpclass(sub) %no.sub, float nofpclass(psub) %no.psub, float nofpclass(nsub) %no.nsub) {
; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_known_nosub(
; IEEE-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]]) #[[ATTR1]] {
; IEEE-NEXT:    [[FDIV_OPENCL_NO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_SUB]])
; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP1:%.*]] = fcmp contract olt float [[NO_PSUB]], 0x3810000000000000
; IEEE-NEXT:    [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00
; IEEE-NEXT:    [[TMP3:%.*]] = fmul contract float [[NO_PSUB]], [[TMP2]]
; IEEE-NEXT:    [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]])
; IEEE-NEXT:    [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00
; IEEE-NEXT:    [[FDIV_OPENCL_NO_PSUB:%.*]] = fmul contract float [[TMP4]], [[TMP5]]
; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    [[TMP6:%.*]] = fcmp contract olt float [[NO_NSUB]], 0x3810000000000000
; IEEE-NEXT:    [[TMP7:%.*]] = select contract i1 [[TMP6]], float 0x4170000000000000, float 1.000000e+00
; IEEE-NEXT:    [[TMP8:%.*]] = fmul contract float [[NO_NSUB]], [[TMP7]]
; IEEE-NEXT:    [[TMP9:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP8]])
; IEEE-NEXT:    [[TMP10:%.*]] = select contract i1 [[TMP6]], float 4.096000e+03, float 1.000000e+00
; IEEE-NEXT:    [[FDIV_OPENCL_NO_NSUB:%.*]] = fmul contract float [[TMP9]], [[TMP10]]
; IEEE-NEXT:    store volatile float [[FDIV_OPENCL_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4
; IEEE-NEXT:    ret void
;
; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_known_nosub(
; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]]) #[[ATTR1]] {
; DAZ-NEXT:    [[FDIV_OPENCL_NO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_SUB]])
; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[FDIV_OPENCL_NO_PSUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_PSUB]])
; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    [[FDIV_OPENCL_NO_NSUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_NSUB]])
; DAZ-NEXT:    store volatile float [[FDIV_OPENCL_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4
; DAZ-NEXT:    ret void
;
  %sqrt.x.3ulp.no.sub = call contract float @llvm.sqrt.f32(float %no.sub), !fpmath !3
  %fdiv.opencl.no.sub = fdiv contract float 1.0, %sqrt.x.3ulp.no.sub, !fpmath !0
  store volatile float %fdiv.opencl.no.sub, ptr addrspace(1) %out, align 4

  %sqrt.x.3ulp.no.psub = call contract float @llvm.sqrt.f32(float %no.psub), !fpmath !3
  %fdiv.opencl.no.psub = fdiv contract float 1.0, %sqrt.x.3ulp.no.psub, !fpmath !0
  store volatile float %fdiv.opencl.no.psub, ptr addrspace(1) %out, align 4

  %sqrt.x.3ulp.no.nsub = call contract float @llvm.sqrt.f32(float %no.nsub), !fpmath !3
  %fdiv.opencl.no.nsub = fdiv contract float 1.0, %sqrt.x.3ulp.no.nsub, !fpmath !0
  store volatile float %fdiv.opencl.no.nsub, ptr addrspace(1) %out, align 4

  ret void
}

; 1.0 / sqrt(x) where the range of x is constrained by an llvm.assume on
; fabs(x) >= 0x3810000000000000 (the value the input IR names
; %is.not.subnormal) instead of an argument attribute. All configurations
; share the same expectation: a bare llvm.amdgcn.rsq with no input scaling.
define amdgpu_kernel void @rsq_f32_assume_nosub(ptr addrspace(1) %out, float %x) {
; CHECK-LABEL: define amdgpu_kernel void @rsq_f32_assume_nosub(
; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:    [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]])
; CHECK-NEXT:    [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000
; CHECK-NEXT:    call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]])
; CHECK-NEXT:    [[FDIV_OPENCL_NO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]])
; CHECK-NEXT:    store volatile float [[FDIV_OPENCL_NO_SUB]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %is.not.subnormal = fcmp oge float %fabs.x, 0x3810000000000000
  call void @llvm.assume(i1 %is.not.subnormal)
  %sqrt.x.3ulp.no.sub = call contract float @llvm.sqrt.f32(float %x), !fpmath !3
  %fdiv.opencl.no.sub = fdiv contract float 1.0, %sqrt.x.3ulp.no.sub, !fpmath !0
  store volatile float %fdiv.opencl.no.sub, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x float> %x) {
; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath(
; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
; IEEE-GOODFREXP-NEXT:    [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
; IEEE-GOODFREXP-NEXT:    [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]]
; IEEE-GOODFREXP-NEXT:    store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
; IEEE-GOODFREXP-NEXT:    [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]]
; IEEE-GOODFREXP-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0
; IEEE-GOODFREXP-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 1
; IEEE-GOODFREXP-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[X]], i64 0
; IEEE-GOODFREXP-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[X]], i64 1
; IEEE-GOODFREXP-NEXT:    [[TMP5:%.*]] = fcmp contract olt float [[TMP3]], 0x3810000000000000
; IEEE-GOODFREXP-NEXT:    [[TMP6:%.*]] = select contract i1 [[TMP5]], float 0x4170000000000000, float 1.000000e+00
; IEEE-GOODFREXP-NEXT:    [[TMP7:%.*]] = fmul contract float [[TMP3]], [[TMP6]]
; IEEE-GOODFREXP-NEXT:    [[TMP8:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP7]])
; IEEE-GOODFREXP-NEXT:    [[TMP9:%.*]] = select contract i1 [[TMP5]], float 4.096000e+03, float 1.000000e+00
; IEEE-GOODFREXP-NEXT:    [[TMP10:%.*]] = fmul contract float [[TMP8]], [[TMP9]]
; IEEE-GOODFREXP-NEXT:
[[TMP11:%.*]] = fcmp contract olt float [[TMP4]], 0x3810000000000000 2131; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00 2132; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = fmul contract float [[TMP4]], [[TMP12]] 2133; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]]) 2134; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00 2135; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = fmul contract float [[TMP14]], [[TMP15]] 2136; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0 2137; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP16]], i64 1 2138; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 2139; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP_UNDEF:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]] 2140; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 0 2141; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 1 2142; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[X]], i64 0 2143; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[X]], i64 1 2144; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = fcmp contract olt float [[TMP20]], 0x3810000000000000 2145; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = select contract i1 [[TMP22]], float 0x4170000000000000, float 1.000000e+00 2146; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = fmul contract float [[TMP20]], [[TMP23]] 2147; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP24]]) 2148; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = select contract i1 [[TMP22]], float 4.096000e+03, float 1.000000e+00 2149; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = fmul contract float [[TMP25]], [[TMP26]] 2150; IEEE-GOODFREXP-NEXT: 
[[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP19]]) 2151; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 2152; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1 2153; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP29]]) 2154; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 2155; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0 2156; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP32]], 1 2157; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = fmul contract float [[TMP33]], [[TMP31]] 2158; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]] 2159; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]]) 2160; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP27]], i64 0 2161; IEEE-GOODFREXP-NEXT: [[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1 2162; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr addrspace(1) [[OUT]], align 4 2163; IEEE-GOODFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META3:![0-9]+]] 2164; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 0 2165; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 1 2166; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractelement <2 x float> [[X]], i64 0 2167; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = extractelement <2 x float> [[X]], i64 1 2168; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = fcmp contract olt float [[TMP41]], 0x3810000000000000 2169; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = select contract i1 [[TMP43]], float 0x4170000000000000, float 1.000000e+00 2170; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = fmul contract float [[TMP41]], [[TMP44]] 2171; 
IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP45]]) 2172; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = select contract i1 [[TMP43]], float 4.096000e+03, float 1.000000e+00 2173; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = fmul contract float [[TMP46]], [[TMP47]] 2174; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = fcmp contract olt float [[TMP42]], 0x3810000000000000 2175; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = select contract i1 [[TMP49]], float 0x4170000000000000, float 1.000000e+00 2176; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = fmul contract float [[TMP42]], [[TMP50]] 2177; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP51]]) 2178; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = select contract i1 [[TMP49]], float 4.096000e+03, float 1.000000e+00 2179; IEEE-GOODFREXP-NEXT: [[TMP54:%.*]] = fmul contract float [[TMP52]], [[TMP53]] 2180; IEEE-GOODFREXP-NEXT: [[TMP55:%.*]] = insertelement <2 x float> poison, float [[TMP48]], i64 0 2181; IEEE-GOODFREXP-NEXT: [[FDIV_OPENCL:%.*]] = insertelement <2 x float> [[TMP55]], float [[TMP54]], i64 1 2182; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 2183; IEEE-GOODFREXP-NEXT: ret void 2184; 2185; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath( 2186; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { 2187; IEEE-BADFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) 2188; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]] 2189; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 2190; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]] 2191; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0 2192; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = 
extractelement <2 x float> [[SQRT_MD_1ULP]], i64 1 2193; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[X]], i64 0 2194; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[X]], i64 1 2195; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = fcmp contract olt float [[TMP3]], 0x3810000000000000 2196; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = select contract i1 [[TMP5]], float 0x4170000000000000, float 1.000000e+00 2197; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = fmul contract float [[TMP3]], [[TMP6]] 2198; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP7]]) 2199; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = select contract i1 [[TMP5]], float 4.096000e+03, float 1.000000e+00 2200; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = fmul contract float [[TMP8]], [[TMP9]] 2201; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[TMP4]], 0x3810000000000000 2202; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00 2203; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = fmul contract float [[TMP4]], [[TMP12]] 2204; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]]) 2205; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00 2206; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = fmul contract float [[TMP14]], [[TMP15]] 2207; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0 2208; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP16]], i64 1 2209; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 2210; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP_UNDEF:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]] 2211; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 0 2212; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractelement <2 x float> 
[[SQRT_MD_1ULP_UNDEF]], i64 1 2213; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[X]], i64 0 2214; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[X]], i64 1 2215; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = fcmp contract olt float [[TMP20]], 0x3810000000000000 2216; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = select contract i1 [[TMP22]], float 0x4170000000000000, float 1.000000e+00 2217; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = fmul contract float [[TMP20]], [[TMP23]] 2218; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP24]]) 2219; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = select contract i1 [[TMP22]], float 4.096000e+03, float 1.000000e+00 2220; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = fmul contract float [[TMP25]], [[TMP26]] 2221; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP19]]) 2222; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 2223; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP19]]) 2224; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP29]]) 2225; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 2226; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0 2227; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison) 2228; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = fmul contract float [[TMP33]], [[TMP31]] 2229; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]] 2230; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]]) 2231; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP27]], i64 0 2232; IEEE-BADFREXP-NEXT: [[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1 2233; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr 
addrspace(1) [[OUT]], align 4 2234; IEEE-BADFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META3:![0-9]+]] 2235; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 0 2236; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 1 2237; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = extractelement <2 x float> [[X]], i64 0 2238; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = extractelement <2 x float> [[X]], i64 1 2239; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = fcmp contract olt float [[TMP41]], 0x3810000000000000 2240; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = select contract i1 [[TMP43]], float 0x4170000000000000, float 1.000000e+00 2241; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = fmul contract float [[TMP41]], [[TMP44]] 2242; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP45]]) 2243; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = select contract i1 [[TMP43]], float 4.096000e+03, float 1.000000e+00 2244; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = fmul contract float [[TMP46]], [[TMP47]] 2245; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = fcmp contract olt float [[TMP42]], 0x3810000000000000 2246; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = select contract i1 [[TMP49]], float 0x4170000000000000, float 1.000000e+00 2247; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = fmul contract float [[TMP42]], [[TMP50]] 2248; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP51]]) 2249; IEEE-BADFREXP-NEXT: [[TMP53:%.*]] = select contract i1 [[TMP49]], float 4.096000e+03, float 1.000000e+00 2250; IEEE-BADFREXP-NEXT: [[TMP54:%.*]] = fmul contract float [[TMP52]], [[TMP53]] 2251; IEEE-BADFREXP-NEXT: [[TMP55:%.*]] = insertelement <2 x float> poison, float [[TMP48]], i64 0 2252; IEEE-BADFREXP-NEXT: [[FDIV_OPENCL:%.*]] = insertelement <2 x float> [[TMP55]], float [[TMP54]], i64 1 2253; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], 
align 4 2254; IEEE-BADFREXP-NEXT: ret void 2255; 2256; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath( 2257; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { 2258; DAZ-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) 2259; DAZ-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]] 2260; DAZ-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 2261; DAZ-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2:![0-9]+]] 2262; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0 2263; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 1 2264; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[X]], i64 0 2265; DAZ-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[X]], i64 1 2266; DAZ-NEXT: [[TMP5:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]]) 2267; DAZ-NEXT: [[TMP6:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP4]]) 2268; DAZ-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i64 0 2269; DAZ-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP7]], float [[TMP6]], i64 1 2270; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 2271; DAZ-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[X]], i64 0 2272; DAZ-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[X]], i64 1 2273; DAZ-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP8]]) 2274; DAZ-NEXT: [[TMP11:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP9]]) 2275; DAZ-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0 2276; DAZ-NEXT: [[SQRT_MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP11]], i64 1 2277; DAZ-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 0 2278; DAZ-NEXT: [[TMP14:%.*]] = 
extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 1 2279; DAZ-NEXT: [[TMP15:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 2280; DAZ-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP14]]) 2281; DAZ-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0 2282; DAZ-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP16]], 1 2283; DAZ-NEXT: [[TMP19:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP17]]) 2284; DAZ-NEXT: [[TMP20:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 2285; DAZ-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP20]], 0 2286; DAZ-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP20]], 1 2287; DAZ-NEXT: [[TMP23:%.*]] = fmul contract float [[TMP21]], [[TMP19]] 2288; DAZ-NEXT: [[TMP24:%.*]] = sub i32 [[TMP22]], [[TMP18]] 2289; DAZ-NEXT: [[TMP25:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP23]], i32 [[TMP24]]) 2290; DAZ-NEXT: [[TMP26:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i64 0 2291; DAZ-NEXT: [[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP26]], float [[TMP25]], i64 1 2292; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr addrspace(1) [[OUT]], align 4 2293; DAZ-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META3:![0-9]+]] 2294; DAZ-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 0 2295; DAZ-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 1 2296; DAZ-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[X]], i64 0 2297; DAZ-NEXT: [[TMP30:%.*]] = extractelement <2 x float> [[X]], i64 1 2298; DAZ-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP29]]) 2299; DAZ-NEXT: [[TMP32:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP30]]) 2300; DAZ-NEXT: [[TMP33:%.*]] = insertelement <2 x float> poison, float [[TMP31]], i64 0 2301; DAZ-NEXT: [[FDIV_OPENCL:%.*]] = insertelement <2 x float> 
[[TMP33]], float [[TMP32]], i64 1 2302; DAZ-NEXT: store volatile <2 x float> [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 2303; DAZ-NEXT: ret void 2304; 2305 %sqrt.x.no.md = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x) 2306 %no.md = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt.x.no.md 2307 store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 4 2308 2309 ; Matches the rsq instruction accuracy 2310 %sqrt.md.1ulp = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2 2311 %md.1ulp = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt.md.1ulp, !fpmath !2 2312 store volatile <2 x float> %md.1ulp, ptr addrspace(1) %out, align 4 2313 2314 ; Matches the rsq instruction accuracy 2315 %sqrt.md.1ulp.undef = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2 2316 %md.1ulp.undef = fdiv contract <2 x float> <float 1.0, float poison>, %sqrt.md.1ulp.undef, !fpmath !2 2317 store volatile <2 x float> %md.1ulp.undef, ptr addrspace(1) %out, align 4 2318 2319 ; Test mismatched metadata/flags between the sqrt and fdiv 2320 2321 ; Test the expected opencl default pattern 2322 %sqrt.x.3ulp = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv 2323 %fdiv.opencl = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt.x.3ulp, !fpmath !0 2324 store volatile <2 x float> %fdiv.opencl, ptr addrspace(1) %out, align 4 2325 2326 ret void 2327} 2328 2329define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd(ptr addrspace(1) %out, float %x, float %y, float %denom) { 2330; CHECK-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd( 2331; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { 2332; CHECK-NEXT: [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]] 2333; CHECK-NEXT: [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]] 2334; CHECK-NEXT: store volatile float [[ARCP0]], ptr 
addrspace(1) [[OUT]], align 4 2335; CHECK-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2336; CHECK-NEXT: ret void 2337; 2338 %arcp0 = fdiv arcp float %x, %denom 2339 %arcp1 = fdiv arcp float %y, %denom 2340 store volatile float %arcp0, ptr addrspace(1) %out 2341 store volatile float %arcp1, ptr addrspace(1) %out 2342 ret void 2343} 2344 2345define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp(ptr addrspace(1) %out, float %x, float %y, float %denom) { 2346; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp( 2347; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { 2348; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2349; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 2350; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 2351; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 2352; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 2353; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 2354; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]] 2355; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2356; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 2357; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1 2358; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]] 2359; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]]) 2360; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]]) 2361; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]] 2362; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], 
align 4 2363; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2364; IEEE-GOODFREXP-NEXT: ret void 2365; 2366; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp( 2367; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { 2368; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2369; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 2370; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) 2371; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 2372; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 2373; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 2374; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]] 2375; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2376; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 2377; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) 2378; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]] 2379; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]]) 2380; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]]) 2381; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]] 2382; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 2383; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2384; IEEE-BADFREXP-NEXT: ret void 2385; 2386; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp( 2387; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { 
2388; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) 2389; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP1]] 2390; DAZ-NEXT: [[TMP2:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) 2391; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP2]] 2392; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 2393; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2394; DAZ-NEXT: ret void 2395; 2396 %arcp0 = fdiv arcp float %x, %denom, !fpmath !0 2397 %arcp1 = fdiv arcp float %y, %denom, !fpmath !0 2398 store volatile float %arcp0, ptr addrspace(1) %out 2399 store volatile float %arcp1, ptr addrspace(1) %out 2400 ret void 2401} 2402 2403define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_x3(ptr addrspace(1) %out, float %x, float %y, float %z, float %denom) { 2404; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_x3( 2405; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { 2406; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2407; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 2408; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 2409; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 2410; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 2411; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 2412; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]] 2413; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2414; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 2415; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1 2416; IEEE-GOODFREXP-NEXT: 
[[TMP10:%.*]] = sub i32 0, [[TMP9]] 2417; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]]) 2418; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]]) 2419; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]] 2420; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2421; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0 2422; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP13]], 1 2423; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = sub i32 0, [[TMP15]] 2424; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP14]]) 2425; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP16]]) 2426; IEEE-GOODFREXP-NEXT: [[ARCP2:%.*]] = fmul arcp float [[Z]], [[TMP18]] 2427; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 2428; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2429; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4 2430; IEEE-GOODFREXP-NEXT: ret void 2431; 2432; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_x3( 2433; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { 2434; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2435; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 2436; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) 2437; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 2438; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 2439; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float 
@llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 2440; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]] 2441; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2442; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 2443; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) 2444; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]] 2445; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]]) 2446; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]]) 2447; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]] 2448; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2449; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0 2450; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) 2451; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = sub i32 0, [[TMP15]] 2452; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP14]]) 2453; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP16]]) 2454; IEEE-BADFREXP-NEXT: [[ARCP2:%.*]] = fmul arcp float [[Z]], [[TMP18]] 2455; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 2456; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2457; IEEE-BADFREXP-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4 2458; IEEE-BADFREXP-NEXT: ret void 2459; 2460; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_x3( 2461; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { 2462; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) 2463; DAZ-NEXT: 
[[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP1]] 2464; DAZ-NEXT: [[TMP2:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) 2465; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP2]] 2466; DAZ-NEXT: [[TMP3:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) 2467; DAZ-NEXT: [[ARCP2:%.*]] = fmul arcp float [[Z]], [[TMP3]] 2468; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 2469; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2470; DAZ-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4 2471; DAZ-NEXT: ret void 2472; 2473 %arcp0 = fdiv arcp float %x, %denom, !fpmath !0 2474 %arcp1 = fdiv arcp float %y, %denom, !fpmath !0 2475 %arcp2 = fdiv arcp float %z, %denom, !fpmath !0 2476 store volatile float %arcp0, ptr addrspace(1) %out 2477 store volatile float %arcp1, ptr addrspace(1) %out 2478 store volatile float %arcp2, ptr addrspace(1) %out 2479 ret void 2480} 2481 2482define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_nomd(ptr addrspace(1) %out, float %x, float %y, float %denom) { 2483; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_nomd( 2484; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { 2485; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2486; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 2487; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 2488; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 2489; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 2490; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 2491; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]] 2492; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]] 
2493; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 2494; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2495; IEEE-GOODFREXP-NEXT: ret void 2496; 2497; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_nomd( 2498; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { 2499; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2500; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 2501; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) 2502; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 2503; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 2504; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 2505; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]] 2506; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]] 2507; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 2508; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2509; IEEE-BADFREXP-NEXT: ret void 2510; 2511; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_nomd( 2512; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { 2513; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) 2514; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP1]] 2515; DAZ-NEXT: [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]] 2516; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 2517; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2518; DAZ-NEXT: ret void 2519; 2520 %arcp0 = fdiv arcp float %x, 
%denom, !fpmath !0 2521 %arcp1 = fdiv arcp float %y, %denom 2522 store volatile float %arcp0, ptr addrspace(1) %out 2523 store volatile float %arcp1, ptr addrspace(1) %out 2524 ret void 2525} 2526 2527define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd_25ulp(ptr addrspace(1) %out, float %x, float %y, float %denom) { 2528; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd_25ulp( 2529; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { 2530; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]] 2531; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2532; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 2533; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 2534; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 2535; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 2536; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 2537; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP6]] 2538; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 2539; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2540; IEEE-GOODFREXP-NEXT: ret void 2541; 2542; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd_25ulp( 2543; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { 2544; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]] 2545; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2546; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 2547; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 
@llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) 2548; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 2549; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 2550; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 2551; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP6]] 2552; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 2553; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2554; IEEE-BADFREXP-NEXT: ret void 2555; 2556; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd_25ulp( 2557; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { 2558; DAZ-NEXT: [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]] 2559; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) 2560; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP1]] 2561; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 2562; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2563; DAZ-NEXT: ret void 2564; 2565 %arcp0 = fdiv arcp float %x, %denom 2566 %arcp1 = fdiv arcp float %y, %denom, !fpmath !0 2567 store volatile float %arcp0, ptr addrspace(1) %out 2568 store volatile float %arcp1, ptr addrspace(1) %out 2569 ret void 2570} 2571 2572define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp(ptr addrspace(1) %out, float %x, float %y, float %denom) { 2573; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp( 2574; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { 2575; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2576; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 2577; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = 
extractvalue { float, i32 } [[TMP1]], 1 2578; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 2579; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 2580; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 2581; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]] 2582; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2583; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 2584; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1 2585; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]] 2586; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]]) 2587; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]]) 2588; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]] 2589; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 2590; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2591; IEEE-GOODFREXP-NEXT: ret void 2592; 2593; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp( 2594; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { 2595; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2596; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 2597; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) 2598; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 2599; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 2600; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 2601; IEEE-BADFREXP-NEXT: 
[[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]] 2602; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2603; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 2604; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) 2605; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]] 2606; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]]) 2607; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]]) 2608; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]] 2609; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 2610; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2611; IEEE-BADFREXP-NEXT: ret void 2612; 2613; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp( 2614; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { 2615; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) 2616; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP1]] 2617; DAZ-NEXT: [[TMP2:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) 2618; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP2]] 2619; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 2620; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2621; DAZ-NEXT: ret void 2622; 2623 %arcp0 = fdiv arcp float %x, %denom, !fpmath !2 2624 %arcp1 = fdiv arcp float %y, %denom, !fpmath !2 2625 store volatile float %arcp0, ptr addrspace(1) %out 2626 store volatile float %arcp1, ptr addrspace(1) %out 2627 ret void 2628} 2629 2630define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp_vector(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y, <2 x float> %denom) { 2631; IEEE-GOODFREXP-LABEL: define 
amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp_vector( 2632; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[DENOM:%.*]]) #[[ATTR1]] { 2633; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0 2634; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1 2635; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 2636; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 2637; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 2638; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 2639; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 2640; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]] 2641; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]]) 2642; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]]) 2643; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul arcp float [[TMP1]], [[TMP10]] 2644; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 2645; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 2646; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 2647; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] 2648; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 2649; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) 2650; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = fmul arcp float [[TMP2]], [[TMP17]] 2651; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0 2652; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1 2653; IEEE-GOODFREXP-NEXT: 
[[TMP20:%.*]] = extractelement <2 x float> [[Y]], i64 0 2654; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[Y]], i64 1 2655; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 2656; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 2657; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP22]]) 2658; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0 2659; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP24]], 1 2660; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]] 2661; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP25]]) 2662; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]]) 2663; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = fmul arcp float [[TMP20]], [[TMP29]] 2664; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP23]]) 2665; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0 2666; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP31]], 1 2667; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]] 2668; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP32]]) 2669; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]]) 2670; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = fmul arcp float [[TMP21]], [[TMP36]] 2671; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP30]], i64 0 2672; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1 2673; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8 2674; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8 2675; IEEE-GOODFREXP-NEXT: ret void 2676; 
2677; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp_vector( 2678; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[DENOM:%.*]]) #[[ATTR1]] { 2679; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0 2680; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1 2681; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 2682; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 2683; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 2684; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 2685; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]]) 2686; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]] 2687; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]]) 2688; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]]) 2689; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul arcp float [[TMP1]], [[TMP10]] 2690; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 2691; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 2692; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]]) 2693; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] 2694; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 2695; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) 2696; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = fmul arcp float [[TMP2]], [[TMP17]] 2697; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0 2698; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP19]], float 
[[TMP18]], i64 1 2699; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[Y]], i64 0 2700; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[Y]], i64 1 2701; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 2702; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 2703; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP22]]) 2704; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0 2705; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP22]]) 2706; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]] 2707; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP25]]) 2708; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]]) 2709; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = fmul arcp float [[TMP20]], [[TMP29]] 2710; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP23]]) 2711; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0 2712; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP23]]) 2713; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]] 2714; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP32]]) 2715; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]]) 2716; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = fmul arcp float [[TMP21]], [[TMP36]] 2717; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP30]], i64 0 2718; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1 2719; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8 2720; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], 
align 8 2721; IEEE-BADFREXP-NEXT: ret void 2722; 2723; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp_vector( 2724; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[DENOM:%.*]]) #[[ATTR1]] { 2725; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0 2726; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1 2727; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 2728; DAZ-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 2729; DAZ-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP3]]) 2730; DAZ-NEXT: [[TMP6:%.*]] = fmul arcp float [[TMP1]], [[TMP5]] 2731; DAZ-NEXT: [[TMP7:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP4]]) 2732; DAZ-NEXT: [[TMP8:%.*]] = fmul arcp float [[TMP2]], [[TMP7]] 2733; DAZ-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0 2734; DAZ-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP8]], i64 1 2735; DAZ-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[Y]], i64 0 2736; DAZ-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[Y]], i64 1 2737; DAZ-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 2738; DAZ-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 2739; DAZ-NEXT: [[TMP14:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP12]]) 2740; DAZ-NEXT: [[TMP15:%.*]] = fmul arcp float [[TMP10]], [[TMP14]] 2741; DAZ-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 2742; DAZ-NEXT: [[TMP17:%.*]] = fmul arcp float [[TMP11]], [[TMP16]] 2743; DAZ-NEXT: [[TMP18:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i64 0 2744; DAZ-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP18]], float [[TMP17]], i64 1 2745; DAZ-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8 2746; DAZ-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8 
2747; DAZ-NEXT: ret void 2748; 2749 %arcp0 = fdiv arcp <2 x float> %x, %denom, !fpmath !2 2750 %arcp1 = fdiv arcp <2 x float> %y, %denom, !fpmath !2 2751 store volatile <2 x float> %arcp0, ptr addrspace(1) %out 2752 store volatile <2 x float> %arcp1, ptr addrspace(1) %out 2753 ret void 2754} 2755 2756define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp(ptr addrspace(1) %out, float %x, float %y, float %sqr.denom) { 2757; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp( 2758; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] { 2759; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = fcmp olt float [[SQR_DENOM]], 0x3810000000000000 2760; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0 2761; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[SQR_DENOM]], i32 [[TMP2]]) 2762; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) 2763; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0 2764; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]]) 2765; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2766; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 2767; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1 2768; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] 2769; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP7]]) 2770; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) 2771; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP11]] 2772; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2773; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 
} [[TMP12]], 0 2774; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 2775; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] 2776; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 2777; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) 2778; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP17]] 2779; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 2780; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2781; IEEE-GOODFREXP-NEXT: ret void 2782; 2783; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp( 2784; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] { 2785; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = fcmp olt float [[SQR_DENOM]], 0x3810000000000000 2786; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0 2787; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[SQR_DENOM]], i32 [[TMP2]]) 2788; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) 2789; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0 2790; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]]) 2791; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2792; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 2793; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) 2794; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] 2795; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP7]]) 2796; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float 
[[TMP10]], i32 [[TMP9]]) 2797; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP11]] 2798; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 2799; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 2800; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) 2801; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] 2802; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 2803; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) 2804; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP17]] 2805; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 2806; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2807; IEEE-BADFREXP-NEXT: ret void 2808; 2809; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp( 2810; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] { 2811; DAZ-NEXT: [[DENOM:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[SQR_DENOM]]) 2812; DAZ-NEXT: [[TMP1:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]]) 2813; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP1]] 2814; DAZ-NEXT: [[TMP2:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]]) 2815; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP2]] 2816; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 2817; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 2818; DAZ-NEXT: ret void 2819; 2820 %denom = call contract float @llvm.sqrt.f32(float %sqr.denom), !fpmath !3 2821 %arcp0 = fdiv contract arcp float %x, %denom, !fpmath !0 2822 %arcp1 = fdiv contract arcp float %y, %denom, !fpmath 
!0 2823 store volatile float %arcp0, ptr addrspace(1) %out 2824 store volatile float %arcp1, ptr addrspace(1) %out 2825 ret void 2826} 2827 2828define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y, <2 x float> %sqr.denom) { 2829; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp( 2830; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[SQR_DENOM:%.*]]) #[[ATTR1]] { 2831; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 0 2832; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 1 2833; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = fcmp olt float [[TMP1]], 0x3810000000000000 2834; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 32, i32 0 2835; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP1]], i32 [[TMP4]]) 2836; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP5]]) 2837; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = select i1 [[TMP3]], i32 -16, i32 0 2838; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP6]], i32 [[TMP7]]) 2839; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = fcmp olt float [[TMP2]], 0x3810000000000000 2840; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 32, i32 0 2841; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP2]], i32 [[TMP10]]) 2842; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP11]]) 2843; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = select i1 [[TMP9]], i32 -16, i32 0 2844; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]]) 2845; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0 2846; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], 
i64 1 2847; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0 2848; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1 2849; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 2850; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 2851; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP18]]) 2852; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP20]], 0 2853; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP20]], 1 2854; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = sub i32 0, [[TMP22]] 2855; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP21]]) 2856; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP24]], i32 [[TMP23]]) 2857; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul arcp contract float [[TMP16]], [[TMP25]] 2858; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP19]]) 2859; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP27]], 0 2860; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP27]], 1 2861; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = sub i32 0, [[TMP29]] 2862; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP28]]) 2863; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP31]], i32 [[TMP30]]) 2864; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = fmul arcp contract float [[TMP17]], [[TMP32]] 2865; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = insertelement <2 x float> poison, float [[TMP26]], i64 0 2866; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP34]], float [[TMP33]], i64 1 2867; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[Y]], i64 0 2868; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractelement <2 x float> 
[[Y]], i64 1 2869; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 2870; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 2871; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP37]]) 2872; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0 2873; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP39]], 1 2874; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = sub i32 0, [[TMP41]] 2875; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP40]]) 2876; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP43]], i32 [[TMP42]]) 2877; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = fmul arcp contract float [[TMP35]], [[TMP44]] 2878; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP38]]) 2879; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = extractvalue { float, i32 } [[TMP46]], 0 2880; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = extractvalue { float, i32 } [[TMP46]], 1 2881; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = sub i32 0, [[TMP48]] 2882; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP47]]) 2883; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP50]], i32 [[TMP49]]) 2884; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = fmul arcp contract float [[TMP36]], [[TMP51]] 2885; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = insertelement <2 x float> poison, float [[TMP45]], i64 0 2886; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP53]], float [[TMP52]], i64 1 2887; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8 2888; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8 2889; IEEE-GOODFREXP-NEXT: ret void 2890; 2891; IEEE-BADFREXP-LABEL: define amdgpu_kernel void 
@multiple_arcp_fdiv_sqrt_denom_vector_25ulp( 2892; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[SQR_DENOM:%.*]]) #[[ATTR1]] { 2893; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 0 2894; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 1 2895; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = fcmp olt float [[TMP1]], 0x3810000000000000 2896; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 32, i32 0 2897; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP1]], i32 [[TMP4]]) 2898; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP5]]) 2899; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = select i1 [[TMP3]], i32 -16, i32 0 2900; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP6]], i32 [[TMP7]]) 2901; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = fcmp olt float [[TMP2]], 0x3810000000000000 2902; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 32, i32 0 2903; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP2]], i32 [[TMP10]]) 2904; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP11]]) 2905; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = select i1 [[TMP9]], i32 -16, i32 0 2906; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]]) 2907; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0 2908; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1 2909; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0 2910; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1 2911; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 2912; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 2913; IEEE-BADFREXP-NEXT: 
[[TMP20:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP18]]) 2914; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP20]], 0 2915; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP18]]) 2916; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = sub i32 0, [[TMP22]] 2917; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP21]]) 2918; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP24]], i32 [[TMP23]]) 2919; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul arcp contract float [[TMP16]], [[TMP25]] 2920; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP19]]) 2921; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP27]], 0 2922; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP19]]) 2923; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = sub i32 0, [[TMP29]] 2924; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP28]]) 2925; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP31]], i32 [[TMP30]]) 2926; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = fmul arcp contract float [[TMP17]], [[TMP32]] 2927; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = insertelement <2 x float> poison, float [[TMP26]], i64 0 2928; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP34]], float [[TMP33]], i64 1 2929; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[Y]], i64 0 2930; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = extractelement <2 x float> [[Y]], i64 1 2931; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 2932; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 2933; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP37]]) 2934; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } 
[[TMP39]], 0 2935; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP37]]) 2936; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = sub i32 0, [[TMP41]] 2937; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP40]]) 2938; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP43]], i32 [[TMP42]]) 2939; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = fmul arcp contract float [[TMP35]], [[TMP44]] 2940; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP38]]) 2941; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = extractvalue { float, i32 } [[TMP46]], 0 2942; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP38]]) 2943; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = sub i32 0, [[TMP48]] 2944; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP47]]) 2945; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP50]], i32 [[TMP49]]) 2946; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = fmul arcp contract float [[TMP36]], [[TMP51]] 2947; IEEE-BADFREXP-NEXT: [[TMP53:%.*]] = insertelement <2 x float> poison, float [[TMP45]], i64 0 2948; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP53]], float [[TMP52]], i64 1 2949; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8 2950; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8 2951; IEEE-BADFREXP-NEXT: ret void 2952; 2953; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp( 2954; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[SQR_DENOM:%.*]]) #[[ATTR1]] { 2955; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 0 2956; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 1 2957; DAZ-NEXT: [[TMP3:%.*]] = call 
float @llvm.amdgcn.sqrt.f32(float [[TMP1]]) 2958; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]]) 2959; DAZ-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 2960; DAZ-NEXT: [[DENOM:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i64 1 2961; DAZ-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[X]], i64 0 2962; DAZ-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[X]], i64 1 2963; DAZ-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 2964; DAZ-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 2965; DAZ-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP8]]) 2966; DAZ-NEXT: [[TMP11:%.*]] = fmul arcp contract float [[TMP6]], [[TMP10]] 2967; DAZ-NEXT: [[TMP12:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP9]]) 2968; DAZ-NEXT: [[TMP13:%.*]] = fmul arcp contract float [[TMP7]], [[TMP12]] 2969; DAZ-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0 2970; DAZ-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP13]], i64 1 2971; DAZ-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[Y]], i64 0 2972; DAZ-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[Y]], i64 1 2973; DAZ-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 2974; DAZ-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 2975; DAZ-NEXT: [[TMP19:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP17]]) 2976; DAZ-NEXT: [[TMP20:%.*]] = fmul arcp contract float [[TMP15]], [[TMP19]] 2977; DAZ-NEXT: [[TMP21:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP18]]) 2978; DAZ-NEXT: [[TMP22:%.*]] = fmul arcp contract float [[TMP16]], [[TMP21]] 2979; DAZ-NEXT: [[TMP23:%.*]] = insertelement <2 x float> poison, float [[TMP20]], i64 0 2980; DAZ-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP23]], float [[TMP22]], i64 1 2981; DAZ-NEXT: store volatile <2 x float> [[ARCP0]], ptr 
addrspace(1) [[OUT]], align 8 2982; DAZ-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8 2983; DAZ-NEXT: ret void 2984; 2985 %denom = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %sqr.denom), !fpmath !3 2986 %arcp0 = fdiv contract arcp <2 x float> %x, %denom, !fpmath !0 2987 %arcp1 = fdiv contract arcp <2 x float> %y, %denom, !fpmath !0 2988 store volatile <2 x float> %arcp0, ptr addrspace(1) %out 2989 store volatile <2 x float> %arcp1, ptr addrspace(1) %out 2990 ret void 2991} 2992 ; Test @multiple_arcp_fdiv_sqrt_denom_25ulp_x3 - three 'fdiv contract arcp' (!fpmath !0) of %x, %y and %z share one 'contract' llvm.sqrt.f32 denominator (!fpmath !3). Per the autogenerated checks below, every fdiv is rewritten independently as numerator * reciprocal - the IEEE-GOODFREXP and IEEE-BADFREXP prefixes repeat the frexp / llvm.amdgcn.rcp / ldexp sequence on the denominator three times and the DAZ prefix emits three plain llvm.amdgcn.rcp calls - so the reciprocal is not shared between the divisions. NOTE(review) - the !0 and !3 metadata nodes are defined outside this excerpt, the ulp bound in the test name is presumably what !0 encodes, confirm against the metadata at the end of the file. 2993define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3(ptr addrspace(1) %out, float %x, float %y, float %z, float %sqr.denom) { 2994; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3( 2995; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] { 2996; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = fcmp olt float [[SQR_DENOM]], 0x3810000000000000 2997; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0 2998; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[SQR_DENOM]], i32 [[TMP2]]) 2999; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) 3000; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0 3001; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]]) 3002; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 3003; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 3004; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1 3005; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] 3006; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP7]]) 3007; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 
[[TMP9]]) 3008; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP11]] 3009; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 3010; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 3011; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 3012; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] 3013; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 3014; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) 3015; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP17]] 3016; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 3017; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 3018; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1 3019; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]] 3020; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP19]]) 3021; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]]) 3022; IEEE-GOODFREXP-NEXT: [[ARCP2:%.*]] = fmul arcp contract float [[Z]], [[TMP23]] 3023; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 3024; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 3025; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4 3026; IEEE-GOODFREXP-NEXT: ret void 3027; 3028; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3( 3029; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] { 3030; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = fcmp olt float 
[[SQR_DENOM]], 0x3810000000000000 3031; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0 3032; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[SQR_DENOM]], i32 [[TMP2]]) 3033; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) 3034; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0 3035; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]]) 3036; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 3037; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 3038; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) 3039; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] 3040; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP7]]) 3041; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) 3042; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP11]] 3043; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 3044; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 3045; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) 3046; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] 3047; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 3048; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) 3049; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP17]] 3050; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) 3051; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 3052; IEEE-BADFREXP-NEXT: 
[[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) 3053; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]] 3054; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP19]]) 3055; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]]) 3056; IEEE-BADFREXP-NEXT: [[ARCP2:%.*]] = fmul arcp contract float [[Z]], [[TMP23]] 3057; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 3058; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 3059; IEEE-BADFREXP-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4 3060; IEEE-BADFREXP-NEXT: ret void 3061; 3062; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3( 3063; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] { 3064; DAZ-NEXT: [[DENOM:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[SQR_DENOM]]) 3065; DAZ-NEXT: [[TMP1:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]]) 3066; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP1]] 3067; DAZ-NEXT: [[TMP2:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]]) 3068; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP2]] 3069; DAZ-NEXT: [[TMP3:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]]) 3070; DAZ-NEXT: [[ARCP2:%.*]] = fmul arcp contract float [[Z]], [[TMP3]] 3071; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 3072; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 3073; DAZ-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4 3074; DAZ-NEXT: ret void 3075; 3076 %denom = call contract float @llvm.sqrt.f32(float %sqr.denom), !fpmath !3 3077 %arcp0 = fdiv contract arcp float %x, %denom, !fpmath !0 3078 %arcp1 = 
fdiv contract arcp float %y, %denom, !fpmath !0 3079 %arcp2 = fdiv contract arcp float %z, %denom, !fpmath !0 3080 store volatile float %arcp0, ptr addrspace(1) %out 3081 store volatile float %arcp1, ptr addrspace(1) %out 3082 store volatile float %arcp2, ptr addrspace(1) %out 3083 ret void 3084} 3085 ; Test @rsq_f32_vector_mixed_constant_numerator - a 'fdiv contract' of the constant vector <1.0, -1.0, 4.0, poison> by a 'contract' llvm.sqrt.v4f32 result, both tagged !fpmath !2. Per the autogenerated checks below, with the IEEE-GOODFREXP and IEEE-BADFREXP prefixes lanes 0 and 1 (numerators 1.0 and -1.0) become llvm.amdgcn.rsq of the input lane, with input and output scale factors selected by an 'fcmp olt' against 0x3810000000000000, while lanes 2 and 3 (numerators 4.0 and poison) keep the frexp / llvm.amdgcn.rcp / ldexp division expansion on the sqrt lanes. With the DAZ prefix the sqrt is scalarized to llvm.amdgcn.sqrt, lanes 0 and 1 become llvm.amdgcn.rcp (lane 1 of the fneg'd denominator) and lanes 2 and 3 again use the frexp expansion. 3086define <4 x float> @rsq_f32_vector_mixed_constant_numerator(<4 x float> %arg) { 3087; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator( 3088; IEEE-GOODFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3089; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] 3090; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 3091; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 3092; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 3093; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 3094; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0 3095; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1 3096; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2 3097; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3 3098; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = fcmp contract olt float [[TMP5]], 0x3810000000000000 3099; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = select contract i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00 3100; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul contract float [[TMP5]], [[TMP10]] 3101; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP11]]) 3102; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = select contract i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00 3103; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = fmul contract float [[TMP12]], [[TMP13]] 3104; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = fcmp contract olt 
float [[TMP6]], 0x3810000000000000 3105; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = select contract i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00 3106; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul contract float [[TMP6]], [[TMP16]] 3107; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP17]]) 3108; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = select contract i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00 3109; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = fmul contract float [[TMP18]], [[TMP19]] 3110; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 3111; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0 3112; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP21]], 1 3113; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP22]]) 3114; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) 3115; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0 3116; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP25]], 1 3117; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = fmul contract float [[TMP26]], [[TMP24]] 3118; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = sub i32 [[TMP27]], [[TMP23]] 3119; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP29]]) 3120; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 3121; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0 3122; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP31]], 1 3123; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]]) 3124; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 3125; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0 3126; 
IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = extractvalue { float, i32 } [[TMP35]], 1 3127; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = fmul contract float [[TMP36]], [[TMP34]] 3128; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = sub i32 [[TMP37]], [[TMP33]] 3129; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP39]]) 3130; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0 3131; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP20]], i64 1 3132; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP30]], i64 2 3133; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP43]], float [[TMP40]], i64 3 3134; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3135; 3136; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator( 3137; IEEE-BADFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3138; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] 3139; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 3140; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 3141; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 3142; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 3143; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0 3144; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1 3145; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2 3146; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3 3147; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = fcmp contract olt float [[TMP5]], 0x3810000000000000 3148; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = select contract i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00 3149; 
IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul contract float [[TMP5]], [[TMP10]] 3150; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP11]]) 3151; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = select contract i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00 3152; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = fmul contract float [[TMP12]], [[TMP13]] 3153; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = fcmp contract olt float [[TMP6]], 0x3810000000000000 3154; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = select contract i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00 3155; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul contract float [[TMP6]], [[TMP16]] 3156; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP17]]) 3157; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = select contract i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00 3158; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = fmul contract float [[TMP18]], [[TMP19]] 3159; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 3160; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0 3161; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]]) 3162; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP22]]) 3163; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) 3164; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0 3165; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 4.000000e+00) 3166; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = fmul contract float [[TMP26]], [[TMP24]] 3167; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = sub i32 [[TMP27]], [[TMP23]] 3168; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP29]]) 3169; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 3170; 
IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0 3171; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]]) 3172; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]]) 3173; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 3174; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0 3175; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison) 3176; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = fmul contract float [[TMP36]], [[TMP34]] 3177; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = sub i32 [[TMP37]], [[TMP33]] 3178; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP39]]) 3179; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0 3180; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP20]], i64 1 3181; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP30]], i64 2 3182; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP43]], float [[TMP40]], i64 3 3183; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3184; 3185; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator( 3186; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3187; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0 3188; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1 3189; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2 3190; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3 3191; DAZ-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]]) 3192; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]]) 3193; DAZ-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) 3194; DAZ-NEXT: [[TMP8:%.*]] = call float 
@llvm.amdgcn.sqrt.f32(float [[TMP4]]) 3195; DAZ-NEXT: [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 3196; DAZ-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP6]], i64 1 3197; DAZ-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i64 2 3198; DAZ-NEXT: [[DENOM:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 3 3199; DAZ-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 3200; DAZ-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 3201; DAZ-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 3202; DAZ-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 3203; DAZ-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP12]]) 3204; DAZ-NEXT: [[TMP17:%.*]] = fneg contract float [[TMP13]] 3205; DAZ-NEXT: [[TMP18:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP17]]) 3206; DAZ-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP14]]) 3207; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 3208; DAZ-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 3209; DAZ-NEXT: [[TMP22:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP20]]) 3210; DAZ-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) 3211; DAZ-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 3212; DAZ-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1 3213; DAZ-NEXT: [[TMP26:%.*]] = fmul contract float [[TMP24]], [[TMP22]] 3214; DAZ-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] 3215; DAZ-NEXT: [[TMP28:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) 3216; DAZ-NEXT: [[TMP29:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP15]]) 3217; DAZ-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP29]], 0 3218; DAZ-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP29]], 1 3219; DAZ-NEXT: 
[[TMP32:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP30]]) 3220; DAZ-NEXT: [[TMP33:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 3221; DAZ-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP33]], 0 3222; DAZ-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP33]], 1 3223; DAZ-NEXT: [[TMP36:%.*]] = fmul contract float [[TMP34]], [[TMP32]] 3224; DAZ-NEXT: [[TMP37:%.*]] = sub i32 [[TMP35]], [[TMP31]] 3225; DAZ-NEXT: [[TMP38:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP36]], i32 [[TMP37]]) 3226; DAZ-NEXT: [[TMP39:%.*]] = insertelement <4 x float> poison, float [[TMP16]], i64 0 3227; DAZ-NEXT: [[TMP40:%.*]] = insertelement <4 x float> [[TMP39]], float [[TMP18]], i64 1 3228; DAZ-NEXT: [[TMP41:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP28]], i64 2 3229; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP38]], i64 3 3230; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3231; 3232 %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2 3233 %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom, !fpmath !2 3234 ret <4 x float> %partial.rsq 3235} 3236 3237define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt(<4 x float> %arg) { 3238; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt( 3239; IEEE-GOODFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3240; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]) 3241; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 3242; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 3243; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 3244; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 3245; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> 
[[ARG]], i64 0 3246; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1 3247; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2 3248; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3 3249; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = fcmp contract afn olt float [[TMP5]], 0x3810000000000000 3250; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = select contract afn i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00 3251; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul contract afn float [[TMP5]], [[TMP10]] 3252; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP11]]) 3253; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = select contract afn i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00 3254; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = fmul contract afn float [[TMP12]], [[TMP13]] 3255; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = fcmp contract afn olt float [[TMP6]], 0x3810000000000000 3256; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = select contract afn i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00 3257; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul contract afn float [[TMP6]], [[TMP16]] 3258; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP17]]) 3259; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = select contract afn i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00 3260; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = fmul contract afn float [[TMP18]], [[TMP19]] 3261; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 3262; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0 3263; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP21]], 1 3264; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP22]]) 3265; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) 3266; IEEE-GOODFREXP-NEXT: 
[[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0 3267; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP25]], 1 3268; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = fmul contract float [[TMP26]], [[TMP24]] 3269; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = sub i32 [[TMP27]], [[TMP23]] 3270; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP29]]) 3271; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 3272; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0 3273; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP31]], 1 3274; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]]) 3275; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 3276; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0 3277; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = extractvalue { float, i32 } [[TMP35]], 1 3278; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = fmul contract float [[TMP36]], [[TMP34]] 3279; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = sub i32 [[TMP37]], [[TMP33]] 3280; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP39]]) 3281; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0 3282; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP20]], i64 1 3283; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP30]], i64 2 3284; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP43]], float [[TMP40]], i64 3 3285; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3286; 3287; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt( 3288; IEEE-BADFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3289; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = 
call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]) 3290; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 3291; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 3292; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 3293; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 3294; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0 3295; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1 3296; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2 3297; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3 3298; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = fcmp contract afn olt float [[TMP5]], 0x3810000000000000 3299; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = select contract afn i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00 3300; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul contract afn float [[TMP5]], [[TMP10]] 3301; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP11]]) 3302; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = select contract afn i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00 3303; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = fmul contract afn float [[TMP12]], [[TMP13]] 3304; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = fcmp contract afn olt float [[TMP6]], 0x3810000000000000 3305; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = select contract afn i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00 3306; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul contract afn float [[TMP6]], [[TMP16]] 3307; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP17]]) 3308; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = select contract afn i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00 3309; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = fmul contract afn float [[TMP18]], [[TMP19]] 3310; IEEE-BADFREXP-NEXT: 
[[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 3311; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0 3312; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]]) 3313; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP22]]) 3314; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) 3315; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0 3316; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 4.000000e+00) 3317; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = fmul contract float [[TMP26]], [[TMP24]] 3318; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = sub i32 [[TMP27]], [[TMP23]] 3319; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP29]]) 3320; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 3321; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0 3322; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]]) 3323; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]]) 3324; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 3325; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0 3326; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison) 3327; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = fmul contract float [[TMP36]], [[TMP34]] 3328; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = sub i32 [[TMP37]], [[TMP33]] 3329; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP39]]) 3330; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0 3331; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = insertelement <4 x float> 
[[TMP41]], float [[TMP20]], i64 1 3332; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP30]], i64 2 3333; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP43]], float [[TMP40]], i64 3 3334; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3335; 3336; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt( 3337; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3338; DAZ-NEXT: [[DENOM:%.*]] = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]) 3339; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 3340; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 3341; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 3342; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 3343; DAZ-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0 3344; DAZ-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1 3345; DAZ-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2 3346; DAZ-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3 3347; DAZ-NEXT: [[TMP9:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP5]]) 3348; DAZ-NEXT: [[TMP10:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP6]]) 3349; DAZ-NEXT: [[TMP11:%.*]] = fneg contract afn float [[TMP10]] 3350; DAZ-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 3351; DAZ-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 3352; DAZ-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 3353; DAZ-NEXT: [[TMP15:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 3354; DAZ-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) 3355; DAZ-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0 3356; DAZ-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP16]], 1 3357; DAZ-NEXT: [[TMP19:%.*]] = fmul contract 
float [[TMP17]], [[TMP15]] 3358; DAZ-NEXT: [[TMP20:%.*]] = sub i32 [[TMP18]], [[TMP14]] 3359; DAZ-NEXT: [[TMP21:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP19]], i32 [[TMP20]]) 3360; DAZ-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 3361; DAZ-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0 3362; DAZ-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP22]], 1 3363; DAZ-NEXT: [[TMP25:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP23]]) 3364; DAZ-NEXT: [[TMP26:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 3365; DAZ-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP26]], 0 3366; DAZ-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP26]], 1 3367; DAZ-NEXT: [[TMP29:%.*]] = fmul contract float [[TMP27]], [[TMP25]] 3368; DAZ-NEXT: [[TMP30:%.*]] = sub i32 [[TMP28]], [[TMP24]] 3369; DAZ-NEXT: [[TMP31:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP29]], i32 [[TMP30]]) 3370; DAZ-NEXT: [[TMP32:%.*]] = insertelement <4 x float> poison, float [[TMP9]], i64 0 3371; DAZ-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[TMP11]], i64 1 3372; DAZ-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP21]], i64 2 3373; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP31]], i64 3 3374; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3375; 3376 %denom = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg) 3377 %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom, !fpmath !2 3378 ret <4 x float> %partial.rsq 3379} 3380; The sqrt is contract with !fpmath !2 and the fdiv is contract afn with no !fpmath. The IEEE checks expect both instructions to be left unchanged, the DAZ checks expect only the sqrt to be scalarized into llvm.amdgcn.sqrt.f32 calls. 3381define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div(<4 x float> %arg) { 3382; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div( 3383; IEEE-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3384; IEEE-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath
[[META2:![0-9]+]] 3385; IEEE-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv contract afn <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]] 3386; IEEE-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3387; 3388; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div( 3389; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3390; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0 3391; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1 3392; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2 3393; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3 3394; DAZ-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]]) 3395; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]]) 3396; DAZ-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) 3397; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP4]]) 3398; DAZ-NEXT: [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 3399; DAZ-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP6]], i64 1 3400; DAZ-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i64 2 3401; DAZ-NEXT: [[DENOM:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 3 3402; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv contract afn <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]] 3403; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3404; 3405 %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2 3406 %partial.rsq = fdiv contract afn <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom 3407 ret <4 x float> %partial.rsq 3408} 3409; The sqrt is contract with !fpmath !2 and the fdiv is contract only, with no !fpmath. The IEEE checks expect both instructions to be left unchanged, the DAZ checks expect only the sqrt to be scalarized into llvm.amdgcn.sqrt.f32 calls. 3410define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_fdiv(<4 x float> %arg) { 3411; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_fdiv( 3412; IEEE-SAME: <4 x
float> [[ARG:%.*]]) #[[ATTR1]] { 3413; IEEE-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] 3414; IEEE-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv contract <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]] 3415; IEEE-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3416; 3417; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_fdiv( 3418; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3419; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0 3420; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1 3421; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2 3422; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3 3423; DAZ-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]]) 3424; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]]) 3425; DAZ-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) 3426; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP4]]) 3427; DAZ-NEXT: [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 3428; DAZ-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP6]], i64 1 3429; DAZ-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i64 2 3430; DAZ-NEXT: [[DENOM:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 3 3431; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv contract <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]] 3432; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3433; 3434 %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2 3435 %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom 3436 ret <4 x float> %partial.rsq 3437} 3438; The sqrt is contract with no !fpmath and the fdiv is contract with !fpmath !2. Every check prefix keeps the vector sqrt call and expands the fdiv per lane, and none of them expects an llvm.amdgcn.rsq.f32 call. 3439define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt(<4 x
float> %arg) { 3440; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt( 3441; IEEE-GOODFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3442; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]) 3443; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 3444; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 3445; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 3446; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 3447; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) 3448; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 3449; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 3450; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]] 3451; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP6]]) 3452; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]]) 3453; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fneg contract float [[TMP2]] 3454; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) 3455; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 3456; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 3457; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] 3458; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 3459; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) 3460; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 3461; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 3462; IEEE-GOODFREXP-NEXT: 
[[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1 3463; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP19]]) 3464; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) 3465; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0 3466; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP22]], 1 3467; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = fmul contract float [[TMP23]], [[TMP21]] 3468; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP20]] 3469; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP26]]) 3470; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 3471; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 3472; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1 3473; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP29]]) 3474; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 3475; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0 3476; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP32]], 1 3477; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = fmul contract float [[TMP33]], [[TMP31]] 3478; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]] 3479; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]]) 3480; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i64 0 3481; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = insertelement <4 x float> [[TMP38]], float [[TMP17]], i64 1 3482; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = insertelement <4 x float> [[TMP39]], float [[TMP27]], i64 2 3483; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP40]], 
float [[TMP37]], i64 3 3484; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3485; 3486; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt( 3487; IEEE-BADFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3488; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]) 3489; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 3490; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 3491; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 3492; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 3493; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) 3494; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 3495; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP1]]) 3496; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]] 3497; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP6]]) 3498; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]]) 3499; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fneg contract float [[TMP2]] 3500; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) 3501; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 3502; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP11]]) 3503; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] 3504; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 3505; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) 3506; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 3507; IEEE-BADFREXP-NEXT: 
[[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 3508; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]]) 3509; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP19]]) 3510; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) 3511; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0 3512; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 4.000000e+00) 3513; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = fmul contract float [[TMP23]], [[TMP21]] 3514; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP20]] 3515; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP26]]) 3516; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 3517; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 3518; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]]) 3519; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP29]]) 3520; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 3521; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0 3522; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison) 3523; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = fmul contract float [[TMP33]], [[TMP31]] 3524; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]] 3525; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]]) 3526; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i64 0 3527; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = insertelement <4 x float> [[TMP38]], float [[TMP17]], i64 1 3528; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = insertelement <4 x 
float> [[TMP39]], float [[TMP27]], i64 2 3529; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP37]], i64 3 3530; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3531; 3532; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt( 3533; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3534; DAZ-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]) 3535; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 3536; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 3537; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 3538; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 3539; DAZ-NEXT: [[TMP5:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP1]]) 3540; DAZ-NEXT: [[TMP6:%.*]] = fneg contract float [[TMP2]] 3541; DAZ-NEXT: [[TMP7:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP6]]) 3542; DAZ-NEXT: [[TMP8:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 3543; DAZ-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP8]], 0 3544; DAZ-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP8]], 1 3545; DAZ-NEXT: [[TMP11:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP9]]) 3546; DAZ-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) 3547; DAZ-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 3548; DAZ-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 3549; DAZ-NEXT: [[TMP15:%.*]] = fmul contract float [[TMP13]], [[TMP11]] 3550; DAZ-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP10]] 3551; DAZ-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP16]]) 3552; DAZ-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 3553; DAZ-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 3554; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, 
i32 } [[TMP18]], 1 3555; DAZ-NEXT: [[TMP21:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP19]]) 3556; DAZ-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 3557; DAZ-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0 3558; DAZ-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP22]], 1 3559; DAZ-NEXT: [[TMP25:%.*]] = fmul contract float [[TMP23]], [[TMP21]] 3560; DAZ-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP20]] 3561; DAZ-NEXT: [[TMP27:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP26]]) 3562; DAZ-NEXT: [[TMP28:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 3563; DAZ-NEXT: [[TMP29:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP7]], i64 1 3564; DAZ-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP17]], i64 2 3565; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP30]], float [[TMP27]], i64 3 3566; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3567; 3568 %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg) 3569 %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom, !fpmath !2 3570 ret <4 x float> %partial.rsq 3571} 3572; The sqrt is contract with !fpmath !2 and the fdiv is contract arcp with !fpmath !2. The IEEE checks expect llvm.amdgcn.rsq.f32 for the 1.0 and -1.0 lanes, the DAZ checks expect llvm.amdgcn.rcp.f32 of the denominator for every lane. 3573define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp(<4 x float> %arg) { 3574; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp( 3575; IEEE-GOODFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3576; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] 3577; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 3578; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 3579; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 3580; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 3581; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] =
extractelement <4 x float> [[ARG]], i64 0 3582; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1 3583; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2 3584; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3 3585; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = fcmp arcp contract olt float [[TMP5]], 0x3810000000000000 3586; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = select arcp contract i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00 3587; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul arcp contract float [[TMP5]], [[TMP10]] 3588; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP11]]) 3589; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = select arcp contract i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00 3590; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = fmul arcp contract float [[TMP12]], [[TMP13]] 3591; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = fcmp arcp contract olt float [[TMP6]], 0x3810000000000000 3592; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = select arcp contract i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00 3593; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul arcp contract float [[TMP6]], [[TMP16]] 3594; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP17]]) 3595; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = select arcp contract i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00 3596; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = fmul arcp contract float [[TMP18]], [[TMP19]] 3597; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 3598; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0 3599; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP21]], 1 3600; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = sub i32 0, [[TMP23]] 3601; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP22]]) 3602; 
IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP24]]) 3603; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = fmul arcp contract float 4.000000e+00, [[TMP26]] 3604; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 3605; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 3606; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1 3607; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]] 3608; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP29]]) 3609; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]]) 3610; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = fmul arcp contract float poison, [[TMP33]] 3611; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0 3612; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP20]], i64 1 3613; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP36]], float [[TMP27]], i64 2 3614; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP37]], float [[TMP34]], i64 3 3615; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3616; 3617; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp( 3618; IEEE-BADFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3619; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] 3620; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 3621; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 3622; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 3623; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 3624; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = 
extractelement <4 x float> [[ARG]], i64 0 3625; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1 3626; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2 3627; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3 3628; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = fcmp arcp contract olt float [[TMP5]], 0x3810000000000000 3629; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = select arcp contract i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00 3630; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul arcp contract float [[TMP5]], [[TMP10]] 3631; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP11]]) 3632; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = select arcp contract i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00 3633; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = fmul arcp contract float [[TMP12]], [[TMP13]] 3634; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = fcmp arcp contract olt float [[TMP6]], 0x3810000000000000 3635; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = select arcp contract i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00 3636; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul arcp contract float [[TMP6]], [[TMP16]] 3637; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP17]]) 3638; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = select arcp contract i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00 3639; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = fmul arcp contract float [[TMP18]], [[TMP19]] 3640; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 3641; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0 3642; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]]) 3643; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = sub i32 0, [[TMP23]] 3644; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP22]]) 3645; IEEE-BADFREXP-NEXT: 
[[TMP26:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP24]]) 3646; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = fmul arcp contract float 4.000000e+00, [[TMP26]] 3647; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 3648; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 3649; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]]) 3650; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]] 3651; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP29]]) 3652; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]]) 3653; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = fmul arcp contract float poison, [[TMP33]] 3654; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0 3655; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP20]], i64 1 3656; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP36]], float [[TMP27]], i64 2 3657; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP37]], float [[TMP34]], i64 3 3658; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3659; 3660; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp( 3661; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3662; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0 3663; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1 3664; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2 3665; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3 3666; DAZ-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]]) 3667; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]]) 3668; DAZ-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) 3669; DAZ-NEXT: 
[[TMP8:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP4]]) 3670; DAZ-NEXT: [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 3671; DAZ-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP6]], i64 1 3672; DAZ-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i64 2 3673; DAZ-NEXT: [[DENOM:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 3 3674; DAZ-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 3675; DAZ-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 3676; DAZ-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 3677; DAZ-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 3678; DAZ-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP12]]) 3679; DAZ-NEXT: [[TMP17:%.*]] = fneg arcp contract float [[TMP13]] 3680; DAZ-NEXT: [[TMP18:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP17]]) 3681; DAZ-NEXT: [[TMP19:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP14]]) 3682; DAZ-NEXT: [[TMP20:%.*]] = fmul arcp contract float 4.000000e+00, [[TMP19]] 3683; DAZ-NEXT: [[TMP21:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP15]]) 3684; DAZ-NEXT: [[TMP22:%.*]] = fmul arcp contract float poison, [[TMP21]] 3685; DAZ-NEXT: [[TMP23:%.*]] = insertelement <4 x float> poison, float [[TMP16]], i64 0 3686; DAZ-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP23]], float [[TMP18]], i64 1 3687; DAZ-NEXT: [[TMP25:%.*]] = insertelement <4 x float> [[TMP24]], float [[TMP20]], i64 2 3688; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP25]], float [[TMP22]], i64 3 3689; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3690; 3691 %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2 3692 %partial.rsq = fdiv contract arcp <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom, !fpmath !2 3693 ret <4 x float> %partial.rsq 
3694} 3695 3696define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp_correct(<4 x float> %arg) { 3697; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp_correct( 3698; IEEE-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3699; IEEE-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] 3700; IEEE-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv arcp contract <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]] 3701; IEEE-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3702; 3703; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp_correct( 3704; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3705; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0 3706; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1 3707; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2 3708; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3 3709; DAZ-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]]) 3710; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]]) 3711; DAZ-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) 3712; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP4]]) 3713; DAZ-NEXT: [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 3714; DAZ-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP6]], i64 1 3715; DAZ-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i64 2 3716; DAZ-NEXT: [[DENOM:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 3 3717; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv arcp contract <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]] 3718; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3719; 3720 %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2 
3721 %partial.rsq = fdiv contract arcp <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom 3722 ret <4 x float> %partial.rsq 3723} 3724 3725define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp(<4 x float> %arg) { 3726; IEEE-GOODFREXP-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp( 3727; IEEE-GOODFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3728; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0 3729; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1 3730; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2 3731; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3 3732; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) 3733; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 3734; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 3735; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]] 3736; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]]) 3737; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]]) 3738; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fneg arcp float [[TMP2]] 3739; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) 3740; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 3741; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 3742; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] 3743; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 3744; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) 3745; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 3746; IEEE-GOODFREXP-NEXT: 
[[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 3747; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1 3748; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]] 3749; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP19]]) 3750; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]]) 3751; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = fmul arcp float 4.000000e+00, [[TMP23]] 3752; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 3753; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0 3754; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP25]], 1 3755; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = sub i32 0, [[TMP27]] 3756; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP26]]) 3757; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP29]], i32 [[TMP28]]) 3758; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = fmul arcp float poison, [[TMP30]] 3759; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i64 0 3760; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[TMP17]], i64 1 3761; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP24]], i64 2 3762; IEEE-GOODFREXP-NEXT: [[PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP31]], i64 3 3763; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RCP]] 3764; 3765; IEEE-BADFREXP-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp( 3766; IEEE-BADFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3767; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0 3768; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1 3769; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2 
3770; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3 3771; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) 3772; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 3773; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP1]]) 3774; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]] 3775; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]]) 3776; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]]) 3777; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fneg arcp float [[TMP2]] 3778; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) 3779; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 3780; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP11]]) 3781; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] 3782; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 3783; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) 3784; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) 3785; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 3786; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]]) 3787; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]] 3788; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP19]]) 3789; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]]) 3790; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = fmul arcp float 4.000000e+00, [[TMP23]] 3791; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) 3792; IEEE-BADFREXP-NEXT: 
[[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0 3793; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]]) 3794; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = sub i32 0, [[TMP27]] 3795; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP26]]) 3796; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP29]], i32 [[TMP28]]) 3797; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = fmul arcp float poison, [[TMP30]] 3798; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i64 0 3799; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[TMP17]], i64 1 3800; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP24]], i64 2 3801; IEEE-BADFREXP-NEXT: [[PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP31]], i64 3 3802; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RCP]] 3803; 3804; DAZ-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp( 3805; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3806; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0 3807; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1 3808; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2 3809; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3 3810; DAZ-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP1]]) 3811; DAZ-NEXT: [[TMP6:%.*]] = fneg arcp float [[TMP2]] 3812; DAZ-NEXT: [[TMP7:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]]) 3813; DAZ-NEXT: [[TMP8:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP3]]) 3814; DAZ-NEXT: [[TMP9:%.*]] = fmul arcp float 4.000000e+00, [[TMP8]] 3815; DAZ-NEXT: [[TMP10:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP4]]) 3816; DAZ-NEXT: [[TMP11:%.*]] = fmul arcp float poison, [[TMP10]] 3817; DAZ-NEXT: [[TMP12:%.*]] = insertelement <4 x float> poison, 
float [[TMP5]], i64 0 3818; DAZ-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP7]], i64 1 3819; DAZ-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP9]], i64 2 3820; DAZ-NEXT: [[PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i64 3 3821; DAZ-NEXT: ret <4 x float> [[PARTIAL_RCP]] 3822; 3823 %partial.rcp = fdiv arcp <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %arg, !fpmath !2 3824 ret <4 x float> %partial.rcp 3825} 3826 3827define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp_correct(<4 x float> %arg) { 3828; CHECK-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp_correct( 3829; CHECK-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { 3830; CHECK-NEXT: [[PARTIAL_RCP:%.*]] = fdiv arcp <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[ARG]] 3831; CHECK-NEXT: ret <4 x float> [[PARTIAL_RCP]] 3832; 3833 %partial.rcp = fdiv arcp <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %arg 3834 ret <4 x float> %partial.rcp 3835} 3836 3837; Make sure we don't crash if a vector square root has a constant vector input 3838define <4 x float> @rsq_f32_vector_const_denom(ptr addrspace(1) %out, <2 x float> %x) { 3839; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_const_denom( 3840; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { 3841; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 4.000000e+00) 3842; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 2.000000e+00) 3843; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 8.000000e+00) 3844; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float poison) 3845; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0 3846; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], 
float [[TMP2]], i64 1 3847; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP3]], i64 2 3848; IEEE-GOODFREXP-NEXT: [[SQRT:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP4]], i64 3 3849; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[SQRT]], i64 0 3850; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[SQRT]], i64 1 3851; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[SQRT]], i64 2 3852; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[SQRT]], i64 3 3853; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP8]]) 3854; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 3855; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 3856; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] 3857; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 3858; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) 3859; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = fneg contract float [[TMP9]] 3860; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP18]]) 3861; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = extractvalue { float, i32 } [[TMP48]], 0 3862; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = extractvalue { float, i32 } [[TMP48]], 1 3863; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = sub i32 0, [[TMP50]] 3864; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP49]]) 3865; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP51]], i32 [[TMP22]]) 3866; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP10]]) 3867; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP29]], 0 3868; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP29]], 1 
3869; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP30]]) 3870; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 3871; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = extractvalue { float, i32 } [[TMP52]], 0 3872; IEEE-GOODFREXP-NEXT: [[TMP54:%.*]] = extractvalue { float, i32 } [[TMP52]], 1 3873; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = fmul contract float [[TMP53]], [[TMP28]] 3874; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = sub i32 [[TMP54]], [[TMP31]] 3875; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP33]]) 3876; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) 3877; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0 3878; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = extractvalue { float, i32 } [[TMP35]], 1 3879; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP36]]) 3880; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00) 3881; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0 3882; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP39]], 1 3883; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = fmul contract float [[TMP40]], [[TMP38]] 3884; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = sub i32 [[TMP41]], [[TMP37]] 3885; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP42]], i32 [[TMP43]]) 3886; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = insertelement <4 x float> poison, float [[TMP17]], i64 0 3887; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP24]], i64 1 3888; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP34]], i64 2 3889; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP47]], float [[TMP44]], i64 3 3890; IEEE-GOODFREXP-NEXT: ret <4 x 
float> [[PARTIAL_RSQ]] 3891; 3892; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_const_denom( 3893; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { 3894; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 4.000000e+00) 3895; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 2.000000e+00) 3896; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 8.000000e+00) 3897; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float poison) 3898; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0 3899; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP2]], i64 1 3900; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP3]], i64 2 3901; IEEE-BADFREXP-NEXT: [[SQRT:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP4]], i64 3 3902; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[SQRT]], i64 0 3903; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[SQRT]], i64 1 3904; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[SQRT]], i64 2 3905; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[SQRT]], i64 3 3906; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP8]]) 3907; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 3908; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP8]]) 3909; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] 3910; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 3911; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) 3912; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = fneg contract float [[TMP9]] 3913; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = call { float, i32 } 
@llvm.frexp.f32.i32(float [[TMP18]]) 3914; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = extractvalue { float, i32 } [[TMP48]], 0 3915; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP18]]) 3916; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = sub i32 0, [[TMP21]] 3917; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP49]]) 3918; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP50]], i32 [[TMP22]]) 3919; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP10]]) 3920; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP29]], 0 3921; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP10]]) 3922; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP30]]) 3923; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 3924; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = extractvalue { float, i32 } [[TMP51]], 0 3925; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison) 3926; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = fmul contract float [[TMP52]], [[TMP28]] 3927; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = sub i32 [[TMP31]], [[TMP27]] 3928; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP33]]) 3929; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) 3930; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0 3931; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP11]]) 3932; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP36]]) 3933; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00) 3934; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0 3935; 
IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 2.000000e+00) 3936; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = fmul contract float [[TMP40]], [[TMP38]] 3937; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = sub i32 [[TMP41]], [[TMP37]] 3938; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP42]], i32 [[TMP43]]) 3939; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = insertelement <4 x float> poison, float [[TMP17]], i64 0 3940; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP24]], i64 1 3941; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP34]], i64 2 3942; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP47]], float [[TMP44]], i64 3 3943; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3944; 3945; DAZ-LABEL: define <4 x float> @rsq_f32_vector_const_denom( 3946; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { 3947; DAZ-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 4.000000e+00) 3948; DAZ-NEXT: [[TMP2:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 2.000000e+00) 3949; DAZ-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 8.000000e+00) 3950; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float poison) 3951; DAZ-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0 3952; DAZ-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP2]], i64 1 3953; DAZ-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP3]], i64 2 3954; DAZ-NEXT: [[SQRT:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP4]], i64 3 3955; DAZ-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[SQRT]], i64 0 3956; DAZ-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[SQRT]], i64 1 3957; DAZ-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[SQRT]], i64 2 3958; DAZ-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[SQRT]], i64 3 3959; DAZ-NEXT: [[TMP12:%.*]] = 
call contract float @llvm.amdgcn.rcp.f32(float [[TMP8]]) 3960; DAZ-NEXT: [[TMP13:%.*]] = fneg contract float [[TMP9]] 3961; DAZ-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) 3962; DAZ-NEXT: [[TMP15:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP10]]) 3963; DAZ-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP15]], 0 3964; DAZ-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP15]], 1 3965; DAZ-NEXT: [[TMP18:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP16]]) 3966; DAZ-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 3967; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 3968; DAZ-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 3969; DAZ-NEXT: [[TMP22:%.*]] = fmul contract float [[TMP20]], [[TMP18]] 3970; DAZ-NEXT: [[TMP23:%.*]] = sub i32 [[TMP21]], [[TMP17]] 3971; DAZ-NEXT: [[TMP24:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP23]]) 3972; DAZ-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) 3973; DAZ-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0 3974; DAZ-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP25]], 1 3975; DAZ-NEXT: [[TMP28:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP26]]) 3976; DAZ-NEXT: [[TMP29:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00) 3977; DAZ-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP29]], 0 3978; DAZ-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP29]], 1 3979; DAZ-NEXT: [[TMP32:%.*]] = fmul contract float [[TMP30]], [[TMP28]] 3980; DAZ-NEXT: [[TMP33:%.*]] = sub i32 [[TMP31]], [[TMP27]] 3981; DAZ-NEXT: [[TMP34:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP33]]) 3982; DAZ-NEXT: [[TMP35:%.*]] = insertelement <4 x float> poison, float [[TMP12]], i64 0 3983; DAZ-NEXT: [[TMP36:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP14]], i64 1 3984; 
DAZ-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP36]], float [[TMP24]], i64 2 3985; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP37]], float [[TMP34]], i64 3 3986; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]] 3987; 3988 %sqrt = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> <float 4.0, float 2.0, float 8.0, float poison>), !fpmath !2 3989 %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float poison, float 2.0>, %sqrt, !fpmath !2 3990 ret <4 x float> %partial.rsq 3991} 3992; fdiv where both operands are constant vectors; numerator lane 2 is poison. 3993define <4 x float> @fdiv_constant_f32_vector(ptr addrspace(1) %out, <2 x float> %x) { 3994; IEEE-GOODFREXP-LABEL: define <4 x float> @fdiv_constant_f32_vector( 3995; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { 3996; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 5.000000e-01) 3997; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 3998; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 3999; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 4000; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 4001; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 4002; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float -2.000000e+00) 4003; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 4004; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1 4005; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]] 4006; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP8]]) 4007; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]]) 4008; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 3.200000e+01) 4009; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, 
i32 } [[TMP13]], 0 4010; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP13]], 1 4011; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP14]]) 4012; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 4013; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP17]], 0 4014; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP17]], 1 4015; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = fmul float [[TMP18]], [[TMP16]] 4016; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 [[TMP19]], [[TMP15]] 4017; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP21]]) 4018; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 1.000000e+01) 4019; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 4020; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1 4021; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP24]]) 4022; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00) 4023; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP27]], 0 4024; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP27]], 1 4025; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = fmul float [[TMP28]], [[TMP26]] 4026; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 [[TMP29]], [[TMP25]] 4027; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP30]], i32 [[TMP31]]) 4028; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0 4029; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP12]], i64 1 4030; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP22]], i64 2 4031; IEEE-GOODFREXP-NEXT: [[CONST_PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP35]], 
float [[TMP32]], i64 3 4032; IEEE-GOODFREXP-NEXT: ret <4 x float> [[CONST_PARTIAL_RCP]] 4033; 4034; IEEE-BADFREXP-LABEL: define <4 x float> @fdiv_constant_f32_vector( 4035; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { 4036; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 5.000000e-01) 4037; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 4038; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 5.000000e-01) 4039; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] 4040; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 4041; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) 4042; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float -2.000000e+00) 4043; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 4044; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float -2.000000e+00) 4045; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]] 4046; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP8]]) 4047; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]]) 4048; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 3.200000e+01) 4049; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0 4050; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 3.200000e+01) 4051; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP14]]) 4052; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 4053; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP17]], 0 4054; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison) 4055; 
IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = fmul float [[TMP18]], [[TMP16]] 4056; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 [[TMP19]], [[TMP15]] 4057; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP21]]) 4058; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 1.000000e+01) 4059; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 4060; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 1.000000e+01) 4061; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP24]]) 4062; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00) 4063; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP27]], 0 4064; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 2.000000e+00) 4065; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = fmul float [[TMP28]], [[TMP26]] 4066; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 [[TMP29]], [[TMP25]] 4067; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP30]], i32 [[TMP31]]) 4068; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0 4069; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP12]], i64 1 4070; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP22]], i64 2 4071; IEEE-BADFREXP-NEXT: [[CONST_PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP32]], i64 3 4072; IEEE-BADFREXP-NEXT: ret <4 x float> [[CONST_PARTIAL_RCP]] 4073; 4074; DAZ-LABEL: define <4 x float> @fdiv_constant_f32_vector( 4075; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { 4076; DAZ-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.rcp.f32(float 5.000000e-01) 4077; DAZ-NEXT: [[TMP2:%.*]] = call float @llvm.amdgcn.rcp.f32(float -2.000000e+00) 4078; DAZ-NEXT: [[TMP3:%.*]] = call { 
float, i32 } @llvm.frexp.f32.i32(float 3.200000e+01) 4079; DAZ-NEXT: [[TMP4:%.*]] = extractvalue { float, i32 } [[TMP3]], 0 4080; DAZ-NEXT: [[TMP5:%.*]] = extractvalue { float, i32 } [[TMP3]], 1 4081; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP4]]) 4082; DAZ-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) 4083; DAZ-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 4084; DAZ-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1 4085; DAZ-NEXT: [[TMP10:%.*]] = fmul float [[TMP8]], [[TMP6]] 4086; DAZ-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP5]] 4087; DAZ-NEXT: [[TMP12:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP11]]) 4088; DAZ-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 1.000000e+01) 4089; DAZ-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0 4090; DAZ-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP13]], 1 4091; DAZ-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP14]]) 4092; DAZ-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00) 4093; DAZ-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP17]], 0 4094; DAZ-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP17]], 1 4095; DAZ-NEXT: [[TMP20:%.*]] = fmul float [[TMP18]], [[TMP16]] 4096; DAZ-NEXT: [[TMP21:%.*]] = sub i32 [[TMP19]], [[TMP15]] 4097; DAZ-NEXT: [[TMP22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP21]]) 4098; DAZ-NEXT: [[TMP23:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0 4099; DAZ-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP23]], float [[TMP2]], i64 1 4100; DAZ-NEXT: [[TMP25:%.*]] = insertelement <4 x float> [[TMP24]], float [[TMP12]], i64 2 4101; DAZ-NEXT: [[CONST_PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP25]], float [[TMP22]], i64 3 4102; DAZ-NEXT: ret <4 x float> [[CONST_PARTIAL_RCP]] 4103; 4104 %const.partial.rcp = fdiv <4 x float> <float 1.0, float -1.0, 
float poison, float 2.0>, <float 0.5, float 2.0, float 32.0, float 10.0>, !fpmath !2 4105 ret <4 x float> %const.partial.rcp 4106} 4107 4108define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs(ptr addrspace(1) %out, float nofpclass(sub) %a, float %b) { 4109; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs( 4110; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] { 4111; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] 4112; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 4113; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] 4114; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 4115; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4116; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 4117; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 4118; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 4119; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 4120; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 4121; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 4122; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] 4123; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] 4124; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) 4125; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 4126; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4127; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 4128; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, 
i32 } [[TMP10]], 1 4129; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]]) 4130; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 4131; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 4132; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1 4133; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]] 4134; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] 4135; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) 4136; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4137; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4138; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 4139; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 4140; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]]) 4141; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 4142; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 4143; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1 4144; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]] 4145; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] 4146; IEEE-GOODFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) 4147; IEEE-GOODFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 4148; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] 4149; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4150; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] 4151; 
IEEE-GOODFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4152; IEEE-GOODFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] 4153; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 4154; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4155; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 4156; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1 4157; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]] 4158; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]]) 4159; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]]) 4160; IEEE-GOODFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]] 4161; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4162; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4163; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0 4164; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP34]], 1 4165; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]] 4166; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]]) 4167; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]]) 4168; IEEE-GOODFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]] 4169; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 4170; IEEE-GOODFREXP-NEXT: ret void 4171; 4172; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs( 4173; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] { 4174; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv 
float [[A]], [[B]] 4175; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 4176; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] 4177; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 4178; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4179; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 4180; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 4181; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 4182; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 4183; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 4184; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) 4185; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] 4186; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] 4187; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) 4188; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 4189; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4190; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 4191; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 4192; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]]) 4193; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 4194; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 4195; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) 4196; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]] 4197; 
IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] 4198; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) 4199; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4200; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4201; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 4202; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 4203; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]]) 4204; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 4205; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 4206; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) 4207; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]] 4208; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] 4209; IEEE-BADFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) 4210; IEEE-BADFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 4211; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] 4212; IEEE-BADFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4213; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] 4214; IEEE-BADFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4215; IEEE-BADFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] 4216; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 4217; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4218; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 
4219; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 4220; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]] 4221; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]]) 4222; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]]) 4223; IEEE-BADFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]] 4224; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4225; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4226; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0 4227; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 4228; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]] 4229; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]]) 4230; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]]) 4231; IEEE-BADFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]] 4232; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 4233; IEEE-BADFREXP-NEXT: ret void 4234; 4235; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs( 4236; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] { 4237; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] 4238; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 4239; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] 4240; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 4241; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4242; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 4243; DAZ-NEXT: [[TMP3:%.*]] = extractvalue { 
float, i32 } [[TMP1]], 1 4244; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 4245; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 4246; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 4247; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 4248; DAZ-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] 4249; DAZ-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] 4250; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) 4251; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 4252; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) 4253; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4254; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) 4255; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 4256; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] 4257; DAZ-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4258; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] 4259; DAZ-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4260; DAZ-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] 4261; DAZ-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 4262; DAZ-NEXT: [[TMP10:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]]) 4263; DAZ-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP10]] 4264; DAZ-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4265; DAZ-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]]) 4266; DAZ-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP11]] 4267; DAZ-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 4268; DAZ-NEXT: ret 
void 4269; 4270 %no.md = fdiv float %a, %b 4271 store volatile float %no.md, ptr addrspace(1) %out, align 4 4272 %md.half.ulp = fdiv float %a, %b, !fpmath !1 4273 store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4 4274 %md.1ulp = fdiv float %a, %b, !fpmath !2 4275 store volatile float %md.1ulp, ptr addrspace(1) %out, align 4 4276 %md.25ulp = fdiv float %a, %b, !fpmath !0 4277 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 4278 %md.3ulp = fdiv float %a, %b, !fpmath !3 4279 store volatile float %md.3ulp, ptr addrspace(1) %out, align 4 4280 %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0 4281 store volatile float %fast.md.25ulp, ptr addrspace(1) %out, align 4 4282 %afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0 4283 store volatile float %afn.md.25ulp, ptr addrspace(1) %out, align 4 4284 %no.md.arcp = fdiv arcp float %a, %b 4285 store volatile float %no.md.arcp, ptr addrspace(1) %out, align 4 4286 %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0 4287 store volatile float %arcp.md.25ulp, ptr addrspace(1) %out, align 4 4288 %arcp.md.1ulp = fdiv arcp float %a, %b, !fpmath !2 4289 store volatile float %arcp.md.1ulp, ptr addrspace(1) %out, align 4 4290 ret void 4291} 4292 4293define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs(ptr addrspace(1) %out, float %a, float nofpclass(sub) %b) { 4294; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs( 4295; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float nofpclass(sub) [[B:%.*]]) #[[ATTR1]] { 4296; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] 4297; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 4298; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] 4299; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 4300; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4301; 
IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 4302; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 4303; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 4304; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 4305; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 4306; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 4307; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] 4308; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] 4309; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) 4310; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 4311; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4312; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 4313; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1 4314; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]]) 4315; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 4316; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 4317; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1 4318; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]] 4319; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] 4320; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) 4321; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4322; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4323; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 
0 4324; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 4325; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]]) 4326; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 4327; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 4328; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1 4329; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]] 4330; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] 4331; IEEE-GOODFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) 4332; IEEE-GOODFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 4333; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] 4334; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4335; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] 4336; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4337; IEEE-GOODFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] 4338; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 4339; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4340; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 4341; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1 4342; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]] 4343; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]]) 4344; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]]) 4345; IEEE-GOODFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]] 4346; 
IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4347; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4348; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0 4349; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP34]], 1 4350; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]] 4351; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]]) 4352; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]]) 4353; IEEE-GOODFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]] 4354; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 4355; IEEE-GOODFREXP-NEXT: ret void 4356; 4357; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs( 4358; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float nofpclass(sub) [[B:%.*]]) #[[ATTR1]] { 4359; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] 4360; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 4361; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] 4362; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 4363; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4364; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 4365; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 4366; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 4367; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 4368; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 4369; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 
@llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) 4370; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] 4371; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] 4372; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) 4373; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 4374; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4375; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 4376; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 4377; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]]) 4378; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 4379; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 4380; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) 4381; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]] 4382; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] 4383; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) 4384; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4385; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4386; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 4387; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 4388; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]]) 4389; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 4390; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 4391; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 
[[A]]) 4392; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]] 4393; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] 4394; IEEE-BADFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) 4395; IEEE-BADFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 4396; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] 4397; IEEE-BADFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4398; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] 4399; IEEE-BADFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4400; IEEE-BADFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] 4401; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 4402; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4403; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 4404; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 4405; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]] 4406; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]]) 4407; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]]) 4408; IEEE-BADFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]] 4409; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4410; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4411; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0 4412; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) 4413; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]] 4414; 
IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]]) 4415; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]]) 4416; IEEE-BADFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]] 4417; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 4418; IEEE-BADFREXP-NEXT: ret void 4419; 4420; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs( 4421; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float nofpclass(sub) [[B:%.*]]) #[[ATTR1]] { 4422; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] 4423; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 4424; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] 4425; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 4426; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) 4427; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 4428; DAZ-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 4429; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) 4430; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) 4431; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 4432; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 4433; DAZ-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] 4434; DAZ-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] 4435; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) 4436; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 4437; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) 4438; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4439; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], 
float [[B]]) 4440; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 4441; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] 4442; DAZ-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4443; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] 4444; DAZ-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4445; DAZ-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] 4446; DAZ-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 4447; DAZ-NEXT: [[TMP10:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]]) 4448; DAZ-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP10]] 4449; DAZ-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 4450; DAZ-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]]) 4451; DAZ-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP11]] 4452; DAZ-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 4453; DAZ-NEXT: ret void 4454; 4455 %no.md = fdiv float %a, %b 4456 store volatile float %no.md, ptr addrspace(1) %out, align 4 4457 %md.half.ulp = fdiv float %a, %b, !fpmath !1 4458 store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4 4459 %md.1ulp = fdiv float %a, %b, !fpmath !2 4460 store volatile float %md.1ulp, ptr addrspace(1) %out, align 4 4461 %md.25ulp = fdiv float %a, %b, !fpmath !0 4462 store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 4463 %md.3ulp = fdiv float %a, %b, !fpmath !3 4464 store volatile float %md.3ulp, ptr addrspace(1) %out, align 4 4465 %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0 4466 store volatile float %fast.md.25ulp, ptr addrspace(1) %out, align 4 4467 %afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0 4468 store volatile float %afn.md.25ulp, ptr addrspace(1) %out, align 4 4469 %no.md.arcp = fdiv arcp float %a, %b 4470 store volatile 
float %no.md.arcp, ptr addrspace(1) %out, align 4 4471 %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0 4472 store volatile float %arcp.md.25ulp, ptr addrspace(1) %out, align 4 4473 %arcp.md.1ulp = fdiv arcp float %a, %b, !fpmath !2 4474 store volatile float %arcp.md.1ulp, ptr addrspace(1) %out, align 4 4475 ret void 4476} 4477 4478declare float @llvm.sqrt.f32(float) 4479declare float @llvm.fabs.f32(float) 4480declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) 4481declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) 4482declare void @llvm.assume(i1 noundef) 4483 4484attributes #0 = { optnone noinline } 4485 4486!0 = !{float 2.500000e+00} 4487!1 = !{float 5.000000e-01} 4488!2 = !{float 1.000000e+00} 4489!3 = !{float 3.000000e+00} 4490