1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SDAG,GCN-IEEE,SDAG-IEEE %s 3; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GISEL,GCN-IEEE,GISEL-IEEE %s 4 5; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,SDAG,GCN-DAZ,SDAG-DAZ %s 6; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,GISEL,GCN-DAZ,GISEL-DAZ %s 7
; Baseline case: llvm.sqrt.f32 applied to a float function argument with no
; fast-math flags and no function attributes.
;
; Four expectation groups follow, one per llc invocation in the file header
; (-global-isel=0 or 1, each with and without
; -denormal-fp-math-f32=preserve-sign):
;   * the two IEEE groups expect v_sqrt_f32 followed by a -1/+1 integer
;     adjustment of the result, each selected by the sign of an FMA residual;
;   * the two DAZ groups expect v_rsq_f32 followed by an FMA refinement
;     sequence.
; In all four groups an input below 0xf800000 (2^-96) is first multiplied by
; 0x4f800000 (2^32) and the result is multiplied by 0x37800000 (2^-16); a
; final v_cmp_class with mask 0x260 selects the (possibly scaled) input
; itself instead of the computed value.
;
; The check lines are generated output; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define float @v_sqrt_f32(float %x) {
; SDAG-IEEE-LABEL: v_sqrt_f32:
; SDAG-IEEE:       ; %bb.0:
; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32:
; GISEL-IEEE:       ; %bb.0:
; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-DAZ-LABEL: v_sqrt_f32:
; SDAG-DAZ:       ; %bb.0:
; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-DAZ-LABEL: v_sqrt_f32:
; GISEL-DAZ:       ; %bb.0:
; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
  %result = call float @llvm.sqrt.f32(float %x)
  ret float %result
}
101 102define float @v_sqrt_f32_fneg(float %x) { 103; SDAG-IEEE-LABEL: v_sqrt_f32_fneg: 104; SDAG-IEEE: ; %bb.0: 105; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 106; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x8f800000 107; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0xcf800000, v0 108; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 109; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc 110; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 111; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 112; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 113; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 114; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 115; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 116; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 117; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 118; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 119; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 120; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 121; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 122; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 123; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 124; SDAG-IEEE-NEXT: 
s_setpc_b64 s[30:31] 125; 126; GISEL-IEEE-LABEL: v_sqrt_f32_fneg: 127; GISEL-IEEE: ; %bb.0: 128; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 129; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 130; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x4f800000 131; GISEL-IEEE-NEXT: v_mul_f32_e64 v2, -v0, v2 132; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 133; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v0, -v0, v2, vcc 134; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 135; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 136; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 137; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1 138; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 139; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 140; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5] 141; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 142; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] 143; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 144; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 145; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 146; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 147; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 148; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 149; 150; SDAG-DAZ-LABEL: v_sqrt_f32_fneg: 151; SDAG-DAZ: ; %bb.0: 152; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 153; SDAG-DAZ-NEXT: s_mov_b32 s4, 0x8f800000 154; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0xcf800000, v0 155; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 156; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc 157; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 158; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 159; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 160; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 161; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 162; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 163; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 164; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 165; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 166; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 167; 
SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 168; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 169; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 170; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 171; 172; GISEL-DAZ-LABEL: v_sqrt_f32_fneg: 173; GISEL-DAZ: ; %bb.0: 174; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 175; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000 176; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x4f800000 177; GISEL-DAZ-NEXT: v_mul_f32_e64 v2, -v0, v2 178; GISEL-DAZ-NEXT: v_cmp_lt_f32_e64 vcc, -v0, v1 179; GISEL-DAZ-NEXT: v_cndmask_b32_e64 v0, -v0, v2, vcc 180; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 181; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 182; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 183; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 184; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 185; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 186; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 187; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 188; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 189; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 190; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 191; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 192; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 193; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 194 %x.neg = fneg float %x 195 %result = call float @llvm.sqrt.f32(float %x.neg) 196 ret float %result 197} 198 199define float @v_sqrt_f32_fabs(float %x) { 200; SDAG-IEEE-LABEL: v_sqrt_f32_fabs: 201; SDAG-IEEE: ; %bb.0: 202; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 203; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000 204; SDAG-IEEE-NEXT: s_mov_b32 s5, 0x4f800000 205; SDAG-IEEE-NEXT: v_mul_f32_e64 v1, |v0|, s5 206; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 207; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, |v0|, v1, vcc 208; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 209; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 210; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 211; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 212; SDAG-IEEE-NEXT: 
v_cndmask_b32_e64 v2, v1, v2, s[4:5] 213; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 214; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 215; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 216; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 217; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 218; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 219; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 220; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 221; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 222; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 223; 224; GISEL-IEEE-LABEL: v_sqrt_f32_fabs: 225; GISEL-IEEE: ; %bb.0: 226; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 227; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 228; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x4f800000 229; GISEL-IEEE-NEXT: v_mul_f32_e64 v2, |v0|, v2 230; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 231; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc 232; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 233; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 234; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 235; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1 236; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 237; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 238; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5] 239; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 240; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] 241; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 242; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 243; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 244; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 245; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 246; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 247; 248; SDAG-DAZ-LABEL: v_sqrt_f32_fabs: 249; SDAG-DAZ: ; %bb.0: 250; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 251; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000 252; SDAG-DAZ-NEXT: s_mov_b32 s5, 0x4f800000 253; SDAG-DAZ-NEXT: v_mul_f32_e64 v1, |v0|, s5 254; 
SDAG-DAZ-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 255; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v0, |v0|, v1, vcc 256; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 257; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 258; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 259; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 260; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 261; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 262; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 263; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 264; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 265; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 266; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 267; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 268; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 269; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 270; 271; GISEL-DAZ-LABEL: v_sqrt_f32_fabs: 272; GISEL-DAZ: ; %bb.0: 273; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 274; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000 275; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x4f800000 276; GISEL-DAZ-NEXT: v_mul_f32_e64 v2, |v0|, v2 277; GISEL-DAZ-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 278; GISEL-DAZ-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc 279; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 280; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 281; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 282; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 283; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 284; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 285; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 286; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 287; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 288; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 289; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 290; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 291; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 292; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 293 %x.fabs = call float @llvm.fabs.f32(float %x) 294 %result = call float @llvm.sqrt.f32(float %x.fabs) 295 ret float %result 296} 297 298define float @v_sqrt_f32_fneg_fabs(float 
%x) { 299; SDAG-IEEE-LABEL: v_sqrt_f32_fneg_fabs: 300; SDAG-IEEE: ; %bb.0: 301; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 302; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x8f800000 303; SDAG-IEEE-NEXT: s_mov_b32 s5, 0xcf800000 304; SDAG-IEEE-NEXT: v_mul_f32_e64 v1, |v0|, s5 305; SDAG-IEEE-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 306; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, -|v0|, v1, vcc 307; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 308; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 309; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 310; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 311; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 312; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 313; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 314; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 315; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 316; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 317; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 318; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 319; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 320; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 321; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 322; 323; GISEL-IEEE-LABEL: v_sqrt_f32_fneg_fabs: 324; GISEL-IEEE: ; %bb.0: 325; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 326; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 327; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x4f800000 328; GISEL-IEEE-NEXT: v_mul_f32_e64 v2, -|v0|, v2 329; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 330; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v0, -|v0|, v2, vcc 331; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 332; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 333; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 334; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1 335; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 336; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 337; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5] 338; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 339; GISEL-IEEE-NEXT: 
v_cndmask_b32_e64 v1, v1, v4, s[4:5] 340; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 341; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 342; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 343; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 344; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 345; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 346; 347; SDAG-DAZ-LABEL: v_sqrt_f32_fneg_fabs: 348; SDAG-DAZ: ; %bb.0: 349; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 350; SDAG-DAZ-NEXT: s_mov_b32 s4, 0x8f800000 351; SDAG-DAZ-NEXT: s_mov_b32 s5, 0xcf800000 352; SDAG-DAZ-NEXT: v_mul_f32_e64 v1, |v0|, s5 353; SDAG-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 354; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v0, -|v0|, v1, vcc 355; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 356; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 357; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 358; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 359; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 360; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 361; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 362; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 363; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 364; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 365; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 366; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 367; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 368; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 369; 370; GISEL-DAZ-LABEL: v_sqrt_f32_fneg_fabs: 371; GISEL-DAZ: ; %bb.0: 372; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 373; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000 374; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x4f800000 375; GISEL-DAZ-NEXT: v_mul_f32_e64 v2, -|v0|, v2 376; GISEL-DAZ-NEXT: v_cmp_lt_f32_e64 vcc, -|v0|, v1 377; GISEL-DAZ-NEXT: v_cndmask_b32_e64 v0, -|v0|, v2, vcc 378; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 379; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 380; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 381; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 382; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, 
v3, v2 383; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 384; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 385; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 386; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 387; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 388; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 389; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 390; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 391; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 392 %x.fabs = call float @llvm.fabs.f32(float %x) 393 %x.fabs.neg = fneg float %x.fabs 394 %result = call float @llvm.sqrt.f32(float %x.fabs.neg) 395 ret float %result 396} 397 398define float @v_sqrt_f32_ninf(float %x) { 399; SDAG-IEEE-LABEL: v_sqrt_f32_ninf: 400; SDAG-IEEE: ; %bb.0: 401; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 402; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000 403; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 404; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 405; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 406; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 407; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 408; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 409; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 410; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 411; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 412; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 413; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 414; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 415; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 416; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 417; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 418; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 419; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 420; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 421; 422; GISEL-IEEE-LABEL: v_sqrt_f32_ninf: 423; GISEL-IEEE: ; %bb.0: 424; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 425; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 426; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 
0x4f800000, v0 427; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 428; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 429; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 430; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 431; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 432; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1 433; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 434; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 435; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5] 436; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 437; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] 438; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 439; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 440; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 441; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 442; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 443; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 444; 445; SDAG-DAZ-LABEL: v_sqrt_f32_ninf: 446; SDAG-DAZ: ; %bb.0: 447; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 448; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000 449; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 450; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 451; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 452; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 453; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 454; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 455; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 456; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 457; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 458; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 459; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 460; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 461; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 462; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 463; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 464; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 465; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 466; 467; GISEL-DAZ-LABEL: v_sqrt_f32_ninf: 468; GISEL-DAZ: ; %bb.0: 469; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 470; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000 471; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 472; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 473; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 474; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 475; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 476; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 477; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 478; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 479; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 480; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 481; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 482; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 483; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 484; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 485; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 486; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 487; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 488 %result = call ninf float @llvm.sqrt.f32(float %x) 489 ret float %result 490} 491
; llvm.sqrt.f32 in a function that carries attribute group #5.
;
; NOTE(review): the definition of #5 is not visible in this part of the file;
; judging by the test name it is presumably the no-infs function attribute -
; confirm against the attribute list at the end of the file.
; NOTE(review): the call below also carries the `ninf` fast-math flag, exactly
; like the call in v_sqrt_f32_ninf, so this test does not exercise the
; function attribute in isolation. If the attribute alone is what this test
; is meant to cover, drop the flag from the call and regenerate the check
; lines with utils/update_llc_test_checks.py.
;
; The check lines are generated output; do not edit them by hand.
define float @v_sqrt_f32_no_infs_attribute(float %x) #5 {
; SDAG-IEEE-LABEL: v_sqrt_f32_no_infs_attribute:
; SDAG-IEEE:       ; %bb.0:
; SDAG-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-IEEE-NEXT:    s_mov_b32 s4, 0xf800000
; SDAG-IEEE-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-IEEE-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
; SDAG-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
; SDAG-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
; SDAG-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
; SDAG-IEEE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
; SDAG-IEEE-NEXT:    v_fma_f32 v1, -v3, v1, v0
; SDAG-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
; SDAG-IEEE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
; SDAG-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
; SDAG-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-IEEE-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-IEEE-LABEL: v_sqrt_f32_no_infs_attribute:
; GISEL-IEEE:       ; %bb.0:
; GISEL-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-IEEE-NEXT:    v_mov_b32_e32 v1, 0xf800000
; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-IEEE-NEXT:    v_sqrt_f32_e32 v1, v0
; GISEL-IEEE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
; GISEL-IEEE-NEXT:    v_fma_f32 v3, -v2, v1, v0
; GISEL-IEEE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v1
; GISEL-IEEE-NEXT:    v_fma_f32 v5, -v4, v1, v0
; GISEL-IEEE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s[4:5]
; GISEL-IEEE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v5
; GISEL-IEEE-NEXT:    v_cndmask_b32_e64 v1, v1, v4, s[4:5]
; GISEL-IEEE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-IEEE-NEXT:    v_mov_b32_e32 v2, 0x260
; GISEL-IEEE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-IEEE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-IEEE-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-DAZ-LABEL: v_sqrt_f32_no_infs_attribute:
; SDAG-DAZ:       ; %bb.0:
; SDAG-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-DAZ-NEXT:    s_mov_b32 s4, 0xf800000
; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
; SDAG-DAZ-NEXT:    v_cmp_gt_f32_e32 vcc, s4, v0
; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
; SDAG-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
; SDAG-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
; SDAG-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
; SDAG-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
; SDAG-DAZ-NEXT:    v_fma_f32 v4, -v2, v2, v0
; SDAG-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
; SDAG-DAZ-NEXT:    v_fma_f32 v1, v4, v1, v2
; SDAG-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; SDAG-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
; SDAG-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
; SDAG-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; SDAG-DAZ-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-DAZ-LABEL: v_sqrt_f32_no_infs_attribute:
; GISEL-DAZ:       ; %bb.0:
; GISEL-DAZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-DAZ-NEXT:    v_mov_b32_e32 v1, 0xf800000
; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v0
; GISEL-DAZ-NEXT:    v_cmp_lt_f32_e32 vcc, v0, v1
; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GISEL-DAZ-NEXT:    v_rsq_f32_e32 v1, v0
; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, v0, v1
; GISEL-DAZ-NEXT:    v_mul_f32_e32 v1, 0.5, v1
; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
; GISEL-DAZ-NEXT:    v_fma_f32 v2, v2, v3, v2
; GISEL-DAZ-NEXT:    v_fma_f32 v1, v1, v3, v1
; GISEL-DAZ-NEXT:    v_fma_f32 v3, -v2, v2, v0
; GISEL-DAZ-NEXT:    v_fma_f32 v1, v3, v1, v2
; GISEL-DAZ-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GISEL-DAZ-NEXT:    v_mov_b32_e32 v2, 0x260
; GISEL-DAZ-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
; GISEL-DAZ-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GISEL-DAZ-NEXT:    s_setpc_b64 s[30:31]
  %result = call ninf float @llvm.sqrt.f32(float %x)
  ret float %result
}
585 586define float @v_sqrt_f32_nnan(float %x) { 587; SDAG-IEEE-LABEL: v_sqrt_f32_nnan: 588; SDAG-IEEE: ; %bb.0: 589; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 590; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000 591; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 592; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 593; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 594; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 595; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 596; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 597; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 598; 
SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 599; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 600; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 601; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 602; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 603; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 604; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 605; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 606; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 607; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 608; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 609; 610; GISEL-IEEE-LABEL: v_sqrt_f32_nnan: 611; GISEL-IEEE: ; %bb.0: 612; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 613; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 614; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 615; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 616; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 617; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 618; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 619; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 620; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1 621; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 622; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 623; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5] 624; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 625; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] 626; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 627; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 628; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 629; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 630; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 631; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 632; 633; SDAG-DAZ-LABEL: v_sqrt_f32_nnan: 634; SDAG-DAZ: ; %bb.0: 635; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 636; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000 637; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 638; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 639; SDAG-DAZ-NEXT: 
v_cndmask_b32_e32 v0, v0, v1, vcc 640; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 641; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 642; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 643; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 644; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 645; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 646; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 647; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 648; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 649; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 650; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 651; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 652; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 653; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 654; 655; GISEL-DAZ-LABEL: v_sqrt_f32_nnan: 656; GISEL-DAZ: ; %bb.0: 657; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 658; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000 659; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 660; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 661; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 662; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 663; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 664; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 665; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 666; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 667; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 668; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 669; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 670; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 671; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 672; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 673; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 674; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 675; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 676 %result = call nnan float @llvm.sqrt.f32(float %x) 677 ret float %result 678} 679 680define amdgpu_ps i32 @s_sqrt_f32(float inreg %x) { 681; SDAG-IEEE-LABEL: s_sqrt_f32: 682; SDAG-IEEE: ; %bb.0: 683; SDAG-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000 684; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000 
685; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, s0, v1 686; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, s0 687; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s0, v0 688; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[0:1] 689; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 690; SDAG-IEEE-NEXT: v_add_i32_e32 v2, vcc, -1, v1 691; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 692; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v3 693; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 694; SDAG-IEEE-NEXT: v_add_i32_e32 v3, vcc, 1, v1 695; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 696; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1 697; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc 698; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 699; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] 700; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 701; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 702; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 703; SDAG-IEEE-NEXT: v_readfirstlane_b32 s0, v0 704; SDAG-IEEE-NEXT: ; return to shader part epilog 705; 706; GISEL-IEEE-LABEL: s_sqrt_f32: 707; GISEL-IEEE: ; %bb.0: 708; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 709; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x4f800000 710; GISEL-IEEE-NEXT: v_mov_b32_e32 v0, s0 711; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, s0, v2 712; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1 713; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 714; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 715; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[0:1], -1, v1 716; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 717; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[0:1], 1, v1 718; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 719; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v3 720; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] 721; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v5 722; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1] 723; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 724; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 725; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 
0x260 726; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 727; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 728; GISEL-IEEE-NEXT: v_readfirstlane_b32 s0, v0 729; GISEL-IEEE-NEXT: ; return to shader part epilog 730; 731; SDAG-DAZ-LABEL: s_sqrt_f32: 732; SDAG-DAZ: ; %bb.0: 733; SDAG-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000 734; SDAG-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000 735; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, s0, v1 736; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, s0 737; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 738; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 739; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 740; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 741; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 742; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 743; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 744; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 745; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 746; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 747; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 748; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 749; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 750; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 751; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 752; SDAG-DAZ-NEXT: v_readfirstlane_b32 s0, v0 753; SDAG-DAZ-NEXT: ; return to shader part epilog 754; 755; GISEL-DAZ-LABEL: s_sqrt_f32: 756; GISEL-DAZ: ; %bb.0: 757; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000 758; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x4f800000 759; GISEL-DAZ-NEXT: v_mov_b32_e32 v0, s0 760; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, s0, v2 761; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1 762; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 763; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 764; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 765; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 766; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 767; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 768; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 769; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 770; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, 
v1, v2 771; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 772; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 773; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 774; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 775; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 776; GISEL-DAZ-NEXT: v_readfirstlane_b32 s0, v0 777; GISEL-DAZ-NEXT: ; return to shader part epilog 778 %result = call float @llvm.sqrt.f32(float %x) 779 %cast = bitcast float %result to i32 780 %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast) 781 ret i32 %firstlane 782} 783 784define amdgpu_ps i32 @s_sqrt_f32_ninf(float inreg %x) { 785; SDAG-IEEE-LABEL: s_sqrt_f32_ninf: 786; SDAG-IEEE: ; %bb.0: 787; SDAG-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000 788; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000 789; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, s0, v1 790; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, s0 791; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s0, v0 792; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[0:1] 793; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 794; SDAG-IEEE-NEXT: v_add_i32_e32 v2, vcc, -1, v1 795; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 796; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v3 797; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 798; SDAG-IEEE-NEXT: v_add_i32_e32 v3, vcc, 1, v1 799; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 800; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1 801; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc 802; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 803; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] 804; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 805; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 806; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 807; SDAG-IEEE-NEXT: v_readfirstlane_b32 s0, v0 808; SDAG-IEEE-NEXT: ; return to shader part epilog 809; 810; GISEL-IEEE-LABEL: s_sqrt_f32_ninf: 811; GISEL-IEEE: ; %bb.0: 812; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 813; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x4f800000 814; GISEL-IEEE-NEXT: v_mov_b32_e32 v0, 
s0 815; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, s0, v2 816; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1 817; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 818; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 819; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[0:1], -1, v1 820; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 821; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[0:1], 1, v1 822; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 823; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v3 824; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] 825; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v5 826; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1] 827; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 828; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 829; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 830; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 831; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 832; GISEL-IEEE-NEXT: v_readfirstlane_b32 s0, v0 833; GISEL-IEEE-NEXT: ; return to shader part epilog 834; 835; SDAG-DAZ-LABEL: s_sqrt_f32_ninf: 836; SDAG-DAZ: ; %bb.0: 837; SDAG-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000 838; SDAG-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000 839; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, s0, v1 840; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, s0 841; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 842; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 843; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 844; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 845; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 846; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 847; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 848; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 849; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 850; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 851; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 852; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 853; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 854; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 855; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 856; SDAG-DAZ-NEXT: 
v_readfirstlane_b32 s0, v0 857; SDAG-DAZ-NEXT: ; return to shader part epilog 858; 859; GISEL-DAZ-LABEL: s_sqrt_f32_ninf: 860; GISEL-DAZ: ; %bb.0: 861; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000 862; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x4f800000 863; GISEL-DAZ-NEXT: v_mov_b32_e32 v0, s0 864; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, s0, v2 865; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s0, v1 866; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 867; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 868; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 869; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 870; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 871; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 872; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 873; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 874; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 875; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 876; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 877; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 878; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 879; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 880; GISEL-DAZ-NEXT: v_readfirstlane_b32 s0, v0 881; GISEL-DAZ-NEXT: ; return to shader part epilog 882 %result = call ninf float @llvm.sqrt.f32(float %x) 883 %cast = bitcast float %result to i32 884 %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast) 885 ret i32 %firstlane 886} 887 888define amdgpu_ps i32 @s_sqrt_f32_afn(float inreg %x) { 889; GCN-LABEL: s_sqrt_f32_afn: 890; GCN: ; %bb.0: 891; GCN-NEXT: v_sqrt_f32_e32 v0, s0 892; GCN-NEXT: v_readfirstlane_b32 s0, v0 893; GCN-NEXT: ; return to shader part epilog 894 %result = call afn float @llvm.sqrt.f32(float %x) 895 %cast = bitcast float %result to i32 896 %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast) 897 ret i32 %firstlane 898} 899 900define amdgpu_ps i32 @s_sqrt_f32_afn_nnan_ninf(float inreg %x) { 901; GCN-LABEL: s_sqrt_f32_afn_nnan_ninf: 902; GCN: ; %bb.0: 903; GCN-NEXT: v_sqrt_f32_e32 v0, s0 904; GCN-NEXT: v_readfirstlane_b32 s0, v0 905; 
GCN-NEXT: ; return to shader part epilog 906 %result = call afn nnan ninf float @llvm.sqrt.f32(float %x) 907 %cast = bitcast float %result to i32 908 %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast) 909 ret i32 %firstlane 910} 911 912define float @v_sqrt_f32_nsz(float %x) { 913; SDAG-IEEE-LABEL: v_sqrt_f32_nsz: 914; SDAG-IEEE: ; %bb.0: 915; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 916; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000 917; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 918; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 919; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 920; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 921; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 922; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 923; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 924; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 925; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 926; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 927; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 928; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 929; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 930; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 931; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 932; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 933; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 934; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 935; 936; GISEL-IEEE-LABEL: v_sqrt_f32_nsz: 937; GISEL-IEEE: ; %bb.0: 938; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 939; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 940; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 941; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 942; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 943; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 944; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 945; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 946; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1 947; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 948; GISEL-IEEE-NEXT: 
v_cmp_ge_f32_e64 s[4:5], 0, v3 949; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5] 950; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 951; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] 952; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 953; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 954; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 955; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 956; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 957; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 958; 959; SDAG-DAZ-LABEL: v_sqrt_f32_nsz: 960; SDAG-DAZ: ; %bb.0: 961; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 962; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000 963; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 964; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 965; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 966; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 967; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 968; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 969; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 970; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 971; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 972; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 973; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 974; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 975; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 976; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 977; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 978; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 979; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 980; 981; GISEL-DAZ-LABEL: v_sqrt_f32_nsz: 982; GISEL-DAZ: ; %bb.0: 983; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 984; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000 985; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 986; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 987; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 988; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 989; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 990; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 991; GISEL-DAZ-NEXT: v_fma_f32 
v3, -v1, v2, 0.5 992; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 993; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 994; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 995; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 996; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 997; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 998; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 999; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1000; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1001; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 1002 %result = call nsz float @llvm.sqrt.f32(float %x) 1003 ret float %result 1004} 1005 1006define float @v_sqrt_f32_nnan_ninf(float %x) { 1007; SDAG-IEEE-LABEL: v_sqrt_f32_nnan_ninf: 1008; SDAG-IEEE: ; %bb.0: 1009; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1010; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000 1011; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 1012; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1013; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1014; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 1015; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 1016; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 1017; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 1018; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 1019; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 1020; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 1021; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 1022; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 1023; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 1024; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1025; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 1026; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1027; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1028; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 1029; 1030; GISEL-IEEE-LABEL: v_sqrt_f32_nnan_ninf: 1031; GISEL-IEEE: ; %bb.0: 1032; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1033; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 1034; GISEL-IEEE-NEXT: 
v_mul_f32_e32 v2, 0x4f800000, v0 1035; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1036; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1037; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 1038; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 1039; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 1040; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1 1041; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 1042; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 1043; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5] 1044; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 1045; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] 1046; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 1047; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1048; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 1049; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1050; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1051; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 1052; 1053; SDAG-DAZ-LABEL: v_sqrt_f32_nnan_ninf: 1054; SDAG-DAZ: ; %bb.0: 1055; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1056; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000 1057; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 1058; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1059; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1060; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 1061; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 1062; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 1063; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 1064; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 1065; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 1066; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 1067; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 1068; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 1069; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1070; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 1071; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1072; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1073; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 1074; 1075; GISEL-DAZ-LABEL: 
v_sqrt_f32_nnan_ninf: 1076; GISEL-DAZ: ; %bb.0: 1077; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1078; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000 1079; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 1080; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1081; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1082; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 1083; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 1084; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 1085; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 1086; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 1087; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 1088; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 1089; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 1090; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 1091; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1092; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 1093; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1094; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1095; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 1096 %result = call nnan ninf float @llvm.sqrt.f32(float %x) 1097 ret float %result 1098} 1099 1100define float @v_sqrt_f32_nnan_ninf_nsz(float %x) { 1101; SDAG-IEEE-LABEL: v_sqrt_f32_nnan_ninf_nsz: 1102; SDAG-IEEE: ; %bb.0: 1103; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1104; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000 1105; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 1106; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1107; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1108; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 1109; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 1110; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 1111; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 1112; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 1113; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 1114; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 1115; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 1116; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 1117; SDAG-IEEE-NEXT: 
v_mul_f32_e32 v2, 0x37800000, v1 1118; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1119; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 1120; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1121; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1122; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 1123; 1124; GISEL-IEEE-LABEL: v_sqrt_f32_nnan_ninf_nsz: 1125; GISEL-IEEE: ; %bb.0: 1126; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1127; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 1128; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 1129; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1130; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1131; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 1132; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 1133; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 1134; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1 1135; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 1136; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 1137; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5] 1138; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 1139; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] 1140; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 1141; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1142; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 1143; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1144; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1145; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 1146; 1147; SDAG-DAZ-LABEL: v_sqrt_f32_nnan_ninf_nsz: 1148; SDAG-DAZ: ; %bb.0: 1149; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1150; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000 1151; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 1152; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1153; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1154; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 1155; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 1156; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 1157; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 1158; SDAG-DAZ-NEXT: 
v_fma_f32 v2, v2, v3, v2 1159; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 1160; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 1161; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 1162; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 1163; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1164; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 1165; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1166; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1167; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 1168; 1169; GISEL-DAZ-LABEL: v_sqrt_f32_nnan_ninf_nsz: 1170; GISEL-DAZ: ; %bb.0: 1171; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1172; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000 1173; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 1174; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1175; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1176; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 1177; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 1178; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 1179; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 1180; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 1181; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 1182; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 1183; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 1184; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 1185; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1186; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 1187; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1188; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1189; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 1190 %result = call nnan ninf nsz float @llvm.sqrt.f32(float %x) 1191 ret float %result 1192} 1193 1194define float @v_sqrt_f32_afn(float %x) { 1195; GCN-LABEL: v_sqrt_f32_afn: 1196; GCN: ; %bb.0: 1197; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1198; GCN-NEXT: v_sqrt_f32_e32 v0, v0 1199; GCN-NEXT: s_setpc_b64 s[30:31] 1200 %result = call afn float @llvm.sqrt.f32(float %x) 1201 ret float %result 1202} 1203 1204define float @v_sqrt_f32_afn_nsz(float %x) { 1205; GCN-LABEL: 
v_sqrt_f32_afn_nsz: 1206; GCN: ; %bb.0: 1207; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1208; GCN-NEXT: v_sqrt_f32_e32 v0, v0 1209; GCN-NEXT: s_setpc_b64 s[30:31] 1210 %result = call afn nsz float @llvm.sqrt.f32(float %x) 1211 ret float %result 1212} 1213 1214define <2 x float> @v_sqrt_v2f32_afn(<2 x float> %x) { 1215; GCN-LABEL: v_sqrt_v2f32_afn: 1216; GCN: ; %bb.0: 1217; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1218; GCN-NEXT: v_sqrt_f32_e32 v0, v0 1219; GCN-NEXT: v_sqrt_f32_e32 v1, v1 1220; GCN-NEXT: s_setpc_b64 s[30:31] 1221 %result = call afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %x) 1222 ret <2 x float> %result 1223} 1224 1225define float @v_sqrt_f32_afn_nnan(float %x) { 1226; GCN-LABEL: v_sqrt_f32_afn_nnan: 1227; GCN: ; %bb.0: 1228; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1229; GCN-NEXT: v_sqrt_f32_e32 v0, v0 1230; GCN-NEXT: s_setpc_b64 s[30:31] 1231 %result = call afn nnan float @llvm.sqrt.f32(float %x) 1232 ret float %result 1233} 1234 1235define float @v_sqrt_f32_fabs_afn_ninf(float %x) { 1236; GCN-LABEL: v_sqrt_f32_fabs_afn_ninf: 1237; GCN: ; %bb.0: 1238; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1239; GCN-NEXT: v_sqrt_f32_e64 v0, |v0| 1240; GCN-NEXT: s_setpc_b64 s[30:31] 1241 %fabs = call float @llvm.fabs.f32(float %x) 1242 %result = call afn ninf float @llvm.sqrt.f32(float %fabs) 1243 ret float %result 1244} 1245 1246define float @v_sqrt_f32_afn_nnan_ninf(float %x) { 1247; GCN-LABEL: v_sqrt_f32_afn_nnan_ninf: 1248; GCN: ; %bb.0: 1249; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1250; GCN-NEXT: v_sqrt_f32_e32 v0, v0 1251; GCN-NEXT: s_setpc_b64 s[30:31] 1252 %result = call afn nnan ninf float @llvm.sqrt.f32(float %x) 1253 ret float %result 1254} 1255 1256define <2 x float> @v_sqrt_v2f32_afn_nnan_ninf(<2 x float> %x) { 1257; GCN-LABEL: v_sqrt_v2f32_afn_nnan_ninf: 1258; GCN: ; %bb.0: 1259; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1260; GCN-NEXT: v_sqrt_f32_e32 v0, v0 1261; GCN-NEXT: v_sqrt_f32_e32 v1, v1 
1262; GCN-NEXT: s_setpc_b64 s[30:31] 1263 %result = call afn nnan ninf <2 x float> @llvm.sqrt.v2f32(<2 x float> %x) 1264 ret <2 x float> %result 1265} 1266 1267define float @v_sqrt_f32_afn_nnan_ninf_nsz(float %x) { 1268; GCN-LABEL: v_sqrt_f32_afn_nnan_ninf_nsz: 1269; GCN: ; %bb.0: 1270; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1271; GCN-NEXT: v_sqrt_f32_e32 v0, v0 1272; GCN-NEXT: s_setpc_b64 s[30:31] 1273 %result = call afn nnan ninf nsz float @llvm.sqrt.f32(float %x) 1274 ret float %result 1275} 1276 1277define float @v_sqrt_f32__approx_func_fp_math(float %x) #2 { 1278; GCN-LABEL: v_sqrt_f32__approx_func_fp_math: 1279; GCN: ; %bb.0: 1280; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1281; GCN-NEXT: v_sqrt_f32_e32 v0, v0 1282; GCN-NEXT: s_setpc_b64 s[30:31] 1283 %result = call nsz float @llvm.sqrt.f32(float %x) 1284 ret float %result 1285} 1286 1287define float @v_sqrt_f32__enough_unsafe_attrs(float %x) #3 { 1288; GCN-LABEL: v_sqrt_f32__enough_unsafe_attrs: 1289; GCN: ; %bb.0: 1290; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1291; GCN-NEXT: v_sqrt_f32_e32 v0, v0 1292; GCN-NEXT: s_setpc_b64 s[30:31] 1293 %result = call nsz float @llvm.sqrt.f32(float %x) 1294 ret float %result 1295} 1296 1297define float @v_sqrt_f32__unsafe_attr(float %x) #4 { 1298; GCN-LABEL: v_sqrt_f32__unsafe_attr: 1299; GCN: ; %bb.0: 1300; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1301; GCN-NEXT: v_sqrt_f32_e32 v0, v0 1302; GCN-NEXT: s_setpc_b64 s[30:31] 1303 %result = call nsz float @llvm.sqrt.f32(float %x) 1304 ret float %result 1305} 1306 1307define <2 x float> @v_sqrt_v2f32(<2 x float> %x) { 1308; SDAG-IEEE-LABEL: v_sqrt_v2f32: 1309; SDAG-IEEE: ; %bb.0: 1310; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1311; SDAG-IEEE-NEXT: s_mov_b32 s6, 0xf800000 1312; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 1313; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0 1314; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1315; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v2, v0 
1316; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2 1317; SDAG-IEEE-NEXT: v_fma_f32 v4, -v3, v2, v0 1318; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4 1319; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5] 1320; SDAG-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v2 1321; SDAG-IEEE-NEXT: v_fma_f32 v2, -v4, v2, v0 1322; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2 1323; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5] 1324; SDAG-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 1325; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1326; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, 0x4f800000, v1 1327; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v1 1328; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 1329; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v4, v1 1330; SDAG-IEEE-NEXT: v_mov_b32_e32 v3, 0x260 1331; SDAG-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v3 1332; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5] 1333; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v4 1334; SDAG-IEEE-NEXT: v_fma_f32 v5, -v2, v4, v1 1335; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5 1336; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5] 1337; SDAG-IEEE-NEXT: v_add_i32_e64 v5, s[4:5], 1, v4 1338; SDAG-IEEE-NEXT: v_fma_f32 v4, -v5, v4, v1 1339; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v4 1340; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5] 1341; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v2 1342; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 1343; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v1, v3 1344; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 1345; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 1346; 1347; GISEL-IEEE-LABEL: v_sqrt_v2f32: 1348; GISEL-IEEE: ; %bb.0: 1349; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1350; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0xf800000 1351; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, 0x4f800000, v0 1352; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 1353; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1354; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v3, v0 
1355; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], -1, v3 1356; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v3, v0 1357; GISEL-IEEE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v3 1358; GISEL-IEEE-NEXT: v_fma_f32 v7, -v6, v3, v0 1359; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5 1360; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5] 1361; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v7 1362; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[4:5] 1363; GISEL-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v3 1364; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 1365; GISEL-IEEE-NEXT: v_mul_f32_e32 v5, 0x4f800000, v1 1366; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2 1367; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1368; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v2, v1 1369; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x260 1370; GISEL-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v4 1371; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v0, v3, v0, s[4:5] 1372; GISEL-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2 1373; GISEL-IEEE-NEXT: v_fma_f32 v5, -v3, v2, v1 1374; GISEL-IEEE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v2 1375; GISEL-IEEE-NEXT: v_fma_f32 v7, -v6, v2, v1 1376; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5 1377; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5] 1378; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v7 1379; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5] 1380; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 1381; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1382; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v1, v4 1383; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 1384; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 1385; 1386; SDAG-DAZ-LABEL: v_sqrt_v2f32: 1387; SDAG-DAZ: ; %bb.0: 1388; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1389; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000 1390; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 1391; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1392; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1393; SDAG-DAZ-NEXT: v_rsq_f32_e32 
v2, v0 1394; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, v0, v2 1395; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0.5, v2 1396; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v3, 0.5 1397; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v4, v3 1398; SDAG-DAZ-NEXT: v_fma_f32 v5, -v3, v3, v0 1399; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v4, v2 1400; SDAG-DAZ-NEXT: v_fma_f32 v2, v5, v2, v3 1401; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 1402; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1403; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0x4f800000, v1 1404; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 1405; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1406; SDAG-DAZ-NEXT: v_rsq_f32_e32 v3, v1 1407; SDAG-DAZ-NEXT: v_mov_b32_e32 v4, 0x260 1408; SDAG-DAZ-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v4 1409; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5] 1410; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v1, v3 1411; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v3 1412; SDAG-DAZ-NEXT: v_fma_f32 v5, -v3, v2, 0.5 1413; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v5, v2 1414; SDAG-DAZ-NEXT: v_fma_f32 v6, -v2, v2, v1 1415; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v5, v3 1416; SDAG-DAZ-NEXT: v_fma_f32 v2, v6, v3, v2 1417; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 1418; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1419; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v1, v4 1420; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 1421; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 1422; 1423; GISEL-DAZ-LABEL: v_sqrt_v2f32: 1424; GISEL-DAZ: ; %bb.0: 1425; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1426; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0xf800000 1427; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0x4f800000, v0 1428; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 1429; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1430; GISEL-DAZ-NEXT: v_rsq_f32_e32 v3, v0 1431; GISEL-DAZ-NEXT: v_mul_f32_e32 v4, v0, v3 1432; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v3 1433; GISEL-DAZ-NEXT: v_fma_f32 v5, -v3, v4, 0.5 1434; GISEL-DAZ-NEXT: v_fma_f32 v4, v4, v5, v4 1435; GISEL-DAZ-NEXT: v_fma_f32 v3, 
v3, v5, v3 1436; GISEL-DAZ-NEXT: v_fma_f32 v5, -v4, v4, v0 1437; GISEL-DAZ-NEXT: v_fma_f32 v3, v5, v3, v4 1438; GISEL-DAZ-NEXT: v_mul_f32_e32 v4, 0x37800000, v3 1439; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 1440; GISEL-DAZ-NEXT: v_mul_f32_e32 v4, 0x4f800000, v1 1441; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2 1442; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 1443; GISEL-DAZ-NEXT: v_rsq_f32_e32 v2, v1 1444; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, 0x260 1445; GISEL-DAZ-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v4 1446; GISEL-DAZ-NEXT: v_cndmask_b32_e64 v0, v3, v0, s[4:5] 1447; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, v1, v2 1448; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0.5, v2 1449; GISEL-DAZ-NEXT: v_fma_f32 v5, -v2, v3, 0.5 1450; GISEL-DAZ-NEXT: v_fma_f32 v3, v3, v5, v3 1451; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v5, v2 1452; GISEL-DAZ-NEXT: v_fma_f32 v5, -v3, v3, v1 1453; GISEL-DAZ-NEXT: v_fma_f32 v2, v5, v2, v3 1454; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 1455; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1456; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v1, v4 1457; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 1458; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 1459 %result = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x) 1460 ret <2 x float> %result 1461} 1462 1463define <3 x float> @v_sqrt_v3f32(<3 x float> %x) { 1464; SDAG-IEEE-LABEL: v_sqrt_v3f32: 1465; SDAG-IEEE: ; %bb.0: 1466; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1467; SDAG-IEEE-NEXT: s_mov_b32 s6, 0xf800000 1468; SDAG-IEEE-NEXT: v_mul_f32_e32 v3, 0x4f800000, v0 1469; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0 1470; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1471; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v3, v0 1472; SDAG-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], -1, v3 1473; SDAG-IEEE-NEXT: v_fma_f32 v5, -v4, v3, v0 1474; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5 1475; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, v3, v4, s[4:5] 1476; SDAG-IEEE-NEXT: v_add_i32_e64 v5, s[4:5], 1, v3 1477; 
SDAG-IEEE-NEXT: v_fma_f32 v3, -v5, v3, v0 1478; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v3 1479; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v4, v5, s[4:5] 1480; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v3 1481; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 1482; SDAG-IEEE-NEXT: v_mul_f32_e32 v5, 0x4f800000, v1 1483; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v1 1484; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1485; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v5, v1 1486; SDAG-IEEE-NEXT: v_mov_b32_e32 v4, 0x260 1487; SDAG-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v4 1488; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v3, v0, s[4:5] 1489; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v5 1490; SDAG-IEEE-NEXT: v_fma_f32 v6, -v3, v5, v1 1491; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v6 1492; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] 1493; SDAG-IEEE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v5 1494; SDAG-IEEE-NEXT: v_fma_f32 v5, -v6, v5, v1 1495; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 1496; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[4:5] 1497; SDAG-IEEE-NEXT: v_mul_f32_e32 v5, 0x37800000, v3 1498; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 1499; SDAG-IEEE-NEXT: v_mul_f32_e32 v5, 0x4f800000, v2 1500; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v2 1501; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc 1502; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v5, v2 1503; SDAG-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v1, v4 1504; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5] 1505; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v5 1506; SDAG-IEEE-NEXT: v_fma_f32 v6, -v3, v5, v2 1507; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v6 1508; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] 1509; SDAG-IEEE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v5 1510; SDAG-IEEE-NEXT: v_fma_f32 v5, -v6, v5, v2 1511; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 1512; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[4:5] 1513; SDAG-IEEE-NEXT: v_mul_f32_e32 v5, 0x37800000, v3 1514; 
SDAG-IEEE-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 1515; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v2, v4 1516; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 1517; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 1518; 1519; GISEL-IEEE-LABEL: v_sqrt_v3f32: 1520; GISEL-IEEE: ; %bb.0: 1521; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1522; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0xf800000 1523; GISEL-IEEE-NEXT: v_mul_f32_e32 v4, 0x4f800000, v0 1524; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 1525; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1526; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v4, v0 1527; GISEL-IEEE-NEXT: v_add_i32_e64 v5, s[4:5], -1, v4 1528; GISEL-IEEE-NEXT: v_fma_f32 v6, -v5, v4, v0 1529; GISEL-IEEE-NEXT: v_add_i32_e64 v7, s[4:5], 1, v4 1530; GISEL-IEEE-NEXT: v_fma_f32 v8, -v7, v4, v0 1531; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v6 1532; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] 1533; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v8 1534; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[4:5] 1535; GISEL-IEEE-NEXT: v_mul_f32_e32 v5, 0x37800000, v4 1536; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc 1537; GISEL-IEEE-NEXT: v_mul_f32_e32 v6, 0x4f800000, v1 1538; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3 1539; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc 1540; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v6, v1 1541; GISEL-IEEE-NEXT: v_mov_b32_e32 v5, 0x260 1542; GISEL-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v5 1543; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] 1544; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], -1, v6 1545; GISEL-IEEE-NEXT: v_fma_f32 v7, -v4, v6, v1 1546; GISEL-IEEE-NEXT: v_add_i32_e64 v8, s[4:5], 1, v6 1547; GISEL-IEEE-NEXT: v_fma_f32 v9, -v8, v6, v1 1548; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v7 1549; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[4:5] 1550; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v9 1551; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5] 1552; GISEL-IEEE-NEXT: v_mul_f32_e32 
v6, 0x37800000, v4 1553; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 1554; GISEL-IEEE-NEXT: v_mul_f32_e32 v6, 0x4f800000, v2 1555; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v2, v3 1556; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 1557; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v3, v2 1558; GISEL-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v1, v5 1559; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[4:5] 1560; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], -1, v3 1561; GISEL-IEEE-NEXT: v_fma_f32 v6, -v4, v3, v2 1562; GISEL-IEEE-NEXT: v_add_i32_e64 v7, s[4:5], 1, v3 1563; GISEL-IEEE-NEXT: v_fma_f32 v8, -v7, v3, v2 1564; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v6 1565; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5] 1566; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v8 1567; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[4:5] 1568; GISEL-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v3 1569; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 1570; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v2, v5 1571; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 1572; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 1573; 1574; SDAG-DAZ-LABEL: v_sqrt_v3f32: 1575; SDAG-DAZ: ; %bb.0: 1576; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1577; SDAG-DAZ-NEXT: s_mov_b32 s6, 0xf800000 1578; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0x4f800000, v0 1579; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0 1580; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1581; SDAG-DAZ-NEXT: v_rsq_f32_e32 v3, v0 1582; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, v0, v3 1583; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v3 1584; SDAG-DAZ-NEXT: v_fma_f32 v5, -v3, v4, 0.5 1585; SDAG-DAZ-NEXT: v_fma_f32 v4, v4, v5, v4 1586; SDAG-DAZ-NEXT: v_fma_f32 v6, -v4, v4, v0 1587; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v5, v3 1588; SDAG-DAZ-NEXT: v_fma_f32 v3, v6, v3, v4 1589; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, 0x37800000, v3 1590; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 1591; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, 0x4f800000, v1 1592; SDAG-DAZ-NEXT: 
v_cmp_gt_f32_e32 vcc, s6, v1 1593; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 1594; SDAG-DAZ-NEXT: v_rsq_f32_e32 v4, v1 1595; SDAG-DAZ-NEXT: v_mov_b32_e32 v5, 0x260 1596; SDAG-DAZ-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v5 1597; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v0, v3, v0, s[4:5] 1598; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, v1, v4 1599; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, 0.5, v4 1600; SDAG-DAZ-NEXT: v_fma_f32 v6, -v4, v3, 0.5 1601; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v6, v3 1602; SDAG-DAZ-NEXT: v_fma_f32 v4, v4, v6, v4 1603; SDAG-DAZ-NEXT: v_mul_f32_e32 v6, 0x4f800000, v2 1604; SDAG-DAZ-NEXT: v_cmp_gt_f32_e64 s[4:5], s6, v2 1605; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5] 1606; SDAG-DAZ-NEXT: v_fma_f32 v7, -v3, v3, v1 1607; SDAG-DAZ-NEXT: v_rsq_f32_e32 v6, v2 1608; SDAG-DAZ-NEXT: v_fma_f32 v3, v7, v4, v3 1609; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, 0x37800000, v3 1610; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 1611; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v1, v5 1612; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 1613; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, v2, v6 1614; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, 0.5, v6 1615; SDAG-DAZ-NEXT: v_fma_f32 v6, -v4, v3, 0.5 1616; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v6, v3 1617; SDAG-DAZ-NEXT: v_fma_f32 v7, -v3, v3, v2 1618; SDAG-DAZ-NEXT: v_fma_f32 v4, v4, v6, v4 1619; SDAG-DAZ-NEXT: v_fma_f32 v3, v7, v4, v3 1620; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, 0x37800000, v3 1621; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5] 1622; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v2, v5 1623; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 1624; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 1625; 1626; GISEL-DAZ-LABEL: v_sqrt_v3f32: 1627; GISEL-DAZ: ; %bb.0: 1628; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1629; GISEL-DAZ-NEXT: v_mov_b32_e32 v3, 0xf800000 1630; GISEL-DAZ-NEXT: v_mul_f32_e32 v4, 0x4f800000, v0 1631; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, v0, v3 1632; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1633; GISEL-DAZ-NEXT: 
v_rsq_f32_e32 v4, v0 1634; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, v0, v4 1635; GISEL-DAZ-NEXT: v_mul_f32_e32 v4, 0.5, v4 1636; GISEL-DAZ-NEXT: v_fma_f32 v6, -v4, v5, 0.5 1637; GISEL-DAZ-NEXT: v_fma_f32 v5, v5, v6, v5 1638; GISEL-DAZ-NEXT: v_fma_f32 v4, v4, v6, v4 1639; GISEL-DAZ-NEXT: v_fma_f32 v6, -v5, v5, v0 1640; GISEL-DAZ-NEXT: v_fma_f32 v4, v6, v4, v5 1641; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, 0x37800000, v4 1642; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc 1643; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, 0x4f800000, v1 1644; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, v1, v3 1645; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1646; GISEL-DAZ-NEXT: v_rsq_f32_e32 v5, v1 1647; GISEL-DAZ-NEXT: v_mov_b32_e32 v6, 0x260 1648; GISEL-DAZ-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v6 1649; GISEL-DAZ-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] 1650; GISEL-DAZ-NEXT: v_mul_f32_e32 v4, v1, v5 1651; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, 0.5, v5 1652; GISEL-DAZ-NEXT: v_fma_f32 v7, -v5, v4, 0.5 1653; GISEL-DAZ-NEXT: v_fma_f32 v4, v4, v7, v4 1654; GISEL-DAZ-NEXT: v_fma_f32 v5, v5, v7, v5 1655; GISEL-DAZ-NEXT: v_fma_f32 v7, -v4, v4, v1 1656; GISEL-DAZ-NEXT: v_fma_f32 v4, v7, v5, v4 1657; GISEL-DAZ-NEXT: v_mul_f32_e32 v7, 0x4f800000, v2 1658; GISEL-DAZ-NEXT: v_cmp_lt_f32_e64 s[4:5], v2, v3 1659; GISEL-DAZ-NEXT: v_cndmask_b32_e64 v2, v2, v7, s[4:5] 1660; GISEL-DAZ-NEXT: v_rsq_f32_e32 v3, v2 1661; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, 0x37800000, v4 1662; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc 1663; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v1, v6 1664; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc 1665; GISEL-DAZ-NEXT: v_mul_f32_e32 v4, v2, v3 1666; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v3 1667; GISEL-DAZ-NEXT: v_fma_f32 v5, -v3, v4, 0.5 1668; GISEL-DAZ-NEXT: v_fma_f32 v4, v4, v5, v4 1669; GISEL-DAZ-NEXT: v_fma_f32 v3, v3, v5, v3 1670; GISEL-DAZ-NEXT: v_fma_f32 v5, -v4, v4, v2 1671; GISEL-DAZ-NEXT: v_fma_f32 v3, v5, v3, v4 1672; GISEL-DAZ-NEXT: v_mul_f32_e32 v4, 0x37800000, v3 1673; 
GISEL-DAZ-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5] 1674; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v2, v6 1675; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 1676; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 1677 %result = call <3 x float> @llvm.sqrt.v3f32(<3 x float> %x) 1678 ret <3 x float> %result 1679} 1680 1681; fpmath should be ignored 1682define float @v_sqrt_f32_ulp05(float %x) { 1683; SDAG-IEEE-LABEL: v_sqrt_f32_ulp05: 1684; SDAG-IEEE: ; %bb.0: 1685; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1686; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000 1687; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 1688; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1689; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1690; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 1691; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 1692; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 1693; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 1694; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 1695; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 1696; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 1697; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 1698; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 1699; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 1700; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1701; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 1702; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1703; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1704; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 1705; 1706; GISEL-IEEE-LABEL: v_sqrt_f32_ulp05: 1707; GISEL-IEEE: ; %bb.0: 1708; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1709; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 1710; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 1711; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1712; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1713; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 1714; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 1715; GISEL-IEEE-NEXT: v_fma_f32 
v3, -v2, v1, v0 1716; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1 1717; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 1718; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 1719; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5] 1720; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 1721; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] 1722; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 1723; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1724; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 1725; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1726; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1727; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 1728; 1729; SDAG-DAZ-LABEL: v_sqrt_f32_ulp05: 1730; SDAG-DAZ: ; %bb.0: 1731; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1732; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000 1733; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 1734; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1735; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1736; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 1737; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 1738; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 1739; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 1740; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 1741; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 1742; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 1743; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 1744; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 1745; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1746; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 1747; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1748; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1749; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 1750; 1751; GISEL-DAZ-LABEL: v_sqrt_f32_ulp05: 1752; GISEL-DAZ: ; %bb.0: 1753; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1754; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000 1755; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 1756; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1757; GISEL-DAZ-NEXT: 
v_cndmask_b32_e32 v0, v0, v2, vcc 1758; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 1759; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 1760; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 1761; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 1762; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 1763; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 1764; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 1765; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 1766; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 1767; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1768; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 1769; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1770; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1771; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 1772 %result = call float @llvm.sqrt.f32(float %x), !fpmath !0 1773 ret float %result 1774} 1775 1776; fpmath should be used with DAZ only 1777define float @v_sqrt_f32_ulp1(float %x) { 1778; SDAG-IEEE-LABEL: v_sqrt_f32_ulp1: 1779; SDAG-IEEE: ; %bb.0: 1780; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1781; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000 1782; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 1783; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1784; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1785; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 1786; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 1787; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 1788; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 1789; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 1790; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 1791; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 1792; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 1793; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 1794; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 1795; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1796; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 1797; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1798; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1799; 
SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 1800; 1801; GISEL-IEEE-LABEL: v_sqrt_f32_ulp1: 1802; GISEL-IEEE: ; %bb.0: 1803; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1804; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 1805; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 1806; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1807; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1808; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 1809; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 1810; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 1811; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1 1812; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 1813; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 1814; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5] 1815; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 1816; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] 1817; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 1818; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1819; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 1820; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1821; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1822; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 1823; 1824; GCN-DAZ-LABEL: v_sqrt_f32_ulp1: 1825; GCN-DAZ: ; %bb.0: 1826; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1827; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 1828; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 1829 %result = call float @llvm.sqrt.f32(float %x), !fpmath !1 1830 ret float %result 1831} 1832 1833; fpmath should always be used 1834define float @v_sqrt_f32_ulp2(float %x) { 1835; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2: 1836; SDAG-IEEE: ; %bb.0: 1837; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1838; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 1839; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1840; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1841; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1842; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 1843; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 
1844; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 1845; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 1846; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 1847; 1848; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2: 1849; GISEL-IEEE: ; %bb.0: 1850; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1851; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 1852; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1853; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1854; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1855; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 1856; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 1857; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 1858; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 1859; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 1860; 1861; GCN-DAZ-LABEL: v_sqrt_f32_ulp2: 1862; GCN-DAZ: ; %bb.0: 1863; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1864; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 1865; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 1866 %result = call float @llvm.sqrt.f32(float %x), !fpmath !2 1867 ret float %result 1868} 1869 1870; fpmath should always be used 1871define float @v_sqrt_f32_ulp25(float %x) { 1872; SDAG-IEEE-LABEL: v_sqrt_f32_ulp25: 1873; SDAG-IEEE: ; %bb.0: 1874; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1875; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 1876; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1877; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1878; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1879; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 1880; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 1881; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 1882; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 1883; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 1884; 1885; GISEL-IEEE-LABEL: v_sqrt_f32_ulp25: 1886; GISEL-IEEE: ; %bb.0: 1887; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1888; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 1889; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1890; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1891; 
GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1892; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 1893; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 1894; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 1895; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 1896; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 1897; 1898; GCN-DAZ-LABEL: v_sqrt_f32_ulp25: 1899; GCN-DAZ: ; %bb.0: 1900; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1901; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 1902; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 1903 %result = call float @llvm.sqrt.f32(float %x), !fpmath !3 1904 ret float %result 1905} 1906 1907; fpmath should always be used 1908define float @v_sqrt_f32_ulp3(float %x) { 1909; SDAG-IEEE-LABEL: v_sqrt_f32_ulp3: 1910; SDAG-IEEE: ; %bb.0: 1911; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1912; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 1913; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1914; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1915; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1916; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 1917; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 1918; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 1919; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 1920; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 1921; 1922; GISEL-IEEE-LABEL: v_sqrt_f32_ulp3: 1923; GISEL-IEEE: ; %bb.0: 1924; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1925; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 1926; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 1927; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1928; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1929; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 1930; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 1931; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 1932; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 1933; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 1934; 1935; GCN-DAZ-LABEL: v_sqrt_f32_ulp3: 1936; GCN-DAZ: ; %bb.0: 1937; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1938; GCN-DAZ-NEXT: 
v_sqrt_f32_e32 v0, v0 1939; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 1940 %result = call float @llvm.sqrt.f32(float %x), !fpmath !4 1941 ret float %result 1942} 1943 1944define float @v_sqrt_f32_ulp2_fabs(float %x) { 1945; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2_fabs: 1946; SDAG-IEEE: ; %bb.0: 1947; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1948; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 1949; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 1950; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] 1951; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1952; SDAG-IEEE-NEXT: v_ldexp_f32_e64 v0, |v0|, v1 1953; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 1954; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, s[4:5] 1955; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 1956; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 1957; 1958; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_fabs: 1959; GISEL-IEEE: ; %bb.0: 1960; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1961; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 1962; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v1 1963; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] 1964; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 1965; GISEL-IEEE-NEXT: v_ldexp_f32_e64 v0, |v0|, v1 1966; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 1967; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, s[4:5] 1968; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 1969; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 1970; 1971; GCN-DAZ-LABEL: v_sqrt_f32_ulp2_fabs: 1972; GCN-DAZ: ; %bb.0: 1973; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1974; GCN-DAZ-NEXT: v_sqrt_f32_e64 v0, |v0| 1975; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 1976 %x.fabs = call float @llvm.fabs.f32(float %x) 1977 %result = call float @llvm.sqrt.f32(float %x.fabs), !fpmath !2 1978 ret float %result 1979} 1980 1981define <2 x float> @v_sqrt_v2f32_ulp1(<2 x float> %x) { 1982; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp1: 1983; SDAG-IEEE: ; %bb.0: 1984; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1985; SDAG-IEEE-NEXT: s_mov_b32 
s6, 0xf800000 1986; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 1987; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0 1988; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1989; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v2, v0 1990; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2 1991; SDAG-IEEE-NEXT: v_fma_f32 v4, -v3, v2, v0 1992; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4 1993; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5] 1994; SDAG-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v2 1995; SDAG-IEEE-NEXT: v_fma_f32 v2, -v4, v2, v0 1996; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2 1997; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5] 1998; SDAG-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 1999; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2000; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, 0x4f800000, v1 2001; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v1 2002; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 2003; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v4, v1 2004; SDAG-IEEE-NEXT: v_mov_b32_e32 v3, 0x260 2005; SDAG-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v3 2006; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5] 2007; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v4 2008; SDAG-IEEE-NEXT: v_fma_f32 v5, -v2, v4, v1 2009; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5 2010; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5] 2011; SDAG-IEEE-NEXT: v_add_i32_e64 v5, s[4:5], 1, v4 2012; SDAG-IEEE-NEXT: v_fma_f32 v4, -v5, v4, v1 2013; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v4 2014; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5] 2015; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v2 2016; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2017; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v1, v3 2018; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 2019; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 2020; 2021; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp1: 2022; GISEL-IEEE: ; %bb.0: 2023; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2024; GISEL-IEEE-NEXT: v_mov_b32_e32 
v2, 0xf800000 2025; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, 0x4f800000, v0 2026; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 2027; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2028; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v3, v0 2029; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], -1, v3 2030; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v3, v0 2031; GISEL-IEEE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v3 2032; GISEL-IEEE-NEXT: v_fma_f32 v7, -v6, v3, v0 2033; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5 2034; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[4:5] 2035; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v7 2036; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[4:5] 2037; GISEL-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v3 2038; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 2039; GISEL-IEEE-NEXT: v_mul_f32_e32 v5, 0x4f800000, v1 2040; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v1, v2 2041; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 2042; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v2, v1 2043; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x260 2044; GISEL-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v4 2045; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v0, v3, v0, s[4:5] 2046; GISEL-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2 2047; GISEL-IEEE-NEXT: v_fma_f32 v5, -v3, v2, v1 2048; GISEL-IEEE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v2 2049; GISEL-IEEE-NEXT: v_fma_f32 v7, -v6, v2, v1 2050; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5 2051; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5] 2052; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v7 2053; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5] 2054; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 2055; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2056; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v1, v4 2057; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 2058; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 2059; 2060; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp1: 2061; GCN-DAZ: ; %bb.0: 2062; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2063; 
GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 2064; GCN-DAZ-NEXT: v_sqrt_f32_e32 v1, v1 2065; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 2066 %result = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !1 2067 ret <2 x float> %result 2068} 2069 2070; fpmath should always be used 2071define <2 x float> @v_sqrt_v2f32_ulp2(<2 x float> %x) { 2072; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2: 2073; SDAG-IEEE: ; %bb.0: 2074; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2075; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 2076; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2077; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2078; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2079; SDAG-IEEE-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1 2080; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2081; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] 2082; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2083; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 2084; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 2085; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 2086; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc 2087; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2088; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, s[4:5] 2089; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 2090; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 2091; 2092; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2: 2093; GISEL-IEEE: ; %bb.0: 2094; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2095; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000 2096; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 2097; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 2098; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2 2099; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v3, 5, v3 2100; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] 2101; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v3 2102; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2103; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 2104; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 2105; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 2106; GISEL-IEEE-NEXT: 
v_cndmask_b32_e64 v2, 0, -16, vcc 2107; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2108; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, s[4:5] 2109; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 2110; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 2111; 2112; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp2: 2113; GCN-DAZ: ; %bb.0: 2114; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2115; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 2116; GCN-DAZ-NEXT: v_sqrt_f32_e32 v1, v1 2117; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 2118 %result = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2 2119 ret <2 x float> %result 2120} 2121 2122define <2 x float> @v_sqrt_v2f32_ulp1_fabs(<2 x float> %x) { 2123; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp1_fabs: 2124; SDAG-IEEE: ; %bb.0: 2125; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2126; SDAG-IEEE-NEXT: s_mov_b32 s6, 0xf800000 2127; SDAG-IEEE-NEXT: s_mov_b32 s7, 0x4f800000 2128; SDAG-IEEE-NEXT: v_mul_f32_e64 v2, |v0|, s7 2129; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 2130; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, |v0|, v2, vcc 2131; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v2, v0 2132; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2 2133; SDAG-IEEE-NEXT: v_fma_f32 v4, -v3, v2, v0 2134; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4 2135; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5] 2136; SDAG-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v2 2137; SDAG-IEEE-NEXT: v_fma_f32 v2, -v4, v2, v0 2138; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2 2139; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5] 2140; SDAG-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 2141; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2142; SDAG-IEEE-NEXT: v_mul_f32_e64 v4, |v1|, s7 2143; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s6 2144; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, |v1|, v4, vcc 2145; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v4, v1 2146; SDAG-IEEE-NEXT: v_mov_b32_e32 v3, 0x260 2147; SDAG-IEEE-NEXT: v_cmp_class_f32_e64 s[4:5], v0, v3 2148; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, 
v2, v0, s[4:5] 2149; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v4 2150; SDAG-IEEE-NEXT: v_fma_f32 v5, -v2, v4, v1 2151; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5 2152; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5] 2153; SDAG-IEEE-NEXT: v_add_i32_e64 v5, s[4:5], 1, v4 2154; SDAG-IEEE-NEXT: v_fma_f32 v4, -v5, v4, v1 2155; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v4 2156; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5] 2157; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v2 2158; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2159; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v1, v3 2160; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 2161; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 2162; 2163; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp1_fabs: 2164; GISEL-IEEE: ; %bb.0: 2165; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2166; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0xf800000 2167; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x4f800000 2168; GISEL-IEEE-NEXT: v_mul_f32_e64 v4, |v0|, v3 2169; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 2170; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v0, |v0|, v4, vcc 2171; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v4, v0 2172; GISEL-IEEE-NEXT: v_mul_f32_e64 v3, |v1|, v3 2173; GISEL-IEEE-NEXT: v_add_i32_e64 v5, s[4:5], -1, v4 2174; GISEL-IEEE-NEXT: v_fma_f32 v6, -v5, v4, v0 2175; GISEL-IEEE-NEXT: v_add_i32_e64 v7, s[4:5], 1, v4 2176; GISEL-IEEE-NEXT: v_fma_f32 v8, -v7, v4, v0 2177; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v6 2178; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] 2179; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v8 2180; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[4:5] 2181; GISEL-IEEE-NEXT: v_mul_f32_e32 v5, 0x37800000, v4 2182; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc 2183; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v2 2184; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, |v1|, v3, vcc 2185; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v2, v1 2186; GISEL-IEEE-NEXT: v_mov_b32_e32 v5, 0x260 2187; GISEL-IEEE-NEXT: 
v_cmp_class_f32_e64 s[4:5], v0, v5 2188; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] 2189; GISEL-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2 2190; GISEL-IEEE-NEXT: v_fma_f32 v4, -v3, v2, v1 2191; GISEL-IEEE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v2 2192; GISEL-IEEE-NEXT: v_fma_f32 v7, -v6, v2, v1 2193; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4 2194; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5] 2195; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v7 2196; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5] 2197; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 2198; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2199; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v1, v5 2200; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 2201; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 2202; 2203; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp1_fabs: 2204; GCN-DAZ: ; %bb.0: 2205; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2206; GCN-DAZ-NEXT: v_sqrt_f32_e64 v0, |v0| 2207; GCN-DAZ-NEXT: v_sqrt_f32_e64 v1, |v1| 2208; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 2209 %x.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x) 2210 %result = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x.fabs), !fpmath !1 2211 ret <2 x float> %result 2212} 2213 2214; fpmath should always be used 2215define <2 x float> @v_sqrt_v2f32_ulp2_fabs(<2 x float> %x) { 2216; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2_fabs: 2217; SDAG-IEEE: ; %bb.0: 2218; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2219; SDAG-IEEE-NEXT: s_mov_b32 s6, 0x800000 2220; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s6 2221; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] 2222; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2223; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[6:7], |v1|, s6 2224; SDAG-IEEE-NEXT: v_ldexp_f32_e64 v0, |v0|, v2 2225; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[6:7] 2226; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2227; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 2228; SDAG-IEEE-NEXT: v_ldexp_f32_e64 v1, 
|v1|, v2 2229; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 2230; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, s[4:5] 2231; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2232; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, s[6:7] 2233; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 2234; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 2235; 2236; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2_fabs: 2237; GISEL-IEEE: ; %bb.0: 2238; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2239; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000 2240; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, v2 2241; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] 2242; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[6:7], |v1|, v2 2243; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v3, 5, v3 2244; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[6:7] 2245; GISEL-IEEE-NEXT: v_ldexp_f32_e64 v0, |v0|, v3 2246; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2247; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 2248; GISEL-IEEE-NEXT: v_ldexp_f32_e64 v1, |v1|, v2 2249; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 2250; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, s[4:5] 2251; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2252; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, s[6:7] 2253; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 2254; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 2255; 2256; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp2_fabs: 2257; GCN-DAZ: ; %bb.0: 2258; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2259; GCN-DAZ-NEXT: v_sqrt_f32_e64 v0, |v0| 2260; GCN-DAZ-NEXT: v_sqrt_f32_e64 v1, |v1| 2261; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 2262 %x.fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x) 2263 %result = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x.fabs), !fpmath !2 2264 ret <2 x float> %result 2265} 2266 2267; afn is stronger than the fpmath 2268define float @v_sqrt_f32_afn_ulp1(float %x) { 2269; GCN-LABEL: v_sqrt_f32_afn_ulp1: 2270; GCN: ; %bb.0: 2271; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2272; GCN-NEXT: v_sqrt_f32_e32 v0, v0 2273; 
GCN-NEXT: s_setpc_b64 s[30:31] 2274 %result = call afn float @llvm.sqrt.f32(float %x), !fpmath !1 2275 ret float %result 2276} 2277 2278; afn is stronger than the fpmath 2279define float @v_sqrt_f32_afn_ulp2(float %x) { 2280; GCN-LABEL: v_sqrt_f32_afn_ulp2: 2281; GCN: ; %bb.0: 2282; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2283; GCN-NEXT: v_sqrt_f32_e32 v0, v0 2284; GCN-NEXT: s_setpc_b64 s[30:31] 2285 %result = call afn float @llvm.sqrt.f32(float %x), !fpmath !2 2286 ret float %result 2287} 2288 2289define <2 x float> @v_sqrt_v2f32_afn_ulp1(<2 x float> %x) { 2290; GCN-LABEL: v_sqrt_v2f32_afn_ulp1: 2291; GCN: ; %bb.0: 2292; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2293; GCN-NEXT: v_sqrt_f32_e32 v0, v0 2294; GCN-NEXT: v_sqrt_f32_e32 v1, v1 2295; GCN-NEXT: s_setpc_b64 s[30:31] 2296 %result = call afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !1 2297 ret <2 x float> %result 2298} 2299 2300; afn is stronger than the fpmath 2301define <2 x float> @v_sqrt_v2f32_afn_ulp2(<2 x float> %x) { 2302; GCN-LABEL: v_sqrt_v2f32_afn_ulp2: 2303; GCN: ; %bb.0: 2304; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2305; GCN-NEXT: v_sqrt_f32_e32 v0, v0 2306; GCN-NEXT: v_sqrt_f32_e32 v1, v1 2307; GCN-NEXT: s_setpc_b64 s[30:31] 2308 %result = call afn <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2 2309 ret <2 x float> %result 2310} 2311 2312define float @v_sqrt_f32_ulp2_noncontractable_rcp(float %x) { 2313; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2_noncontractable_rcp: 2314; SDAG-IEEE: ; %bb.0: 2315; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2316; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 2317; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2318; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2319; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2320; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 2321; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 2322; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 2323; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x7f800000 2324; 
SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 2325; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v1, v0 2326; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 2327; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc 2328; SDAG-IEEE-NEXT: v_rcp_f32_e32 v1, v1 2329; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2330; SDAG-IEEE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 2331; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v1, v0 2332; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 2333; 2334; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_noncontractable_rcp: 2335; GISEL-IEEE: ; %bb.0: 2336; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2337; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 2338; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2339; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 2340; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 2341; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 2342; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 2343; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 2344; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x7f800000 2345; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 2346; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v1, v0 2347; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 2348; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc 2349; GISEL-IEEE-NEXT: v_rcp_f32_e32 v1, v1 2350; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2351; GISEL-IEEE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 2352; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v1, v0 2353; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 2354; 2355; GCN-DAZ-LABEL: v_sqrt_f32_ulp2_noncontractable_rcp: 2356; GCN-DAZ: ; %bb.0: 2357; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2358; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 2359; GCN-DAZ-NEXT: v_rcp_f32_e32 v0, v0 2360; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 2361 %sqrt = call contract float @llvm.sqrt.f32(float %x), !fpmath !4 2362 %result = fdiv float 1.0, %sqrt, !fpmath !3 2363 ret float %result 2364} 2365 2366define float @v_sqrt_f32_ulp2_contractable_rcp(float %x) { 2367; SDAG-IEEE-LABEL: 
v_sqrt_f32_ulp2_contractable_rcp: 2368; SDAG-IEEE: ; %bb.0: 2369; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2370; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 2371; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2372; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 24, vcc 2373; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 2374; SDAG-IEEE-NEXT: v_rsq_f32_e32 v0, v0 2375; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 12, vcc 2376; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 2377; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 2378; 2379; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_rcp: 2380; GISEL-IEEE: ; %bb.0: 2381; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2382; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 2383; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 2384; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 24, vcc 2385; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 2386; GISEL-IEEE-NEXT: v_rsq_f32_e32 v0, v0 2387; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 12, vcc 2388; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 2389; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 2390; 2391; GCN-DAZ-LABEL: v_sqrt_f32_ulp2_contractable_rcp: 2392; GCN-DAZ: ; %bb.0: 2393; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2394; GCN-DAZ-NEXT: v_rsq_f32_e32 v0, v0 2395; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 2396 %sqrt = call contract float @llvm.sqrt.f32(float %x), !fpmath !4 2397 %result = fdiv contract float 1.0, %sqrt, !fpmath !3 2398 ret float %result 2399} 2400 2401define float @v_sqrt_f32_ulp2_noncontractable_fdiv(float %x, float %y) { 2402; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2_noncontractable_fdiv: 2403; SDAG-IEEE: ; %bb.0: 2404; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2405; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 2406; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2407; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2408; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2409; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2410; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 2411; SDAG-IEEE-NEXT: 
v_cndmask_b32_e64 v2, 0, -16, vcc 2412; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x7f800000 2413; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v3, v1 2414; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2415; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0 2416; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 2417; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 2418; SDAG-IEEE-NEXT: v_rcp_f32_e32 v2, v2 2419; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 2420; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2421; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 2422; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2423; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, v3, v2 2424; SDAG-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v1, v0 2425; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0 2426; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 2427; 2428; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_noncontractable_fdiv: 2429; GISEL-IEEE: ; %bb.0: 2430; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2431; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000 2432; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 2433; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2434; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2435; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2436; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 2437; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc 2438; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x7f800000 2439; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v4, v1 2440; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2441; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0 2442; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3 2443; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 2444; GISEL-IEEE-NEXT: v_rcp_f32_e32 v2, v2 2445; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v3 2446; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2447; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v3, v1, v4, vcc 2448; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2449; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, v3, v2 2450; GISEL-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v1, v0 2451; 
GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0 2452; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 2453; 2454; SDAG-DAZ-LABEL: v_sqrt_f32_ulp2_noncontractable_fdiv: 2455; SDAG-DAZ: ; %bb.0: 2456; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2457; SDAG-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 2458; SDAG-DAZ-NEXT: s_mov_b32 s4, 0x6f800000 2459; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x2f800000 2460; SDAG-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 2461; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc 2462; SDAG-DAZ-NEXT: v_mul_f32_e32 v0, v0, v2 2463; SDAG-DAZ-NEXT: v_rcp_f32_e32 v0, v0 2464; SDAG-DAZ-NEXT: v_mul_f32_e32 v0, v1, v0 2465; SDAG-DAZ-NEXT: v_mul_f32_e32 v0, v2, v0 2466; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 2467; 2468; GISEL-DAZ-LABEL: v_sqrt_f32_ulp2_noncontractable_fdiv: 2469; GISEL-DAZ: ; %bb.0: 2470; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2471; GISEL-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 2472; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x6f800000 2473; GISEL-DAZ-NEXT: v_mov_b32_e32 v3, 0x2f800000 2474; GISEL-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 2475; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc 2476; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v0, v2 2477; GISEL-DAZ-NEXT: v_rcp_f32_e32 v0, v0 2478; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v1, v0 2479; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v2, v0 2480; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 2481 %sqrt = call contract float @llvm.sqrt.f32(float %x), !fpmath !4 2482 %result = fdiv float %y, %sqrt, !fpmath !3 2483 ret float %result 2484} 2485 2486define float @v_sqrt_f32_ulp2_contractable_fdiv(float %x, float %y) { 2487; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_fdiv: 2488; SDAG-IEEE: ; %bb.0: 2489; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2490; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 2491; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2492; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2493; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2494; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2495; SDAG-IEEE-NEXT: 
v_sqrt_f32_e32 v0, v0 2496; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc 2497; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x7f800000 2498; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v3, v1 2499; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2500; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0 2501; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 2502; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 2503; SDAG-IEEE-NEXT: v_rcp_f32_e32 v2, v2 2504; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 2505; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2506; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 2507; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2508; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, v3, v2 2509; SDAG-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v1, v0 2510; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0 2511; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 2512; 2513; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_fdiv: 2514; GISEL-IEEE: ; %bb.0: 2515; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2516; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000 2517; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 2518; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2519; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2520; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2521; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 2522; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc 2523; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x7f800000 2524; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v4, v1 2525; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2526; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0 2527; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3 2528; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 2529; GISEL-IEEE-NEXT: v_rcp_f32_e32 v2, v2 2530; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v3 2531; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2532; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v3, v1, v4, vcc 2533; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2534; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, v3, v2 2535; GISEL-IEEE-NEXT: 
v_sub_i32_e32 v0, vcc, v1, v0 2536; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0 2537; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 2538; 2539; SDAG-DAZ-LABEL: v_sqrt_f32_ulp2_contractable_fdiv: 2540; SDAG-DAZ: ; %bb.0: 2541; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2542; SDAG-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 2543; SDAG-DAZ-NEXT: s_mov_b32 s4, 0x6f800000 2544; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x2f800000 2545; SDAG-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 2546; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc 2547; SDAG-DAZ-NEXT: v_mul_f32_e32 v0, v0, v2 2548; SDAG-DAZ-NEXT: v_rcp_f32_e32 v0, v0 2549; SDAG-DAZ-NEXT: v_mul_f32_e32 v0, v1, v0 2550; SDAG-DAZ-NEXT: v_mul_f32_e32 v0, v2, v0 2551; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 2552; 2553; GISEL-DAZ-LABEL: v_sqrt_f32_ulp2_contractable_fdiv: 2554; GISEL-DAZ: ; %bb.0: 2555; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2556; GISEL-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 2557; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x6f800000 2558; GISEL-DAZ-NEXT: v_mov_b32_e32 v3, 0x2f800000 2559; GISEL-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v2 2560; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v2, 1.0, v3, vcc 2561; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v0, v2 2562; GISEL-DAZ-NEXT: v_rcp_f32_e32 v0, v0 2563; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v1, v0 2564; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v2, v0 2565; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 2566 %sqrt = call contract float @llvm.sqrt.f32(float %x), !fpmath !4 2567 %result = fdiv contract float %y, %sqrt, !fpmath !3 2568 ret float %result 2569} 2570 2571define float @v_sqrt_f32_ulp2_contractable_fdiv_arcp(float %x, float %y) { 2572; SDAG-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_fdiv_arcp: 2573; SDAG-IEEE: ; %bb.0: 2574; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2575; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 2576; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2577; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2578; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2579; SDAG-IEEE-NEXT: 
v_ldexp_f32_e32 v0, v0, v2 2580; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 2581; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc 2582; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x7f800000 2583; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2584; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0 2585; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 2586; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 2587; SDAG-IEEE-NEXT: v_rcp_f32_e32 v2, v2 2588; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2589; SDAG-IEEE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 2590; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0 2591; SDAG-IEEE-NEXT: v_mul_f32_e32 v0, v1, v0 2592; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 2593; 2594; GISEL-IEEE-LABEL: v_sqrt_f32_ulp2_contractable_fdiv_arcp: 2595; GISEL-IEEE: ; %bb.0: 2596; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2597; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000 2598; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 2599; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2600; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2601; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2602; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 2603; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc 2604; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x7f800000 2605; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2606; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0 2607; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3 2608; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 2609; GISEL-IEEE-NEXT: v_rcp_f32_e32 v2, v2 2610; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2611; GISEL-IEEE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 2612; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0 2613; GISEL-IEEE-NEXT: v_mul_f32_e32 v0, v1, v0 2614; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 2615; 2616; GCN-DAZ-LABEL: v_sqrt_f32_ulp2_contractable_fdiv_arcp: 2617; GCN-DAZ: ; %bb.0: 2618; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2619; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 2620; GCN-DAZ-NEXT: v_rcp_f32_e32 v0, v0 2621; GCN-DAZ-NEXT: 
v_mul_f32_e32 v0, v1, v0 2622; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 2623 %sqrt = call contract float @llvm.sqrt.f32(float %x), !fpmath !4 2624 %result = fdiv arcp contract float %y, %sqrt, !fpmath !3 2625 ret float %result 2626} 2627 2628define <2 x float> @v_sqrt_v2f32_ulp2_noncontractable_rcp(<2 x float> %x) { 2629; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2_noncontractable_rcp: 2630; SDAG-IEEE: ; %bb.0: 2631; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2632; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 2633; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2634; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 2635; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2636; SDAG-IEEE-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1 2637; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2638; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] 2639; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2640; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 2641; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 2642; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 2643; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc 2644; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2645; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, s[4:5] 2646; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x7f800000 2647; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 2648; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0 2649; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 2650; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 2651; SDAG-IEEE-NEXT: v_rcp_f32_e32 v2, v2 2652; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2653; SDAG-IEEE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 2654; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0 2655; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v1 2656; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 2657; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 2658; SDAG-IEEE-NEXT: v_rcp_f32_e32 v2, v2 2659; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2660; SDAG-IEEE-NEXT: v_sub_i32_e32 v1, vcc, 0, v1 2661; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v2, v1 2662; 
SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 2663; 2664; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2_noncontractable_rcp: 2665; GISEL-IEEE: ; %bb.0: 2666; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2667; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000 2668; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 2669; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 2670; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2 2671; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v3, 5, v3 2672; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5] 2673; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v3 2674; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v2, 5, v2 2675; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 2676; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 2677; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 2678; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc 2679; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2680; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, s[4:5] 2681; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x7f800000 2682; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 2683; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v0 2684; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3 2685; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 2686; GISEL-IEEE-NEXT: v_rcp_f32_e32 v2, v2 2687; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2688; GISEL-IEEE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 2689; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v2, v0 2690; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v1 2691; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v3 2692; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 2693; GISEL-IEEE-NEXT: v_rcp_f32_e32 v2, v2 2694; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2695; GISEL-IEEE-NEXT: v_sub_i32_e32 v1, vcc, 0, v1 2696; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v2, v1 2697; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 2698; 2699; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp2_noncontractable_rcp: 2700; GCN-DAZ: ; %bb.0: 2701; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2702; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 2703; 
GCN-DAZ-NEXT: v_sqrt_f32_e32 v1, v1 2704; GCN-DAZ-NEXT: v_rcp_f32_e32 v0, v0 2705; GCN-DAZ-NEXT: v_rcp_f32_e32 v1, v1 2706; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 2707 %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !4 2708 %result = fdiv <2 x float> <float 1.0, float 1.0>, %sqrt, !fpmath !3 2709 ret <2 x float> %result 2710} 2711 2712define <2 x float> @v_sqrt_v2f32_ulp2_contractable_rcp(<2 x float> %x) { 2713; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_rcp: 2714; SDAG-IEEE: ; %bb.0: 2715; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2716; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 2717; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2718; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 24, vcc 2719; SDAG-IEEE-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1 2720; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2721; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 24, s[4:5] 2722; SDAG-IEEE-NEXT: v_rsq_f32_e32 v0, v0 2723; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 2724; SDAG-IEEE-NEXT: v_rsq_f32_e32 v1, v1 2725; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 12, vcc 2726; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2727; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 12, s[4:5] 2728; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 2729; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 2730; 2731; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_rcp: 2732; GISEL-IEEE: ; %bb.0: 2733; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2734; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x800000 2735; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v2 2736; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, 0, 24, vcc 2737; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v2 2738; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v3 2739; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 24, s[4:5] 2740; GISEL-IEEE-NEXT: v_rsq_f32_e32 v0, v0 2741; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 2742; GISEL-IEEE-NEXT: v_rsq_f32_e32 v1, v1 2743; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 12, vcc 2744; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, 
v0, v2 2745; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, 12, s[4:5] 2746; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 2747; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 2748; 2749; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp2_contractable_rcp: 2750; GCN-DAZ: ; %bb.0: 2751; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2752; GCN-DAZ-NEXT: v_rsq_f32_e32 v0, v0 2753; GCN-DAZ-NEXT: v_rsq_f32_e32 v1, v1 2754; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 2755 %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !4 2756 %result = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt, !fpmath !3 2757 ret <2 x float> %result 2758} 2759 2760define <2 x float> @v_sqrt_v2f32_ulp2_contractable_fdiv(<2 x float> %x, <2 x float> %y) { 2761; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv: 2762; SDAG-IEEE: ; %bb.0: 2763; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2764; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 2765; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2766; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 2767; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v4, 5, v4 2768; SDAG-IEEE-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1 2769; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v4 2770; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] 2771; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v4, 5, v4 2772; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 2773; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4 2774; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 2775; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, -16, vcc 2776; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v4 2777; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, -16, s[4:5] 2778; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x7f800000 2779; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4 2780; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v4, v0 2781; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 2782; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc 2783; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v5, v2 2784; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v2|, s4 2785; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, 
v0 2786; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v5, v2, v5, vcc 2787; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v2, v2 2788; SDAG-IEEE-NEXT: v_rcp_f32_e32 v4, v4 2789; SDAG-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v2, v0 2790; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v1 2791; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 2792; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 2793; SDAG-IEEE-NEXT: v_rcp_f32_e32 v2, v2 2794; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, v5, v4 2795; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v4, v0 2796; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v4, v3 2797; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, s4 2798; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2799; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc 2800; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v3, v3 2801; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, v4, v2 2802; SDAG-IEEE-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 2803; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v2, v1 2804; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 2805; 2806; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv: 2807; GISEL-IEEE: ; %bb.0: 2808; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2809; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x800000 2810; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4 2811; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 2812; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v4 2813; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v5, 5, v5 2814; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] 2815; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v5 2816; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v4, 5, v4 2817; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 2818; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4 2819; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 2820; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, -16, vcc 2821; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v4 2822; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, -16, s[4:5] 2823; GISEL-IEEE-NEXT: v_mov_b32_e32 v5, 0x7f800000 2824; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4 2825; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v4, v0 
2826; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v5 2827; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc 2828; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v6, v2 2829; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v2|, v5 2830; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2831; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v6, v2, v6, vcc 2832; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v2, v2 2833; GISEL-IEEE-NEXT: v_rcp_f32_e32 v4, v4 2834; GISEL-IEEE-NEXT: v_sub_i32_e32 v0, vcc, v2, v0 2835; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v1 2836; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v5 2837; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 2838; GISEL-IEEE-NEXT: v_rcp_f32_e32 v2, v2 2839; GISEL-IEEE-NEXT: v_mul_f32_e32 v4, v6, v4 2840; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v4, v0 2841; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v4, v3 2842; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v3|, v5 2843; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2844; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc 2845; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v3, v3 2846; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, v4, v2 2847; GISEL-IEEE-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 2848; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v2, v1 2849; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 2850; 2851; SDAG-DAZ-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv: 2852; SDAG-DAZ: ; %bb.0: 2853; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2854; SDAG-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 2855; SDAG-DAZ-NEXT: v_sqrt_f32_e32 v1, v1 2856; SDAG-DAZ-NEXT: s_mov_b32 s4, 0x6f800000 2857; SDAG-DAZ-NEXT: v_mov_b32_e32 v4, 0x2f800000 2858; SDAG-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 2859; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v5, 1.0, v4, vcc 2860; SDAG-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, s4 2861; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v4, 1.0, v4, vcc 2862; SDAG-DAZ-NEXT: v_mul_f32_e32 v0, v0, v5 2863; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, v1, v4 2864; SDAG-DAZ-NEXT: v_rcp_f32_e32 v0, v0 2865; SDAG-DAZ-NEXT: v_rcp_f32_e32 v1, v1 2866; SDAG-DAZ-NEXT: 
v_mul_f32_e32 v0, v2, v0 2867; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, v3, v1 2868; SDAG-DAZ-NEXT: v_mul_f32_e32 v0, v5, v0 2869; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, v4, v1 2870; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 2871; 2872; GISEL-DAZ-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv: 2873; GISEL-DAZ: ; %bb.0: 2874; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2875; GISEL-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 2876; GISEL-DAZ-NEXT: v_sqrt_f32_e32 v1, v1 2877; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, 0x6f800000 2878; GISEL-DAZ-NEXT: v_mov_b32_e32 v5, 0x2f800000 2879; GISEL-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, v4 2880; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v6, 1.0, v5, vcc 2881; GISEL-DAZ-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, v4 2882; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v4, 1.0, v5, vcc 2883; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v0, v6 2884; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, v1, v4 2885; GISEL-DAZ-NEXT: v_rcp_f32_e32 v0, v0 2886; GISEL-DAZ-NEXT: v_rcp_f32_e32 v1, v1 2887; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v2, v0 2888; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, v3, v1 2889; GISEL-DAZ-NEXT: v_mul_f32_e32 v0, v6, v0 2890; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, v4, v1 2891; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 2892 %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !4 2893 %result = fdiv contract <2 x float> %y, %sqrt, !fpmath !3 2894 ret <2 x float> %result 2895} 2896 2897define <2 x float> @v_sqrt_v2f32_ulp2_contractable_fdiv_arcp(<2 x float> %x, <2 x float> %y) { 2898; SDAG-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv_arcp: 2899; SDAG-IEEE: ; %bb.0: 2900; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2901; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 2902; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2903; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc 2904; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v4, 5, v4 2905; SDAG-IEEE-NEXT: v_cmp_gt_f32_e64 s[4:5], s4, v1 2906; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v4 2907; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] 2908; 
SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v4, 5, v4 2909; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 2910; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4 2911; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 2912; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, -16, vcc 2913; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v4 2914; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, -16, s[4:5] 2915; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x7f800000 2916; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4 2917; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v4, v0 2918; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 2919; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc 2920; SDAG-IEEE-NEXT: v_rcp_f32_e32 v4, v4 2921; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2922; SDAG-IEEE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 2923; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v4, v0 2924; SDAG-IEEE-NEXT: v_mul_f32_e32 v0, v2, v0 2925; SDAG-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v1 2926; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 2927; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 2928; SDAG-IEEE-NEXT: v_rcp_f32_e32 v2, v2 2929; SDAG-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2930; SDAG-IEEE-NEXT: v_sub_i32_e32 v1, vcc, 0, v1 2931; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v2, v1 2932; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, v3, v1 2933; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 2934; 2935; GISEL-IEEE-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv_arcp: 2936; GISEL-IEEE: ; %bb.0: 2937; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2938; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x800000 2939; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v4 2940; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc 2941; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], v1, v4 2942; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v5, 5, v5 2943; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5] 2944; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v5 2945; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v4, 5, v4 2946; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 2947; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4 2948; GISEL-IEEE-NEXT: 
v_sqrt_f32_e32 v1, v1 2949; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, -16, vcc 2950; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v4 2951; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v4, 0, -16, s[4:5] 2952; GISEL-IEEE-NEXT: v_mov_b32_e32 v5, 0x7f800000 2953; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v4 2954; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v4, v0 2955; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v5 2956; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc 2957; GISEL-IEEE-NEXT: v_rcp_f32_e32 v4, v4 2958; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2959; GISEL-IEEE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 2960; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v4, v0 2961; GISEL-IEEE-NEXT: v_mul_f32_e32 v0, v2, v0 2962; GISEL-IEEE-NEXT: v_frexp_mant_f32_e32 v2, v1 2963; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, v5 2964; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 2965; GISEL-IEEE-NEXT: v_rcp_f32_e32 v2, v2 2966; GISEL-IEEE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2967; GISEL-IEEE-NEXT: v_sub_i32_e32 v1, vcc, 0, v1 2968; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v2, v1 2969; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, v3, v1 2970; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 2971; 2972; GCN-DAZ-LABEL: v_sqrt_v2f32_ulp2_contractable_fdiv_arcp: 2973; GCN-DAZ: ; %bb.0: 2974; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2975; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 2976; GCN-DAZ-NEXT: v_sqrt_f32_e32 v1, v1 2977; GCN-DAZ-NEXT: v_rcp_f32_e32 v0, v0 2978; GCN-DAZ-NEXT: v_rcp_f32_e32 v1, v1 2979; GCN-DAZ-NEXT: v_mul_f32_e32 v0, v2, v0 2980; GCN-DAZ-NEXT: v_mul_f32_e32 v1, v3, v1 2981; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 2982 %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !4 2983 %result = fdiv arcp contract <2 x float> %y, %sqrt, !fpmath !3 2984 ret <2 x float> %result 2985} 2986 2987define amdgpu_ps i32 @s_sqrt_f32_ulp1(float inreg %x) { 2988; GCN-LABEL: s_sqrt_f32_ulp1: 2989; GCN: ; %bb.0: 2990; GCN-NEXT: v_sqrt_f32_e32 v0, s0 2991; GCN-NEXT: v_readfirstlane_b32 s0, v0 2992; 
GCN-NEXT: ; return to shader part epilog 2993 %result = call afn float @llvm.sqrt.f32(float %x), !fpmath !1 2994 %cast = bitcast float %result to i32 2995 %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast) 2996 ret i32 %firstlane 2997} 2998 2999define amdgpu_ps i32 @s_sqrt_f32_ulp2(float inreg %x) { 3000; GCN-LABEL: s_sqrt_f32_ulp2: 3001; GCN: ; %bb.0: 3002; GCN-NEXT: v_sqrt_f32_e32 v0, s0 3003; GCN-NEXT: v_readfirstlane_b32 s0, v0 3004; GCN-NEXT: ; return to shader part epilog 3005 %result = call afn float @llvm.sqrt.f32(float %x), !fpmath !2 3006 %cast = bitcast float %result to i32 3007 %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast) 3008 ret i32 %firstlane 3009} 3010 3011define amdgpu_ps i32 @s_sqrt_f32_ulp3(float inreg %x) { 3012; GCN-LABEL: s_sqrt_f32_ulp3: 3013; GCN: ; %bb.0: 3014; GCN-NEXT: v_sqrt_f32_e32 v0, s0 3015; GCN-NEXT: v_readfirstlane_b32 s0, v0 3016; GCN-NEXT: ; return to shader part epilog 3017 %result = call afn float @llvm.sqrt.f32(float %x), !fpmath !4 3018 %cast = bitcast float %result to i32 3019 %firstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %cast) 3020 ret i32 %firstlane 3021} 3022 3023define float @v_sqrt_f32_known_never_posdenormal_ulp2(float nofpclass(psub) %x) { 3024; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_posdenormal_ulp2: 3025; SDAG-IEEE: ; %bb.0: 3026; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3027; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 3028; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3029; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3030; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3031; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3032; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 3033; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 3034; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3035; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 3036; 3037; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_posdenormal_ulp2: 3038; GISEL-IEEE: ; %bb.0: 3039; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3040; 
GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 3041; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3042; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3043; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3044; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3045; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 3046; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 3047; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3048; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 3049; 3050; GCN-DAZ-LABEL: v_sqrt_f32_known_never_posdenormal_ulp2: 3051; GCN-DAZ: ; %bb.0: 3052; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3053; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 3054; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 3055 %result = call float @llvm.sqrt.f32(float %x), !fpmath !2 3056 ret float %result 3057} 3058 3059define float @v_sqrt_f32_nsz_known_never_posdenormal_ulp2(float nofpclass(psub) %x) { 3060; SDAG-IEEE-LABEL: v_sqrt_f32_nsz_known_never_posdenormal_ulp2: 3061; SDAG-IEEE: ; %bb.0: 3062; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3063; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 3064; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3065; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3066; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3067; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3068; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 3069; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 3070; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3071; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 3072; 3073; GISEL-IEEE-LABEL: v_sqrt_f32_nsz_known_never_posdenormal_ulp2: 3074; GISEL-IEEE: ; %bb.0: 3075; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3076; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 3077; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3078; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3079; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3080; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3081; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 3082; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 3083; 
GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3084; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 3085; 3086; GCN-DAZ-LABEL: v_sqrt_f32_nsz_known_never_posdenormal_ulp2: 3087; GCN-DAZ: ; %bb.0: 3088; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3089; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 3090; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 3091 %result = call nsz float @llvm.sqrt.f32(float %x), !fpmath !2 3092 ret float %result 3093} 3094 3095define float @v_sqrt_f32_known_never_negdenormal(float nofpclass(nsub) %x) { 3096; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_negdenormal: 3097; SDAG-IEEE: ; %bb.0: 3098; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3099; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 3100; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3101; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3102; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3103; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3104; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 3105; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 3106; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3107; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 3108; 3109; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_negdenormal: 3110; GISEL-IEEE: ; %bb.0: 3111; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3112; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 3113; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3114; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3115; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3116; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3117; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 3118; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 3119; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3120; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 3121; 3122; GCN-DAZ-LABEL: v_sqrt_f32_known_never_negdenormal: 3123; GCN-DAZ: ; %bb.0: 3124; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3125; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 3126; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 3127 %result = call float @llvm.sqrt.f32(float %x), !fpmath !2 
3128 ret float %result 3129} 3130 3131define float @v_sqrt_f32_known_never_denormal(float nofpclass(sub) %x) { 3132; GCN-LABEL: v_sqrt_f32_known_never_denormal: 3133; GCN: ; %bb.0: 3134; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3135; GCN-NEXT: v_sqrt_f32_e32 v0, v0 3136; GCN-NEXT: s_setpc_b64 s[30:31] 3137 %result = call float @llvm.sqrt.f32(float %x), !fpmath !2 3138 ret float %result 3139} 3140 3141define float @v_sqrt_f32_ninf_known_never_zero(float nofpclass(zero) %x) { 3142; SDAG-IEEE-LABEL: v_sqrt_f32_ninf_known_never_zero: 3143; SDAG-IEEE: ; %bb.0: 3144; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3145; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000 3146; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 3147; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3148; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3149; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 3150; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 3151; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 3152; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 3153; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 3154; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 3155; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 3156; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 3157; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 3158; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3159; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3160; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 3161; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3162; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3163; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 3164; 3165; GISEL-IEEE-LABEL: v_sqrt_f32_ninf_known_never_zero: 3166; GISEL-IEEE: ; %bb.0: 3167; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3168; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 3169; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 3170; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3171; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3172; 
GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 3173; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 3174; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 3175; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1 3176; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 3177; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 3178; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5] 3179; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 3180; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] 3181; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3182; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3183; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 3184; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3185; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3186; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 3187; 3188; SDAG-DAZ-LABEL: v_sqrt_f32_ninf_known_never_zero: 3189; SDAG-DAZ: ; %bb.0: 3190; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3191; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000 3192; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 3193; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3194; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3195; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 3196; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 3197; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 3198; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 3199; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 3200; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 3201; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 3202; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 3203; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3204; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3205; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 3206; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3207; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3208; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 3209; 3210; GISEL-DAZ-LABEL: v_sqrt_f32_ninf_known_never_zero: 3211; GISEL-DAZ: ; %bb.0: 3212; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3213; GISEL-DAZ-NEXT: 
v_mov_b32_e32 v1, 0xf800000 3214; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 3215; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3216; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3217; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 3218; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 3219; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 3220; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 3221; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 3222; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 3223; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 3224; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 3225; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3226; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3227; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 3228; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3229; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3230; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 3231 %result = call ninf float @llvm.sqrt.f32(float %x) 3232 ret float %result 3233} 3234 3235define float @v_sqrt_f32_known_never_zero(float nofpclass(zero) %x) { 3236; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_zero: 3237; SDAG-IEEE: ; %bb.0: 3238; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3239; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000 3240; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 3241; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3242; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3243; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 3244; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 3245; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 3246; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 3247; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 3248; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 3249; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 3250; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 3251; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 3252; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3253; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3254; SDAG-IEEE-NEXT: 
v_mov_b32_e32 v2, 0x260 3255; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3256; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3257; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 3258; 3259; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_zero: 3260; GISEL-IEEE: ; %bb.0: 3261; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3262; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 3263; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 3264; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3265; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3266; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 3267; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 3268; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 3269; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1 3270; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 3271; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 3272; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5] 3273; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 3274; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] 3275; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3276; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3277; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 3278; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3279; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3280; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 3281; 3282; SDAG-DAZ-LABEL: v_sqrt_f32_known_never_zero: 3283; SDAG-DAZ: ; %bb.0: 3284; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3285; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000 3286; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 3287; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3288; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3289; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 3290; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 3291; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 3292; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 3293; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 3294; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 3295; SDAG-DAZ-NEXT: v_fma_f32 v1, 
v1, v3, v1 3296; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 3297; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3298; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3299; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 3300; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3301; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3302; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 3303; 3304; GISEL-DAZ-LABEL: v_sqrt_f32_known_never_zero: 3305; GISEL-DAZ: ; %bb.0: 3306; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3307; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000 3308; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 3309; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3310; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3311; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 3312; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 3313; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 3314; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 3315; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 3316; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 3317; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 3318; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 3319; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3320; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3321; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 3322; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3323; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3324; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 3325 %result = call float @llvm.sqrt.f32(float %x) 3326 ret float %result 3327} 3328 3329define float @v_sqrt_f32_known_never_zero_never_inf(float nofpclass(zero inf) %x) { 3330; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_inf: 3331; SDAG-IEEE: ; %bb.0: 3332; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3333; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000 3334; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 3335; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3336; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3337; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 3338; 
SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 3339; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 3340; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 3341; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 3342; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 3343; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 3344; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 3345; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 3346; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3347; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3348; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 3349; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3350; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3351; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 3352; 3353; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_inf: 3354; GISEL-IEEE: ; %bb.0: 3355; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3356; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 3357; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 3358; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3359; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3360; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 3361; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 3362; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 3363; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1 3364; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 3365; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 3366; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5] 3367; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 3368; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] 3369; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3370; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3371; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 3372; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3373; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3374; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 3375; 3376; SDAG-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_inf: 3377; SDAG-DAZ: ; %bb.0: 
3378; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3379; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000 3380; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 3381; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3382; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3383; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 3384; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 3385; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 3386; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 3387; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 3388; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 3389; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 3390; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 3391; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3392; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3393; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 3394; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3395; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3396; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 3397; 3398; GISEL-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_inf: 3399; GISEL-DAZ: ; %bb.0: 3400; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3401; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000 3402; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 3403; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3404; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3405; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 3406; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 3407; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 3408; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 3409; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 3410; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 3411; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 3412; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 3413; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3414; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3415; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 3416; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3417; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3418; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 3419 
%result = call float @llvm.sqrt.f32(float %x) 3420 ret float %result 3421} 3422 3423define float @v_sqrt_f32_known_never_zero_never_ninf(float nofpclass(zero ninf) %x) { 3424; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_ninf: 3425; SDAG-IEEE: ; %bb.0: 3426; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3427; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000 3428; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 3429; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3430; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3431; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 3432; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 3433; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 3434; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 3435; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 3436; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 3437; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 3438; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 3439; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 3440; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3441; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3442; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 3443; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3444; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3445; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 3446; 3447; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_ninf: 3448; GISEL-IEEE: ; %bb.0: 3449; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3450; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 3451; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 3452; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3453; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3454; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 3455; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 3456; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 3457; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1 3458; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 3459; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 
3460; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5] 3461; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 3462; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] 3463; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3464; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3465; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 3466; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3467; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3468; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 3469; 3470; SDAG-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_ninf: 3471; SDAG-DAZ: ; %bb.0: 3472; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3473; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000 3474; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 3475; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3476; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3477; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 3478; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 3479; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 3480; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 3481; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 3482; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 3483; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 3484; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 3485; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3486; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3487; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 3488; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3489; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3490; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 3491; 3492; GISEL-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_ninf: 3493; GISEL-DAZ: ; %bb.0: 3494; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3495; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000 3496; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 3497; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3498; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3499; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 3500; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 3501; 
GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 3502; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 3503; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 3504; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 3505; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 3506; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 3507; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3508; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3509; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 3510; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3511; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3512; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 3513 %result = call float @llvm.sqrt.f32(float %x) 3514 ret float %result 3515} 3516 3517define float @v_sqrt_f32_known_never_zero_never_pinf(float nofpclass(zero pinf) %x) { 3518; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_pinf: 3519; SDAG-IEEE: ; %bb.0: 3520; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3521; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000 3522; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 3523; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3524; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3525; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 3526; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 3527; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 3528; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 3529; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 3530; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 3531; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 3532; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 3533; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 3534; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3535; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3536; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 3537; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3538; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3539; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 3540; 3541; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_pinf: 3542; GISEL-IEEE: ; 
%bb.0: 3543; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3544; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 3545; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 3546; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3547; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3548; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 3549; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 3550; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 3551; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v1 3552; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 3553; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 3554; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[4:5] 3555; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v5 3556; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[4:5] 3557; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3558; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3559; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 3560; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3561; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3562; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 3563; 3564; SDAG-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_pinf: 3565; SDAG-DAZ: ; %bb.0: 3566; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3567; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000 3568; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 3569; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3570; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3571; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 3572; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 3573; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 3574; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 3575; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 3576; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 3577; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 3578; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 3579; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3580; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3581; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 3582; SDAG-DAZ-NEXT: 
v_cmp_class_f32_e32 vcc, v0, v2 3583; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3584; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 3585; 3586; GISEL-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_pinf: 3587; GISEL-DAZ: ; %bb.0: 3588; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3589; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000 3590; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 3591; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3592; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3593; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 3594; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 3595; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 3596; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 3597; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 3598; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 3599; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 3600; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 3601; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3602; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3603; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 3604; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3605; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3606; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 3607 %result = call float @llvm.sqrt.f32(float %x) 3608 ret float %result 3609} 3610 3611define float @v_sqrt_f32_frexp_src(float %x) { 3612; SDAG-LABEL: v_sqrt_f32_frexp_src: 3613; SDAG: ; %bb.0: 3614; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3615; SDAG-NEXT: s_mov_b32 s4, 0x7f800000 3616; SDAG-NEXT: v_frexp_mant_f32_e32 v1, v0 3617; SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 3618; SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3619; SDAG-NEXT: s_mov_b32 s4, 0xf800000 3620; SDAG-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 3621; SDAG-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3622; SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3623; SDAG-NEXT: v_rsq_f32_e32 v1, v0 3624; SDAG-NEXT: v_mul_f32_e32 v2, v0, v1 3625; SDAG-NEXT: v_mul_f32_e32 v1, 0.5, v1 3626; SDAG-NEXT: v_fma_f32 v3, -v1, v2, 0.5 3627; SDAG-NEXT: 
; Review note for @v_sqrt_f32_ulp3_frexp_src (its `define` is inside the collapsed line below).
; Same frexp-mantissa input as @v_sqrt_f32_frexp_src, but the sqrt call carries !fpmath !4.
; The expected output for both selectors is a single v_sqrt_f32 applied to the selected
; mantissa, with no range scaling. !4 is defined outside this part of the file; the "ulp3"
; in the name suggests a 3 ulp bound -- TODO confirm against the metadata node.
v_fma_f32 v2, v2, v3, v2 3628; SDAG-NEXT: v_fma_f32 v4, -v2, v2, v0 3629; SDAG-NEXT: v_fma_f32 v1, v1, v3, v1 3630; SDAG-NEXT: v_fma_f32 v1, v4, v1, v2 3631; SDAG-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3632; SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3633; SDAG-NEXT: v_mov_b32_e32 v2, 0x260 3634; SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3635; SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3636; SDAG-NEXT: s_setpc_b64 s[30:31] 3637; 3638; GISEL-LABEL: v_sqrt_f32_frexp_src: 3639; GISEL: ; %bb.0: 3640; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3641; GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 3642; GISEL-NEXT: v_frexp_mant_f32_e32 v1, v0 3643; GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 3644; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3645; GISEL-NEXT: v_mov_b32_e32 v1, 0xf800000 3646; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 3647; GISEL-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3648; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 3649; GISEL-NEXT: v_rsq_f32_e32 v1, v0 3650; GISEL-NEXT: v_mul_f32_e32 v2, v0, v1 3651; GISEL-NEXT: v_mul_f32_e32 v1, 0.5, v1 3652; GISEL-NEXT: v_fma_f32 v3, -v1, v2, 0.5 3653; GISEL-NEXT: v_fma_f32 v2, v2, v3, v2 3654; GISEL-NEXT: v_fma_f32 v1, v1, v3, v1 3655; GISEL-NEXT: v_fma_f32 v3, -v2, v2, v0 3656; GISEL-NEXT: v_fma_f32 v1, v3, v1, v2 3657; GISEL-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3658; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3659; GISEL-NEXT: v_mov_b32_e32 v2, 0x260 3660; GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3661; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3662; GISEL-NEXT: s_setpc_b64 s[30:31] 3663 %frexp = call { float, i32 } @llvm.frexp.f32.i32(float %x) 3664 %frexp.mant = extractvalue { float, i32 } %frexp, 0 3665 %result = call float @llvm.sqrt.f32(float %frexp.mant) 3666 ret float %result 3667} 3668 3669define float @v_sqrt_f32_ulp3_frexp_src(float %x) { 3670; SDAG-LABEL: v_sqrt_f32_ulp3_frexp_src: 3671; SDAG: ; %bb.0: 3672; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3673; SDAG-NEXT: s_mov_b32 
; Review note for @v_sqrt_f32_known_never_zero_never_ninf_ulp2 (its `define` is inside the
; collapsed line below). The argument is nofpclass(zero ninf) and the sqrt call carries
; !fpmath !2 (node defined outside this part of the file). In the IEEE-mode runs the expected
; code wraps v_sqrt_f32 in two v_ldexp_f32: the input exponent is raised by 32 when the input
; compares below 0x800000, and the result exponent is lowered by 16 in that case. In the DAZ
; runs the expected code is a bare v_sqrt_f32.
s4, 0x7f800000 3674; SDAG-NEXT: v_frexp_mant_f32_e32 v1, v0 3675; SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 3676; SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3677; SDAG-NEXT: v_sqrt_f32_e32 v0, v0 3678; SDAG-NEXT: s_setpc_b64 s[30:31] 3679; 3680; GISEL-LABEL: v_sqrt_f32_ulp3_frexp_src: 3681; GISEL: ; %bb.0: 3682; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3683; GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 3684; GISEL-NEXT: v_frexp_mant_f32_e32 v1, v0 3685; GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 3686; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3687; GISEL-NEXT: v_sqrt_f32_e32 v0, v0 3688; GISEL-NEXT: s_setpc_b64 s[30:31] 3689 %frexp = call { float, i32 } @llvm.frexp.f32.i32(float %x) 3690 %frexp.mant = extractvalue { float, i32 } %frexp, 0 3691 %result = call float @llvm.sqrt.f32(float %frexp.mant), !fpmath !4 3692 ret float %result 3693} 3694 3695define float @v_sqrt_f32_known_never_zero_never_ninf_ulp2(float nofpclass(zero ninf) %x) { 3696; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_ninf_ulp2: 3697; SDAG-IEEE: ; %bb.0: 3698; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3699; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 3700; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3701; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3702; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3703; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3704; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 3705; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 3706; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3707; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 3708; 3709; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_zero_never_ninf_ulp2: 3710; GISEL-IEEE: ; %bb.0: 3711; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3712; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 3713; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3714; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3715; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3716; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3717; 
; Review note for @v_sqrt_f32_known_never_ninf_ulp2 (its `define` is inside the collapsed line
; below). Identical to @v_sqrt_f32_known_never_zero_never_ninf_ulp2 except that the argument
; is only nofpclass(ninf); the expected code shown for it is the same ldexp-scaled sequence
; in the IEEE-mode runs and a bare v_sqrt_f32 in the DAZ runs.
GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 3718; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 3719; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3720; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 3721; 3722; GCN-DAZ-LABEL: v_sqrt_f32_known_never_zero_never_ninf_ulp2: 3723; GCN-DAZ: ; %bb.0: 3724; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3725; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 3726; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 3727 %result = call float @llvm.sqrt.f32(float %x), !fpmath !2 3728 ret float %result 3729} 3730 3731define float @v_sqrt_f32_known_never_ninf_ulp2(float nofpclass(ninf) %x) { 3732; SDAG-IEEE-LABEL: v_sqrt_f32_known_never_ninf_ulp2: 3733; SDAG-IEEE: ; %bb.0: 3734; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3735; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 3736; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3737; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3738; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3739; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3740; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 3741; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 3742; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3743; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 3744; 3745; GISEL-IEEE-LABEL: v_sqrt_f32_known_never_ninf_ulp2: 3746; GISEL-IEEE: ; %bb.0: 3747; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3748; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 3749; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3750; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3751; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3752; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3753; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 3754; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 3755; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3756; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 3757; 3758; GCN-DAZ-LABEL: v_sqrt_f32_known_never_ninf_ulp2: 3759; GCN-DAZ: ; %bb.0: 3760; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3761; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 3762; 
; Review notes for the two functions whose `define` lines are inside the first collapsed line
; below.
; @v_sqrt_f32_nsz_known_never_ninf_ulp2 adds the nsz flag to the sqrt call of the
; nofpclass(ninf) / !fpmath !2 case; the expected code is the ldexp-scaled v_sqrt_f32 in the
; IEEE-mode runs and a bare v_sqrt_f32 in the DAZ runs.
; @v_elim_redun_check_ult_sqrt returns NaN when %in is unordered-or-less-than -0.0 and
; sqrt(%in) otherwise. The SelectionDAG expectations end after the sqrt expansion with no
; extra compare or NaN select, whereas the GlobalISel expectations keep a v_cmp_nge_f32
; against the v_bfrev_b32 constant followed by a v_cndmask_b32 selecting 0x7fc00000.
GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 3763 %result = call float @llvm.sqrt.f32(float %x), !fpmath !2 3764 ret float %result 3765} 3766 3767define float @v_sqrt_f32_nsz_known_never_ninf_ulp2(float nofpclass(ninf) %x) { 3768; SDAG-IEEE-LABEL: v_sqrt_f32_nsz_known_never_ninf_ulp2: 3769; SDAG-IEEE: ; %bb.0: 3770; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3771; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 3772; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3773; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3774; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3775; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3776; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 3777; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 3778; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3779; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 3780; 3781; GISEL-IEEE-LABEL: v_sqrt_f32_nsz_known_never_ninf_ulp2: 3782; GISEL-IEEE: ; %bb.0: 3783; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3784; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 3785; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3786; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3787; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3788; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3789; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v0, v0 3790; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, -16, vcc 3791; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v0, v0, v1 3792; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 3793; 3794; GCN-DAZ-LABEL: v_sqrt_f32_nsz_known_never_ninf_ulp2: 3795; GCN-DAZ: ; %bb.0: 3796; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3797; GCN-DAZ-NEXT: v_sqrt_f32_e32 v0, v0 3798; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 3799 %result = call nsz float @llvm.sqrt.f32(float %x), !fpmath !2 3800 ret float %result 3801} 3802 3803define float @v_elim_redun_check_ult_sqrt(float %in) { 3804; SDAG-IEEE-LABEL: v_elim_redun_check_ult_sqrt: 3805; SDAG-IEEE: ; %bb.0: 3806; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3807; SDAG-IEEE-NEXT: s_mov_b32 s4, 0xf800000 
3808; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 3809; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3810; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3811; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 3812; SDAG-IEEE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 3813; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 3814; SDAG-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 3815; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 3816; SDAG-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 3817; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 3818; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 3819; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 3820; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3821; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3822; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 3823; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3824; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3825; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 3826; 3827; GISEL-IEEE-LABEL: v_elim_redun_check_ult_sqrt: 3828; GISEL-IEEE: ; %bb.0: 3829; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3830; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0xf800000 3831; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 3832; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3833; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v0, v2, vcc 3834; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v2, v1 3835; GISEL-IEEE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2 3836; GISEL-IEEE-NEXT: v_fma_f32 v4, -v3, v2, v1 3837; GISEL-IEEE-NEXT: v_add_i32_e64 v5, s[4:5], 1, v2 3838; GISEL-IEEE-NEXT: v_fma_f32 v6, -v5, v2, v1 3839; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4 3840; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5] 3841; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v6 3842; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5] 3843; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 3844; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 3845; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x260 3846; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, 
v1, v3 3847; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 3848; GISEL-IEEE-NEXT: v_bfrev_b32_e32 v2, 1 3849; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x7fc00000 3850; GISEL-IEEE-NEXT: v_cmp_nge_f32_e32 vcc, v0, v2 3851; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 3852; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 3853; 3854; SDAG-DAZ-LABEL: v_elim_redun_check_ult_sqrt: 3855; SDAG-DAZ: ; %bb.0: 3856; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3857; SDAG-DAZ-NEXT: s_mov_b32 s4, 0xf800000 3858; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 3859; SDAG-DAZ-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3860; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 3861; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 3862; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 3863; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 3864; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 3865; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 3866; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 3867; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 3868; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 3869; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3870; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 3871; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 3872; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3873; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3874; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 3875; 3876; GISEL-DAZ-LABEL: v_elim_redun_check_ult_sqrt: 3877; GISEL-DAZ: ; %bb.0: 3878; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3879; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0xf800000 3880; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 3881; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3882; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v0, v2, vcc 3883; GISEL-DAZ-NEXT: v_rsq_f32_e32 v2, v1 3884; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, v1, v2 3885; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0.5, v2 3886; GISEL-DAZ-NEXT: v_fma_f32 v4, -v2, v3, 0.5 3887; GISEL-DAZ-NEXT: v_fma_f32 v3, v3, v4, v3 3888; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v4, v2 3889; GISEL-DAZ-NEXT: 
; Review note for @v_elim_redun_check_ult_sqrt_ulp3 (its `define` is inside the collapsed line
; below). Same NaN-guard select as @v_elim_redun_check_ult_sqrt, but the sqrt call carries
; !fpmath !4 (node defined outside this part of the file). Here the expectations for both
; selectors keep the compare and the 0x7fc00000 select after the sqrt, using v_cmp_le_f32 for
; SelectionDAG and v_cmp_nge_f32 for GlobalISel.
v_fma_f32 v4, -v3, v3, v1 3890; GISEL-DAZ-NEXT: v_fma_f32 v2, v4, v2, v3 3891; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 3892; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 3893; GISEL-DAZ-NEXT: v_mov_b32_e32 v3, 0x260 3894; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v1, v3 3895; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 3896; GISEL-DAZ-NEXT: v_bfrev_b32_e32 v2, 1 3897; GISEL-DAZ-NEXT: v_mov_b32_e32 v3, 0x7fc00000 3898; GISEL-DAZ-NEXT: v_cmp_nge_f32_e32 vcc, v0, v2 3899; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 3900; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 3901 %sqrt = call float @llvm.sqrt.f32(float %in) 3902 %cmp = fcmp ult float %in, -0.000000e+00 3903 %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt 3904 ret float %res 3905} 3906 3907define float @v_elim_redun_check_ult_sqrt_ulp3(float %in) { 3908; SDAG-IEEE-LABEL: v_elim_redun_check_ult_sqrt_ulp3: 3909; SDAG-IEEE: ; %bb.0: 3910; SDAG-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3911; SDAG-IEEE-NEXT: s_mov_b32 s4, 0x800000 3912; SDAG-IEEE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 3913; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3914; SDAG-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3915; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v0, v1 3916; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 3917; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc 3918; SDAG-IEEE-NEXT: s_brev_b32 s4, 1 3919; SDAG-IEEE-NEXT: v_cmp_le_f32_e32 vcc, s4, v0 3920; SDAG-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 3921; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x7fc00000 3922; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 3923; SDAG-IEEE-NEXT: s_setpc_b64 s[30:31] 3924; 3925; GISEL-IEEE-LABEL: v_elim_redun_check_ult_sqrt_ulp3: 3926; GISEL-IEEE: ; %bb.0: 3927; GISEL-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3928; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x800000 3929; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, v0, v1 3930; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 3931; GISEL-IEEE-NEXT: v_lshlrev_b32_e32 v1, 5, v1 3932; 
; Review note for @elim_redun_check_neg0 (its `define` is inside the first collapsed line below).
; The kernel stores NaN when %in is ordered-less-than -0.0 and sqrt(%in) otherwise. The
; SelectionDAG expectations store the sqrt expansion result directly, with no compare against
; -0.0; the GlobalISel expectations keep a v_cmp_lt_f32 against the v_bfrev_b32 constant and a
; v_cndmask_b32 selecting 0x7fc00000 before the store.
GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v0, v1 3933; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v1 3934; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, 0, -16, vcc 3935; GISEL-IEEE-NEXT: v_mov_b32_e32 v3, 0x7fc00000 3936; GISEL-IEEE-NEXT: v_ldexp_f32_e32 v1, v1, v2 3937; GISEL-IEEE-NEXT: v_bfrev_b32_e32 v2, 1 3938; GISEL-IEEE-NEXT: v_cmp_nge_f32_e32 vcc, v0, v2 3939; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 3940; GISEL-IEEE-NEXT: s_setpc_b64 s[30:31] 3941; 3942; SDAG-DAZ-LABEL: v_elim_redun_check_ult_sqrt_ulp3: 3943; SDAG-DAZ: ; %bb.0: 3944; SDAG-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3945; SDAG-DAZ-NEXT: v_sqrt_f32_e32 v1, v0 3946; SDAG-DAZ-NEXT: s_brev_b32 s4, 1 3947; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x7fc00000 3948; SDAG-DAZ-NEXT: v_cmp_le_f32_e32 vcc, s4, v0 3949; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 3950; SDAG-DAZ-NEXT: s_setpc_b64 s[30:31] 3951; 3952; GISEL-DAZ-LABEL: v_elim_redun_check_ult_sqrt_ulp3: 3953; GISEL-DAZ: ; %bb.0: 3954; GISEL-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3955; GISEL-DAZ-NEXT: v_sqrt_f32_e32 v1, v0 3956; GISEL-DAZ-NEXT: v_bfrev_b32_e32 v2, 1 3957; GISEL-DAZ-NEXT: v_mov_b32_e32 v3, 0x7fc00000 3958; GISEL-DAZ-NEXT: v_cmp_nge_f32_e32 vcc, v0, v2 3959; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v3, vcc 3960; GISEL-DAZ-NEXT: s_setpc_b64 s[30:31] 3961 %sqrt = call float @llvm.sqrt.f32(float %in), !fpmath !4 3962 %cmp = fcmp ult float %in, -0.000000e+00 3963 %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt 3964 ret float %res 3965} 3966 3967define amdgpu_kernel void @elim_redun_check_neg0(ptr addrspace(1) %out, float %in) { 3968; SDAG-IEEE-LABEL: elim_redun_check_neg0: 3969; SDAG-IEEE: ; %bb.0: ; %entry 3970; SDAG-IEEE-NEXT: s_load_dword s0, s[4:5], 0xb 3971; SDAG-IEEE-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 3972; SDAG-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000 3973; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000 3974; SDAG-IEEE-NEXT: s_mov_b32 s7, 0xf000 3975; SDAG-IEEE-NEXT: s_waitcnt lgkmcnt(0) 3976; 
SDAG-IEEE-NEXT: v_mul_f32_e32 v1, s0, v1 3977; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, s0 3978; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s0, v0 3979; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[0:1] 3980; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 3981; SDAG-IEEE-NEXT: s_mov_b32 s6, -1 3982; SDAG-IEEE-NEXT: v_add_i32_e32 v2, vcc, -1, v1 3983; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 3984; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v3 3985; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 3986; SDAG-IEEE-NEXT: v_add_i32_e32 v3, vcc, 1, v1 3987; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 3988; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1 3989; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc 3990; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 3991; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] 3992; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 3993; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 3994; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 3995; SDAG-IEEE-NEXT: buffer_store_dword v0, off, s[4:7], 0 3996; SDAG-IEEE-NEXT: s_endpgm 3997; 3998; GISEL-IEEE-LABEL: elim_redun_check_neg0: 3999; GISEL-IEEE: ; %bb.0: ; %entry 4000; GISEL-IEEE-NEXT: s_load_dword s2, s[4:5], 0xb 4001; GISEL-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000 4002; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000 4003; GISEL-IEEE-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 4004; GISEL-IEEE-NEXT: s_mov_b32 s6, -1 4005; GISEL-IEEE-NEXT: s_waitcnt lgkmcnt(0) 4006; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, s2 4007; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, s2, v1 4008; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 4009; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 4010; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 4011; GISEL-IEEE-NEXT: s_mov_b32 s7, 0xf000 4012; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[0:1], -1, v1 4013; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 4014; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[0:1], 1, v1 4015; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 4016; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v3 4017; 
GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] 4018; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v5 4019; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1] 4020; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 4021; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 4022; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 4023; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 4024; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 4025; GISEL-IEEE-NEXT: v_bfrev_b32_e32 v1, 1 4026; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x7fc00000 4027; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1 4028; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 4029; GISEL-IEEE-NEXT: buffer_store_dword v0, off, s[4:7], 0 4030; GISEL-IEEE-NEXT: s_endpgm 4031; 4032; SDAG-DAZ-LABEL: elim_redun_check_neg0: 4033; SDAG-DAZ: ; %bb.0: ; %entry 4034; SDAG-DAZ-NEXT: s_load_dword s0, s[4:5], 0xb 4035; SDAG-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000 4036; SDAG-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000 4037; SDAG-DAZ-NEXT: s_mov_b32 s3, 0xf000 4038; SDAG-DAZ-NEXT: s_mov_b32 s2, -1 4039; SDAG-DAZ-NEXT: s_waitcnt lgkmcnt(0) 4040; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, s0, v1 4041; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, s0 4042; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 4043; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 4044; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 4045; SDAG-DAZ-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 4046; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 4047; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 4048; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 4049; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 4050; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 4051; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 4052; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 4053; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 4054; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 4055; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 4056; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 4057; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 4058; SDAG-DAZ-NEXT: 
; Review note for @elim_redun_check_pos0 (its `define` ends the first collapsed line below).
; Same kernel as @elim_redun_check_neg0 but the guard compares against +0.0. The SelectionDAG
; expectations again contain no NaN select; the GlobalISel expectations keep
; v_cmp_lt_f32_e64 vcc, s2, 0 and a v_cndmask_b32 selecting 0x7fc00000 before the store.
; The last collapsed line of this span also opens @elim_redun_check_ult, whose checks
; continue beyond it.
s_waitcnt lgkmcnt(0) 4059; SDAG-DAZ-NEXT: buffer_store_dword v0, off, s[0:3], 0 4060; SDAG-DAZ-NEXT: s_endpgm 4061; 4062; GISEL-DAZ-LABEL: elim_redun_check_neg0: 4063; GISEL-DAZ: ; %bb.0: ; %entry 4064; GISEL-DAZ-NEXT: s_load_dword s2, s[4:5], 0xb 4065; GISEL-DAZ-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 4066; GISEL-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000 4067; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000 4068; GISEL-DAZ-NEXT: s_mov_b32 s3, 0xf000 4069; GISEL-DAZ-NEXT: s_waitcnt lgkmcnt(0) 4070; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, s2 4071; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, s2, v1 4072; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 4073; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 4074; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 4075; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 4076; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 4077; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 4078; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 4079; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 4080; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 4081; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 4082; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 4083; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 4084; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 4085; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 4086; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 4087; GISEL-DAZ-NEXT: v_bfrev_b32_e32 v1, 1 4088; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x7fc00000 4089; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s2, v1 4090; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 4091; GISEL-DAZ-NEXT: s_mov_b32 s2, -1 4092; GISEL-DAZ-NEXT: buffer_store_dword v0, off, s[0:3], 0 4093; GISEL-DAZ-NEXT: s_endpgm 4094entry: 4095 %sqrt = call float @llvm.sqrt.f32(float %in) 4096 %cmp = fcmp olt float %in, -0.000000e+00 4097 %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt 4098 store float %res, ptr addrspace(1) %out 4099 ret void 4100} 4101 4102define amdgpu_kernel void @elim_redun_check_pos0(ptr addrspace(1) %out, float %in) { 
4103; SDAG-IEEE-LABEL: elim_redun_check_pos0: 4104; SDAG-IEEE: ; %bb.0: ; %entry 4105; SDAG-IEEE-NEXT: s_load_dword s0, s[4:5], 0xb 4106; SDAG-IEEE-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 4107; SDAG-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000 4108; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000 4109; SDAG-IEEE-NEXT: s_mov_b32 s7, 0xf000 4110; SDAG-IEEE-NEXT: s_waitcnt lgkmcnt(0) 4111; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, s0, v1 4112; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, s0 4113; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s0, v0 4114; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[0:1] 4115; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 4116; SDAG-IEEE-NEXT: s_mov_b32 s6, -1 4117; SDAG-IEEE-NEXT: v_add_i32_e32 v2, vcc, -1, v1 4118; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 4119; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v3 4120; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 4121; SDAG-IEEE-NEXT: v_add_i32_e32 v3, vcc, 1, v1 4122; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 4123; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1 4124; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc 4125; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 4126; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] 4127; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 4128; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 4129; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 4130; SDAG-IEEE-NEXT: buffer_store_dword v0, off, s[4:7], 0 4131; SDAG-IEEE-NEXT: s_endpgm 4132; 4133; GISEL-IEEE-LABEL: elim_redun_check_pos0: 4134; GISEL-IEEE: ; %bb.0: ; %entry 4135; GISEL-IEEE-NEXT: s_load_dword s2, s[4:5], 0xb 4136; GISEL-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000 4137; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000 4138; GISEL-IEEE-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 4139; GISEL-IEEE-NEXT: s_mov_b32 s6, -1 4140; GISEL-IEEE-NEXT: s_waitcnt lgkmcnt(0) 4141; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, s2 4142; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, s2, v1 4143; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 4144; GISEL-IEEE-NEXT: 
v_cndmask_b32_e32 v0, v2, v1, vcc 4145; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 4146; GISEL-IEEE-NEXT: s_mov_b32 s7, 0xf000 4147; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[0:1], -1, v1 4148; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 4149; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[0:1], 1, v1 4150; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 4151; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v3 4152; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] 4153; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v5 4154; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1] 4155; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 4156; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 4157; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 4158; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 4159; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 4160; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x7fc00000 4161; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 vcc, s2, 0 4162; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 4163; GISEL-IEEE-NEXT: buffer_store_dword v0, off, s[4:7], 0 4164; GISEL-IEEE-NEXT: s_endpgm 4165; 4166; SDAG-DAZ-LABEL: elim_redun_check_pos0: 4167; SDAG-DAZ: ; %bb.0: ; %entry 4168; SDAG-DAZ-NEXT: s_load_dword s0, s[4:5], 0xb 4169; SDAG-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000 4170; SDAG-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000 4171; SDAG-DAZ-NEXT: s_mov_b32 s3, 0xf000 4172; SDAG-DAZ-NEXT: s_mov_b32 s2, -1 4173; SDAG-DAZ-NEXT: s_waitcnt lgkmcnt(0) 4174; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, s0, v1 4175; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, s0 4176; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 4177; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 4178; SDAG-DAZ-NEXT: v_rsq_f32_e32 v1, v0 4179; SDAG-DAZ-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 4180; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 4181; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 4182; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 4183; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 4184; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 4185; SDAG-DAZ-NEXT: v_fma_f32 
v1, v1, v3, v1 4186; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 4187; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 4188; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 4189; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 4190; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 4191; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 4192; SDAG-DAZ-NEXT: s_waitcnt lgkmcnt(0) 4193; SDAG-DAZ-NEXT: buffer_store_dword v0, off, s[0:3], 0 4194; SDAG-DAZ-NEXT: s_endpgm 4195; 4196; GISEL-DAZ-LABEL: elim_redun_check_pos0: 4197; GISEL-DAZ: ; %bb.0: ; %entry 4198; GISEL-DAZ-NEXT: s_load_dword s2, s[4:5], 0xb 4199; GISEL-DAZ-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 4200; GISEL-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000 4201; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000 4202; GISEL-DAZ-NEXT: s_mov_b32 s3, 0xf000 4203; GISEL-DAZ-NEXT: s_waitcnt lgkmcnt(0) 4204; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, s2 4205; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, s2, v1 4206; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 4207; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 4208; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 4209; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 4210; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 4211; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 4212; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 4213; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 4214; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 4215; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 4216; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 4217; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 4218; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 4219; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 4220; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 4221; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000 4222; GISEL-DAZ-NEXT: v_cmp_lt_f32_e64 vcc, s2, 0 4223; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 4224; GISEL-DAZ-NEXT: s_mov_b32 s2, -1 4225; GISEL-DAZ-NEXT: buffer_store_dword v0, off, s[0:3], 0 4226; GISEL-DAZ-NEXT: s_endpgm 4227entry: 4228 %sqrt = call float 
@llvm.sqrt.f32(float %in) 4229 %cmp = fcmp olt float %in, 0.000000e+00 4230 %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt 4231 store float %res, ptr addrspace(1) %out 4232 ret void 4233} 4234 4235define amdgpu_kernel void @elim_redun_check_ult(ptr addrspace(1) %out, float %in) { 4236; SDAG-IEEE-LABEL: elim_redun_check_ult: 4237; SDAG-IEEE: ; %bb.0: ; %entry 4238; SDAG-IEEE-NEXT: s_load_dword s0, s[4:5], 0xb 4239; SDAG-IEEE-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 4240; SDAG-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000 4241; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000 4242; SDAG-IEEE-NEXT: s_mov_b32 s7, 0xf000 4243; SDAG-IEEE-NEXT: s_waitcnt lgkmcnt(0) 4244; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, s0, v1 4245; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, s0 4246; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s0, v0 4247; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[0:1] 4248; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 4249; SDAG-IEEE-NEXT: s_mov_b32 s6, -1 4250; SDAG-IEEE-NEXT: v_add_i32_e32 v2, vcc, -1, v1 4251; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 4252; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v3 4253; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 4254; SDAG-IEEE-NEXT: v_add_i32_e32 v3, vcc, 1, v1 4255; SDAG-IEEE-NEXT: v_fma_f32 v1, -v3, v1, v0 4256; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1 4257; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc 4258; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 4259; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] 4260; SDAG-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 4261; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 4262; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 4263; SDAG-IEEE-NEXT: buffer_store_dword v0, off, s[4:7], 0 4264; SDAG-IEEE-NEXT: s_endpgm 4265; 4266; GISEL-IEEE-LABEL: elim_redun_check_ult: 4267; GISEL-IEEE: ; %bb.0: ; %entry 4268; GISEL-IEEE-NEXT: s_load_dword s2, s[4:5], 0xb 4269; GISEL-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000 4270; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000 4271; GISEL-IEEE-NEXT: 
s_load_dwordx2 s[4:5], s[4:5], 0x9 4272; GISEL-IEEE-NEXT: s_mov_b32 s6, -1 4273; GISEL-IEEE-NEXT: s_waitcnt lgkmcnt(0) 4274; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, s2 4275; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, s2, v1 4276; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 4277; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 4278; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 4279; GISEL-IEEE-NEXT: s_mov_b32 s7, 0xf000 4280; GISEL-IEEE-NEXT: v_add_i32_e64 v2, s[0:1], -1, v1 4281; GISEL-IEEE-NEXT: v_fma_f32 v3, -v2, v1, v0 4282; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[0:1], 1, v1 4283; GISEL-IEEE-NEXT: v_fma_f32 v5, -v4, v1, v0 4284; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v3 4285; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] 4286; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v5 4287; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[0:1] 4288; GISEL-IEEE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 4289; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 4290; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x260 4291; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 4292; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 4293; GISEL-IEEE-NEXT: v_bfrev_b32_e32 v1, 1 4294; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, 0x7fc00000 4295; GISEL-IEEE-NEXT: v_cmp_nge_f32_e32 vcc, s2, v1 4296; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 4297; GISEL-IEEE-NEXT: buffer_store_dword v0, off, s[4:7], 0 4298; GISEL-IEEE-NEXT: s_endpgm 4299; 4300; SDAG-DAZ-LABEL: elim_redun_check_ult: 4301; SDAG-DAZ: ; %bb.0: ; %entry 4302; SDAG-DAZ-NEXT: s_load_dword s0, s[4:5], 0xb 4303; SDAG-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000 4304; SDAG-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000 4305; SDAG-DAZ-NEXT: s_mov_b32 s3, 0xf000 4306; SDAG-DAZ-NEXT: s_mov_b32 s2, -1 4307; SDAG-DAZ-NEXT: s_waitcnt lgkmcnt(0) 4308; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, s0, v1 4309; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, s0 4310; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 4311; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 4312; SDAG-DAZ-NEXT: 
v_rsq_f32_e32 v1, v0 4313; SDAG-DAZ-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 4314; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 4315; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 4316; SDAG-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 4317; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 4318; SDAG-DAZ-NEXT: v_fma_f32 v4, -v2, v2, v0 4319; SDAG-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 4320; SDAG-DAZ-NEXT: v_fma_f32 v1, v4, v1, v2 4321; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 4322; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 4323; SDAG-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 4324; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 4325; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 4326; SDAG-DAZ-NEXT: s_waitcnt lgkmcnt(0) 4327; SDAG-DAZ-NEXT: buffer_store_dword v0, off, s[0:3], 0 4328; SDAG-DAZ-NEXT: s_endpgm 4329; 4330; GISEL-DAZ-LABEL: elim_redun_check_ult: 4331; GISEL-DAZ: ; %bb.0: ; %entry 4332; GISEL-DAZ-NEXT: s_load_dword s2, s[4:5], 0xb 4333; GISEL-DAZ-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 4334; GISEL-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000 4335; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000 4336; GISEL-DAZ-NEXT: s_mov_b32 s3, 0xf000 4337; GISEL-DAZ-NEXT: s_waitcnt lgkmcnt(0) 4338; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, s2 4339; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, s2, v1 4340; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 4341; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 4342; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 4343; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, v0, v1 4344; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 4345; GISEL-DAZ-NEXT: v_fma_f32 v3, -v1, v2, 0.5 4346; GISEL-DAZ-NEXT: v_fma_f32 v2, v2, v3, v2 4347; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v3, v1 4348; GISEL-DAZ-NEXT: v_fma_f32 v3, -v2, v2, v0 4349; GISEL-DAZ-NEXT: v_fma_f32 v1, v3, v1, v2 4350; GISEL-DAZ-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 4351; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 4352; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x260 4353; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 4354; GISEL-DAZ-NEXT: 
v_cndmask_b32_e32 v0, v1, v0, vcc 4355; GISEL-DAZ-NEXT: v_bfrev_b32_e32 v1, 1 4356; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, 0x7fc00000 4357; GISEL-DAZ-NEXT: v_cmp_nge_f32_e32 vcc, s2, v1 4358; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 4359; GISEL-DAZ-NEXT: s_mov_b32 s2, -1 4360; GISEL-DAZ-NEXT: buffer_store_dword v0, off, s[0:3], 0 4361; GISEL-DAZ-NEXT: s_endpgm 4362entry: 4363 %sqrt = call float @llvm.sqrt.f32(float %in) 4364 %cmp = fcmp ult float %in, -0.000000e+00 4365 %res = select i1 %cmp, float 0x7FF8000000000000, float %sqrt 4366 store float %res, ptr addrspace(1) %out 4367 ret void 4368} 4369 4370define amdgpu_kernel void @elim_redun_check_v2(ptr addrspace(1) %out, <2 x float> %in) { 4371; SDAG-IEEE-LABEL: elim_redun_check_v2: 4372; SDAG-IEEE: ; %bb.0: ; %entry 4373; SDAG-IEEE-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9 4374; SDAG-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000 4375; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000 4376; SDAG-IEEE-NEXT: s_mov_b32 s7, 0xf000 4377; SDAG-IEEE-NEXT: s_mov_b32 s6, -1 4378; SDAG-IEEE-NEXT: s_waitcnt lgkmcnt(0) 4379; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, s11, v1 4380; SDAG-IEEE-NEXT: v_mov_b32_e32 v3, s11 4381; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v0 4382; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[0:1] 4383; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v3, v2 4384; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, s10, v1 4385; SDAG-IEEE-NEXT: s_mov_b32 s4, s8 4386; SDAG-IEEE-NEXT: s_mov_b32 s5, s9 4387; SDAG-IEEE-NEXT: v_add_i32_e32 v4, vcc, -1, v3 4388; SDAG-IEEE-NEXT: v_fma_f32 v5, -v4, v3, v2 4389; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v5 4390; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc 4391; SDAG-IEEE-NEXT: v_add_i32_e32 v5, vcc, 1, v3 4392; SDAG-IEEE-NEXT: v_fma_f32 v3, -v5, v3, v2 4393; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v3 4394; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc 4395; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v3 4396; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1] 4397; SDAG-IEEE-NEXT: 
v_mov_b32_e32 v5, s10 4398; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0 4399; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v5, v1, s[0:1] 4400; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v5, v0 4401; SDAG-IEEE-NEXT: v_mov_b32_e32 v4, 0x260 4402; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v2, v4 4403; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc 4404; SDAG-IEEE-NEXT: v_add_i32_e32 v2, vcc, -1, v5 4405; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v5, v0 4406; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v3 4407; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc 4408; SDAG-IEEE-NEXT: v_add_i32_e32 v3, vcc, 1, v5 4409; SDAG-IEEE-NEXT: v_fma_f32 v5, -v3, v5, v0 4410; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v5 4411; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 4412; SDAG-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 4413; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] 4414; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v4 4415; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 4416; SDAG-IEEE-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4417; SDAG-IEEE-NEXT: s_endpgm 4418; 4419; GISEL-IEEE-LABEL: elim_redun_check_v2: 4420; GISEL-IEEE: ; %bb.0: ; %entry 4421; GISEL-IEEE-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 4422; GISEL-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000 4423; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000 4424; GISEL-IEEE-NEXT: s_waitcnt lgkmcnt(0) 4425; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, s6 4426; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, s6, v1 4427; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 4428; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 4429; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v3, v2 4430; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, s7, v1 4431; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[0:1], -1, v3 4432; GISEL-IEEE-NEXT: v_add_i32_e64 v5, s[0:1], 1, v3 4433; GISEL-IEEE-NEXT: v_fma_f32 v6, -v4, v3, v2 4434; GISEL-IEEE-NEXT: v_fma_f32 v7, -v5, v3, v2 4435; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v6 4436; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v4, s[0:1] 4437; GISEL-IEEE-NEXT: 
v_cmp_lt_f32_e64 s[0:1], 0, v7 4438; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[0:1] 4439; GISEL-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v3 4440; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 4441; GISEL-IEEE-NEXT: v_mov_b32_e32 v5, s7 4442; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 4443; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc 4444; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 4445; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x260 4446; GISEL-IEEE-NEXT: v_cmp_class_f32_e64 s[0:1], v2, v4 4447; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[0:1] 4448; GISEL-IEEE-NEXT: v_add_i32_e64 v3, s[0:1], -1, v1 4449; GISEL-IEEE-NEXT: v_fma_f32 v5, -v3, v1, v0 4450; GISEL-IEEE-NEXT: v_add_i32_e64 v6, s[0:1], 1, v1 4451; GISEL-IEEE-NEXT: v_fma_f32 v7, -v6, v1, v0 4452; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v5 4453; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] 4454; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v7 4455; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[0:1] 4456; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v1 4457; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 4458; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v4 4459; GISEL-IEEE-NEXT: v_bfrev_b32_e32 v3, 1 4460; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 4461; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x7fc00000 4462; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s6, v3 4463; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc 4464; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s7, v3 4465; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 4466; GISEL-IEEE-NEXT: s_mov_b32 s6, -1 4467; GISEL-IEEE-NEXT: s_mov_b32 s7, 0xf000 4468; GISEL-IEEE-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4469; GISEL-IEEE-NEXT: s_endpgm 4470; 4471; SDAG-DAZ-LABEL: elim_redun_check_v2: 4472; SDAG-DAZ: ; %bb.0: ; %entry 4473; SDAG-DAZ-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4474; SDAG-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000 4475; SDAG-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000 4476; SDAG-DAZ-NEXT: s_mov_b32 s7, 
0xf000 4477; SDAG-DAZ-NEXT: s_mov_b32 s6, -1 4478; SDAG-DAZ-NEXT: s_waitcnt lgkmcnt(0) 4479; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, s3, v1 4480; SDAG-DAZ-NEXT: v_mov_b32_e32 v3, s3 4481; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 4482; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 4483; SDAG-DAZ-NEXT: v_rsq_f32_e32 v3, v2 4484; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, s2, v1 4485; SDAG-DAZ-NEXT: s_mov_b32 s4, s0 4486; SDAG-DAZ-NEXT: s_mov_b32 s5, s1 4487; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, v2, v3 4488; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v3 4489; SDAG-DAZ-NEXT: v_fma_f32 v5, -v3, v4, 0.5 4490; SDAG-DAZ-NEXT: v_fma_f32 v4, v4, v5, v4 4491; SDAG-DAZ-NEXT: v_fma_f32 v6, -v4, v4, v2 4492; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v5, v3 4493; SDAG-DAZ-NEXT: v_fma_f32 v3, v6, v3, v4 4494; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, 0x37800000, v3 4495; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 4496; SDAG-DAZ-NEXT: v_mov_b32_e32 v4, s2 4497; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 4498; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 4499; SDAG-DAZ-NEXT: v_rsq_f32_e32 v4, v0 4500; SDAG-DAZ-NEXT: v_mov_b32_e32 v5, 0x260 4501; SDAG-DAZ-NEXT: v_cmp_class_f32_e64 s[0:1], v2, v5 4502; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v1, v3, v2, s[0:1] 4503; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v4 4504; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v4 4505; SDAG-DAZ-NEXT: v_fma_f32 v4, -v3, v2, 0.5 4506; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v4, v2 4507; SDAG-DAZ-NEXT: v_fma_f32 v6, -v2, v2, v0 4508; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v4, v3 4509; SDAG-DAZ-NEXT: v_fma_f32 v2, v6, v3, v2 4510; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 4511; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 4512; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v5 4513; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 4514; SDAG-DAZ-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4515; SDAG-DAZ-NEXT: s_endpgm 4516; 4517; GISEL-DAZ-LABEL: elim_redun_check_v2: 4518; GISEL-DAZ: ; %bb.0: ; %entry 4519; GISEL-DAZ-NEXT: s_load_dwordx4 s[4:7], 
s[4:5], 0x9 4520; GISEL-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000 4521; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000 4522; GISEL-DAZ-NEXT: s_waitcnt lgkmcnt(0) 4523; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, s6 4524; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, s6, v1 4525; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 4526; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 4527; GISEL-DAZ-NEXT: v_rsq_f32_e32 v3, v2 4528; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, s7 4529; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, s7, v1 4530; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, v2, v3 4531; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v3 4532; GISEL-DAZ-NEXT: v_fma_f32 v6, -v3, v5, 0.5 4533; GISEL-DAZ-NEXT: v_fma_f32 v5, v5, v6, v5 4534; GISEL-DAZ-NEXT: v_fma_f32 v3, v3, v6, v3 4535; GISEL-DAZ-NEXT: v_fma_f32 v6, -v5, v5, v2 4536; GISEL-DAZ-NEXT: v_fma_f32 v3, v6, v3, v5 4537; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, 0x37800000, v3 4538; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 4539; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 4540; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 4541; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 4542; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, 0x260 4543; GISEL-DAZ-NEXT: v_cmp_class_f32_e64 s[0:1], v2, v4 4544; GISEL-DAZ-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[0:1] 4545; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, v0, v1 4546; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 4547; GISEL-DAZ-NEXT: v_fma_f32 v5, -v1, v3, 0.5 4548; GISEL-DAZ-NEXT: v_fma_f32 v3, v3, v5, v3 4549; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v5, v1 4550; GISEL-DAZ-NEXT: v_fma_f32 v5, -v3, v3, v0 4551; GISEL-DAZ-NEXT: v_fma_f32 v1, v5, v1, v3 4552; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0x37800000, v1 4553; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 4554; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v4 4555; GISEL-DAZ-NEXT: v_bfrev_b32_e32 v3, 1 4556; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 4557; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, 0x7fc00000 4558; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s6, v3 4559; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc 
4560; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s7, v3 4561; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 4562; GISEL-DAZ-NEXT: s_mov_b32 s6, -1 4563; GISEL-DAZ-NEXT: s_mov_b32 s7, 0xf000 4564; GISEL-DAZ-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4565; GISEL-DAZ-NEXT: s_endpgm 4566entry: 4567 %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in) 4568 %cmp = fcmp olt <2 x float> %in, <float -0.000000e+00, float -0.000000e+00> 4569 %res = select <2 x i1> %cmp, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %sqrt 4570 store <2 x float> %res, ptr addrspace(1) %out 4571 ret void 4572} 4573 4574define amdgpu_kernel void @elim_redun_check_v2_ult(ptr addrspace(1) %out, <2 x float> %in) { 4575; SDAG-IEEE-LABEL: elim_redun_check_v2_ult: 4576; SDAG-IEEE: ; %bb.0: ; %entry 4577; SDAG-IEEE-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9 4578; SDAG-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000 4579; SDAG-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000 4580; SDAG-IEEE-NEXT: s_mov_b32 s7, 0xf000 4581; SDAG-IEEE-NEXT: s_mov_b32 s6, -1 4582; SDAG-IEEE-NEXT: s_waitcnt lgkmcnt(0) 4583; SDAG-IEEE-NEXT: v_mul_f32_e32 v2, s11, v1 4584; SDAG-IEEE-NEXT: v_mov_b32_e32 v3, s11 4585; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s11, v0 4586; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[0:1] 4587; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v3, v2 4588; SDAG-IEEE-NEXT: v_mul_f32_e32 v1, s10, v1 4589; SDAG-IEEE-NEXT: s_mov_b32 s4, s8 4590; SDAG-IEEE-NEXT: s_mov_b32 s5, s9 4591; SDAG-IEEE-NEXT: v_add_i32_e32 v4, vcc, -1, v3 4592; SDAG-IEEE-NEXT: v_fma_f32 v5, -v4, v3, v2 4593; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v5 4594; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc 4595; SDAG-IEEE-NEXT: v_add_i32_e32 v5, vcc, 1, v3 4596; SDAG-IEEE-NEXT: v_fma_f32 v3, -v5, v3, v2 4597; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v3 4598; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc 4599; SDAG-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v3 4600; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v4, 
s[0:1] 4601; SDAG-IEEE-NEXT: v_mov_b32_e32 v5, s10 4602; SDAG-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], s10, v0 4603; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v0, v5, v1, s[0:1] 4604; SDAG-IEEE-NEXT: v_sqrt_f32_e32 v5, v0 4605; SDAG-IEEE-NEXT: v_mov_b32_e32 v4, 0x260 4606; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v2, v4 4607; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc 4608; SDAG-IEEE-NEXT: v_add_i32_e32 v2, vcc, -1, v5 4609; SDAG-IEEE-NEXT: v_fma_f32 v3, -v2, v5, v0 4610; SDAG-IEEE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v3 4611; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc 4612; SDAG-IEEE-NEXT: v_add_i32_e32 v3, vcc, 1, v5 4613; SDAG-IEEE-NEXT: v_fma_f32 v5, -v3, v5, v0 4614; SDAG-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v5 4615; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 4616; SDAG-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 4617; SDAG-IEEE-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] 4618; SDAG-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v4 4619; SDAG-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 4620; SDAG-IEEE-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4621; SDAG-IEEE-NEXT: s_endpgm 4622; 4623; GISEL-IEEE-LABEL: elim_redun_check_v2_ult: 4624; GISEL-IEEE: ; %bb.0: ; %entry 4625; GISEL-IEEE-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 4626; GISEL-IEEE-NEXT: v_mov_b32_e32 v0, 0xf800000 4627; GISEL-IEEE-NEXT: v_mov_b32_e32 v1, 0x4f800000 4628; GISEL-IEEE-NEXT: s_waitcnt lgkmcnt(0) 4629; GISEL-IEEE-NEXT: v_mov_b32_e32 v2, s6 4630; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, s6, v1 4631; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 4632; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 4633; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v3, v2 4634; GISEL-IEEE-NEXT: v_mul_f32_e32 v1, s7, v1 4635; GISEL-IEEE-NEXT: v_add_i32_e64 v4, s[0:1], -1, v3 4636; GISEL-IEEE-NEXT: v_add_i32_e64 v5, s[0:1], 1, v3 4637; GISEL-IEEE-NEXT: v_fma_f32 v6, -v4, v3, v2 4638; GISEL-IEEE-NEXT: v_fma_f32 v7, -v5, v3, v2 4639; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v6 4640; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, 
v4, s[0:1] 4641; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v7 4642; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[0:1] 4643; GISEL-IEEE-NEXT: v_mul_f32_e32 v4, 0x37800000, v3 4644; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 4645; GISEL-IEEE-NEXT: v_mov_b32_e32 v5, s7 4646; GISEL-IEEE-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 4647; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v5, v1, vcc 4648; GISEL-IEEE-NEXT: v_sqrt_f32_e32 v1, v0 4649; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x260 4650; GISEL-IEEE-NEXT: v_cmp_class_f32_e64 s[0:1], v2, v4 4651; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[0:1] 4652; GISEL-IEEE-NEXT: v_add_i32_e64 v3, s[0:1], -1, v1 4653; GISEL-IEEE-NEXT: v_fma_f32 v5, -v3, v1, v0 4654; GISEL-IEEE-NEXT: v_add_i32_e64 v6, s[0:1], 1, v1 4655; GISEL-IEEE-NEXT: v_fma_f32 v7, -v6, v1, v0 4656; GISEL-IEEE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v5 4657; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] 4658; GISEL-IEEE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v7 4659; GISEL-IEEE-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[0:1] 4660; GISEL-IEEE-NEXT: v_mul_f32_e32 v3, 0x37800000, v1 4661; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 4662; GISEL-IEEE-NEXT: v_cmp_class_f32_e32 vcc, v0, v4 4663; GISEL-IEEE-NEXT: v_bfrev_b32_e32 v3, 1 4664; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 4665; GISEL-IEEE-NEXT: v_mov_b32_e32 v4, 0x7fc00000 4666; GISEL-IEEE-NEXT: v_cmp_nge_f32_e32 vcc, s6, v3 4667; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc 4668; GISEL-IEEE-NEXT: v_cmp_nge_f32_e32 vcc, s7, v3 4669; GISEL-IEEE-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 4670; GISEL-IEEE-NEXT: s_mov_b32 s6, -1 4671; GISEL-IEEE-NEXT: s_mov_b32 s7, 0xf000 4672; GISEL-IEEE-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4673; GISEL-IEEE-NEXT: s_endpgm 4674; 4675; SDAG-DAZ-LABEL: elim_redun_check_v2_ult: 4676; SDAG-DAZ: ; %bb.0: ; %entry 4677; SDAG-DAZ-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 4678; SDAG-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000 4679; SDAG-DAZ-NEXT: v_mov_b32_e32 v1, 
0x4f800000 4680; SDAG-DAZ-NEXT: s_mov_b32 s7, 0xf000 4681; SDAG-DAZ-NEXT: s_mov_b32 s6, -1 4682; SDAG-DAZ-NEXT: s_waitcnt lgkmcnt(0) 4683; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, s3, v1 4684; SDAG-DAZ-NEXT: v_mov_b32_e32 v3, s3 4685; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s3, v0 4686; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 4687; SDAG-DAZ-NEXT: v_rsq_f32_e32 v3, v2 4688; SDAG-DAZ-NEXT: v_mul_f32_e32 v1, s2, v1 4689; SDAG-DAZ-NEXT: s_mov_b32 s4, s0 4690; SDAG-DAZ-NEXT: s_mov_b32 s5, s1 4691; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, v2, v3 4692; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v3 4693; SDAG-DAZ-NEXT: v_fma_f32 v5, -v3, v4, 0.5 4694; SDAG-DAZ-NEXT: v_fma_f32 v4, v4, v5, v4 4695; SDAG-DAZ-NEXT: v_fma_f32 v6, -v4, v4, v2 4696; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v5, v3 4697; SDAG-DAZ-NEXT: v_fma_f32 v3, v6, v3, v4 4698; SDAG-DAZ-NEXT: v_mul_f32_e32 v4, 0x37800000, v3 4699; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc 4700; SDAG-DAZ-NEXT: v_mov_b32_e32 v4, s2 4701; SDAG-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s2, v0 4702; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 4703; SDAG-DAZ-NEXT: v_rsq_f32_e32 v4, v0 4704; SDAG-DAZ-NEXT: v_mov_b32_e32 v5, 0x260 4705; SDAG-DAZ-NEXT: v_cmp_class_f32_e64 s[0:1], v2, v5 4706; SDAG-DAZ-NEXT: v_cndmask_b32_e64 v1, v3, v2, s[0:1] 4707; SDAG-DAZ-NEXT: v_mul_f32_e32 v2, v0, v4 4708; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v4 4709; SDAG-DAZ-NEXT: v_fma_f32 v4, -v3, v2, 0.5 4710; SDAG-DAZ-NEXT: v_fma_f32 v2, v2, v4, v2 4711; SDAG-DAZ-NEXT: v_fma_f32 v6, -v2, v2, v0 4712; SDAG-DAZ-NEXT: v_fma_f32 v3, v3, v4, v3 4713; SDAG-DAZ-NEXT: v_fma_f32 v2, v6, v3, v2 4714; SDAG-DAZ-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 4715; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 4716; SDAG-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v5 4717; SDAG-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 4718; SDAG-DAZ-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4719; SDAG-DAZ-NEXT: s_endpgm 4720; 4721; GISEL-DAZ-LABEL: elim_redun_check_v2_ult: 4722; GISEL-DAZ: ; %bb.0: ; %entry 
4723; GISEL-DAZ-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 4724; GISEL-DAZ-NEXT: v_mov_b32_e32 v0, 0xf800000 4725; GISEL-DAZ-NEXT: v_mov_b32_e32 v1, 0x4f800000 4726; GISEL-DAZ-NEXT: s_waitcnt lgkmcnt(0) 4727; GISEL-DAZ-NEXT: v_mov_b32_e32 v2, s6 4728; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, s6, v1 4729; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 4730; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 4731; GISEL-DAZ-NEXT: v_rsq_f32_e32 v3, v2 4732; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, s7 4733; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, s7, v1 4734; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, v2, v3 4735; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0.5, v3 4736; GISEL-DAZ-NEXT: v_fma_f32 v6, -v3, v5, 0.5 4737; GISEL-DAZ-NEXT: v_fma_f32 v5, v5, v6, v5 4738; GISEL-DAZ-NEXT: v_fma_f32 v3, v3, v6, v3 4739; GISEL-DAZ-NEXT: v_fma_f32 v6, -v5, v5, v2 4740; GISEL-DAZ-NEXT: v_fma_f32 v3, v6, v3, v5 4741; GISEL-DAZ-NEXT: v_mul_f32_e32 v5, 0x37800000, v3 4742; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 4743; GISEL-DAZ-NEXT: v_cmp_lt_f32_e32 vcc, s7, v0 4744; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 4745; GISEL-DAZ-NEXT: v_rsq_f32_e32 v1, v0 4746; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, 0x260 4747; GISEL-DAZ-NEXT: v_cmp_class_f32_e64 s[0:1], v2, v4 4748; GISEL-DAZ-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[0:1] 4749; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, v0, v1 4750; GISEL-DAZ-NEXT: v_mul_f32_e32 v1, 0.5, v1 4751; GISEL-DAZ-NEXT: v_fma_f32 v5, -v1, v3, 0.5 4752; GISEL-DAZ-NEXT: v_fma_f32 v3, v3, v5, v3 4753; GISEL-DAZ-NEXT: v_fma_f32 v1, v1, v5, v1 4754; GISEL-DAZ-NEXT: v_fma_f32 v5, -v3, v3, v0 4755; GISEL-DAZ-NEXT: v_fma_f32 v1, v5, v1, v3 4756; GISEL-DAZ-NEXT: v_mul_f32_e32 v3, 0x37800000, v1 4757; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 4758; GISEL-DAZ-NEXT: v_cmp_class_f32_e32 vcc, v0, v4 4759; GISEL-DAZ-NEXT: v_bfrev_b32_e32 v3, 1 4760; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 4761; GISEL-DAZ-NEXT: v_mov_b32_e32 v4, 0x7fc00000 4762; GISEL-DAZ-NEXT: v_cmp_nge_f32_e32 vcc, s6, v3 4763; 
GISEL-DAZ-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc 4764; GISEL-DAZ-NEXT: v_cmp_nge_f32_e32 vcc, s7, v3 4765; GISEL-DAZ-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 4766; GISEL-DAZ-NEXT: s_mov_b32 s6, -1 4767; GISEL-DAZ-NEXT: s_mov_b32 s7, 0xf000 4768; GISEL-DAZ-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4769; GISEL-DAZ-NEXT: s_endpgm 4770entry: 4771 %sqrt = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in) 4772 %cmp = fcmp ult <2 x float> %in, <float -0.000000e+00, float -0.000000e+00> 4773 %res = select <2 x i1> %cmp, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %sqrt 4774 store <2 x float> %res, ptr addrspace(1) %out 4775 ret void 4776} 4777 4778declare float @llvm.fabs.f32(float) #0 4779declare float @llvm.sqrt.f32(float) #0 4780declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #0 4781declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0 4782declare <3 x float> @llvm.sqrt.v3f32(<3 x float>) #0 4783declare i32 @llvm.amdgcn.readfirstlane(i32) #1 4784 4785declare { float, i32 } @llvm.frexp.f32.i32(float) #0 4786 4787attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } 4788attributes #1 = { convergent nounwind willreturn memory(none) } 4789attributes #2 = { "approx-func-fp-math"="true" } 4790attributes #3 = { "approx-func-fp-math"="true" "no-nans-fp-math"="true" "no-infs-fp-math"="true" } 4791attributes #4 = { "unsafe-fp-math"="true" } 4792attributes #5 = { "no-infs-fp-math"="true" } 4793 4794!0 = !{float 0.5} 4795!1 = !{float 1.0} 4796!2 = !{float 2.0} 4797!3 = !{float 2.5} 4798!4 = !{float 3.0} 4799;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 4800; GCN-IEEE: {{.*}} 4801