1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 2; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GCN-DAZ,GCN-DAZ-UNSAFE,SI-DAZ-UNSAFE %s 3; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GCN-IEEE,GCN-IEEE-UNSAFE,SI-IEEE-UNSAFE %s 4 5; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN-DAZ,GCN-DAZ-SAFE,SI-DAZ-SAFE %s 6; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN-IEEE,GCN-IEEE-SAFE,SI-IEEE-SAFE %s 7 8 9; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=hawaii -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GCN-DAZ,GCN-DAZ-UNSAFE,CI-DAZ-UNSAFE %s 10; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=hawaii -denormal-fp-math-f32=ieee -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GCN-IEEE,GCN-IEEE-UNSAFE,CI-IEEE-UNSAFE %s 11 12; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=hawaii -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN-DAZ,GCN-DAZ-SAFE,CI-DAZ-SAFE %s 13; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=hawaii -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN-IEEE,GCN-IEEE-SAFE,CI-IEEE-SAFE %s 14 15 16declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone 17declare float @llvm.sqrt.f32(float) nounwind readnone 18declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) nounwind readnone 19 20define amdgpu_kernel void @rsq_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { 21; GCN-DAZ-UNSAFE-LABEL: rsq_f32: 22; 
GCN-DAZ-UNSAFE: ; %bb.0: 23; GCN-DAZ-UNSAFE-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 24; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s7, 0xf000 25; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s6, -1 26; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s10, s6 27; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s11, s7 28; GCN-DAZ-UNSAFE-NEXT: s_waitcnt lgkmcnt(0) 29; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s8, s2 30; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s9, s3 31; GCN-DAZ-UNSAFE-NEXT: buffer_load_dword v0, off, s[8:11], 0 32; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s4, s0 33; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s5, s1 34; GCN-DAZ-UNSAFE-NEXT: s_waitcnt vmcnt(0) 35; GCN-DAZ-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 36; GCN-DAZ-UNSAFE-NEXT: buffer_store_dword v0, off, s[4:7], 0 37; GCN-DAZ-UNSAFE-NEXT: s_endpgm 38; 39; GCN-IEEE-UNSAFE-LABEL: rsq_f32: 40; GCN-IEEE-UNSAFE: ; %bb.0: 41; GCN-IEEE-UNSAFE-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 42; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s7, 0xf000 43; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s6, -1 44; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s10, s6 45; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s11, s7 46; GCN-IEEE-UNSAFE-NEXT: s_waitcnt lgkmcnt(0) 47; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s8, s2 48; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s9, s3 49; GCN-IEEE-UNSAFE-NEXT: buffer_load_dword v0, off, s[8:11], 0 50; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s4, s0 51; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s5, s1 52; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) 53; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 54; GCN-IEEE-UNSAFE-NEXT: buffer_store_dword v0, off, s[4:7], 0 55; GCN-IEEE-UNSAFE-NEXT: s_endpgm 56; 57; GCN-DAZ-SAFE-LABEL: rsq_f32: 58; GCN-DAZ-SAFE: ; %bb.0: 59; GCN-DAZ-SAFE-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 60; GCN-DAZ-SAFE-NEXT: s_mov_b32 s7, 0xf000 61; GCN-DAZ-SAFE-NEXT: s_mov_b32 s6, -1 62; GCN-DAZ-SAFE-NEXT: s_mov_b32 s10, s6 63; GCN-DAZ-SAFE-NEXT: s_mov_b32 s11, s7 64; GCN-DAZ-SAFE-NEXT: s_waitcnt lgkmcnt(0) 65; GCN-DAZ-SAFE-NEXT: s_mov_b32 s8, s2 66; GCN-DAZ-SAFE-NEXT: s_mov_b32 s9, s3 67; GCN-DAZ-SAFE-NEXT: buffer_load_dword v0, off, s[8:11], 0 68; 
GCN-DAZ-SAFE-NEXT: s_mov_b32 s2, 0xf800000 69; GCN-DAZ-SAFE-NEXT: v_mov_b32_e32 v2, 0x260 70; GCN-DAZ-SAFE-NEXT: s_mov_b32 s4, s0 71; GCN-DAZ-SAFE-NEXT: s_mov_b32 s5, s1 72; GCN-DAZ-SAFE-NEXT: s_waitcnt vmcnt(0) 73; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 74; GCN-DAZ-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0 75; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 76; GCN-DAZ-SAFE-NEXT: v_rsq_f32_e32 v1, v0 77; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v3, v0, v1 78; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v1, 0.5, v1 79; GCN-DAZ-SAFE-NEXT: v_fma_f32 v4, -v1, v3, 0.5 80; GCN-DAZ-SAFE-NEXT: v_fma_f32 v3, v3, v4, v3 81; GCN-DAZ-SAFE-NEXT: v_fma_f32 v1, v1, v4, v1 82; GCN-DAZ-SAFE-NEXT: v_fma_f32 v4, -v3, v3, v0 83; GCN-DAZ-SAFE-NEXT: v_fma_f32 v1, v4, v1, v3 84; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v3, 0x37800000, v1 85; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 86; GCN-DAZ-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 87; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 88; GCN-DAZ-SAFE-NEXT: v_rcp_f32_e32 v0, v0 89; GCN-DAZ-SAFE-NEXT: buffer_store_dword v0, off, s[4:7], 0 90; GCN-DAZ-SAFE-NEXT: s_endpgm 91; 92; SI-IEEE-SAFE-LABEL: rsq_f32: 93; SI-IEEE-SAFE: ; %bb.0: 94; SI-IEEE-SAFE-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9 95; SI-IEEE-SAFE-NEXT: s_mov_b32 s7, 0xf000 96; SI-IEEE-SAFE-NEXT: s_mov_b32 s6, -1 97; SI-IEEE-SAFE-NEXT: s_mov_b32 s2, s6 98; SI-IEEE-SAFE-NEXT: s_mov_b32 s3, s7 99; SI-IEEE-SAFE-NEXT: s_waitcnt lgkmcnt(0) 100; SI-IEEE-SAFE-NEXT: s_mov_b32 s0, s10 101; SI-IEEE-SAFE-NEXT: s_mov_b32 s1, s11 102; SI-IEEE-SAFE-NEXT: buffer_load_dword v0, off, s[0:3], 0 103; SI-IEEE-SAFE-NEXT: s_mov_b32 s0, 0xf800000 104; SI-IEEE-SAFE-NEXT: v_mov_b32_e32 v1, 0x260 105; SI-IEEE-SAFE-NEXT: s_mov_b32 s2, 0x7f800000 106; SI-IEEE-SAFE-NEXT: s_mov_b32 s4, s8 107; SI-IEEE-SAFE-NEXT: s_mov_b32 s5, s9 108; SI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) 109; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 110; SI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0 111; 
SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 112; SI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v2, v0 113; SI-IEEE-SAFE-NEXT: v_add_i32_e32 v3, vcc, -1, v2 114; SI-IEEE-SAFE-NEXT: v_add_i32_e32 v4, vcc, 1, v2 115; SI-IEEE-SAFE-NEXT: v_fma_f32 v5, -v3, v2, v0 116; SI-IEEE-SAFE-NEXT: v_fma_f32 v6, -v4, v2, v0 117; SI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v5 118; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 119; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v6 120; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 121; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 122; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] 123; SI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 124; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 125; SI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e32 v1, v0 126; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s2 127; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc 128; SI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v1, v1 129; SI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 130; SI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 131; SI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v1, v0 132; SI-IEEE-SAFE-NEXT: buffer_store_dword v0, off, s[4:7], 0 133; SI-IEEE-SAFE-NEXT: s_endpgm 134; 135; CI-IEEE-SAFE-LABEL: rsq_f32: 136; CI-IEEE-SAFE: ; %bb.0: 137; CI-IEEE-SAFE-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9 138; CI-IEEE-SAFE-NEXT: s_mov_b32 s7, 0xf000 139; CI-IEEE-SAFE-NEXT: s_mov_b32 s6, -1 140; CI-IEEE-SAFE-NEXT: s_mov_b32 s2, s6 141; CI-IEEE-SAFE-NEXT: s_mov_b32 s3, s7 142; CI-IEEE-SAFE-NEXT: s_waitcnt lgkmcnt(0) 143; CI-IEEE-SAFE-NEXT: s_mov_b32 s0, s10 144; CI-IEEE-SAFE-NEXT: s_mov_b32 s1, s11 145; CI-IEEE-SAFE-NEXT: buffer_load_dword v0, off, s[0:3], 0 146; CI-IEEE-SAFE-NEXT: s_mov_b32 s0, 0xf800000 147; CI-IEEE-SAFE-NEXT: v_mov_b32_e32 v1, 0x260 148; CI-IEEE-SAFE-NEXT: s_mov_b32 s4, s8 149; CI-IEEE-SAFE-NEXT: s_mov_b32 s5, s9 150; CI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) 151; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 152; 
CI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0 153; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 154; CI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v2, v0 155; CI-IEEE-SAFE-NEXT: v_add_i32_e32 v3, vcc, -1, v2 156; CI-IEEE-SAFE-NEXT: v_add_i32_e32 v4, vcc, 1, v2 157; CI-IEEE-SAFE-NEXT: v_fma_f32 v5, -v3, v2, v0 158; CI-IEEE-SAFE-NEXT: v_fma_f32 v6, -v4, v2, v0 159; CI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v5 160; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 161; CI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v6 162; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 163; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 164; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] 165; CI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 166; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 167; CI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e32 v1, v0 168; CI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v1, v1 169; CI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 170; CI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 171; CI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v1, v0 172; CI-IEEE-SAFE-NEXT: buffer_store_dword v0, off, s[4:7], 0 173; CI-IEEE-SAFE-NEXT: s_endpgm 174; GCN-UNSAFE-LABEL: rsq_f32: 175; GCN-UNSAFE: ; %bb.0: 176; GCN-UNSAFE-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 177; GCN-UNSAFE-NEXT: s_mov_b32 s7, 0xf000 178; GCN-UNSAFE-NEXT: s_mov_b32 s6, -1 179; GCN-UNSAFE-NEXT: s_mov_b32 s10, s6 180; GCN-UNSAFE-NEXT: s_mov_b32 s11, s7 181; GCN-UNSAFE-NEXT: s_waitcnt lgkmcnt(0) 182; GCN-UNSAFE-NEXT: s_mov_b32 s8, s2 183; GCN-UNSAFE-NEXT: s_mov_b32 s9, s3 184; GCN-UNSAFE-NEXT: buffer_load_dword v0, off, s[8:11], 0 185; GCN-UNSAFE-NEXT: s_mov_b32 s4, s0 186; GCN-UNSAFE-NEXT: s_mov_b32 s5, s1 187; GCN-UNSAFE-NEXT: s_waitcnt vmcnt(0) 188; GCN-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 189; GCN-UNSAFE-NEXT: buffer_store_dword v0, off, s[4:7], 0 190; GCN-UNSAFE-NEXT: s_endpgm 191 %val = load float, ptr addrspace(1) %in, align 4 192 %sqrt = call contract float @llvm.sqrt.f32(float %val) nounwind 
readnone 193 %div = fdiv contract float 1.0, %sqrt, !fpmath !0 194 store float %div, ptr addrspace(1) %out, align 4 195 ret void 196} 197 198define amdgpu_kernel void @rsq_f32_sgpr(ptr addrspace(1) noalias %out, float %val) { 199; GCN-DAZ-UNSAFE-LABEL: rsq_f32_sgpr: 200; GCN-DAZ-UNSAFE: ; %bb.0: 201; GCN-DAZ-UNSAFE-NEXT: s_load_dword s2, s[4:5], 0xb 202; GCN-DAZ-UNSAFE-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 203; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s3, 0xf000 204; GCN-DAZ-UNSAFE-NEXT: s_waitcnt lgkmcnt(0) 205; GCN-DAZ-UNSAFE-NEXT: v_rsq_f32_e32 v0, s2 206; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s2, -1 207; GCN-DAZ-UNSAFE-NEXT: buffer_store_dword v0, off, s[0:3], 0 208; GCN-DAZ-UNSAFE-NEXT: s_endpgm 209; 210; GCN-IEEE-UNSAFE-LABEL: rsq_f32_sgpr: 211; GCN-IEEE-UNSAFE: ; %bb.0: 212; GCN-IEEE-UNSAFE-NEXT: s_load_dword s2, s[4:5], 0xb 213; GCN-IEEE-UNSAFE-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 214; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s3, 0xf000 215; GCN-IEEE-UNSAFE-NEXT: s_waitcnt lgkmcnt(0) 216; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v0, s2 217; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s2, -1 218; GCN-IEEE-UNSAFE-NEXT: buffer_store_dword v0, off, s[0:3], 0 219; GCN-IEEE-UNSAFE-NEXT: s_endpgm 220; 221; GCN-DAZ-SAFE-LABEL: rsq_f32_sgpr: 222; GCN-DAZ-SAFE: ; %bb.0: 223; GCN-DAZ-SAFE-NEXT: s_load_dword s0, s[4:5], 0xb 224; GCN-DAZ-SAFE-NEXT: v_mov_b32_e32 v0, 0xf800000 225; GCN-DAZ-SAFE-NEXT: v_mov_b32_e32 v1, 0x4f800000 226; GCN-DAZ-SAFE-NEXT: s_mov_b32 s3, 0xf000 227; GCN-DAZ-SAFE-NEXT: s_mov_b32 s2, -1 228; GCN-DAZ-SAFE-NEXT: s_waitcnt lgkmcnt(0) 229; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v1, s0, v1 230; GCN-DAZ-SAFE-NEXT: v_mov_b32_e32 v2, s0 231; GCN-DAZ-SAFE-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0 232; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 233; GCN-DAZ-SAFE-NEXT: v_rsq_f32_e32 v1, v0 234; GCN-DAZ-SAFE-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 235; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v2, v0, v1 236; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v1, 0.5, v1 237; GCN-DAZ-SAFE-NEXT: v_fma_f32 v3, -v1, v2, 
0.5 238; GCN-DAZ-SAFE-NEXT: v_fma_f32 v2, v2, v3, v2 239; GCN-DAZ-SAFE-NEXT: v_fma_f32 v1, v1, v3, v1 240; GCN-DAZ-SAFE-NEXT: v_fma_f32 v3, -v2, v2, v0 241; GCN-DAZ-SAFE-NEXT: v_fma_f32 v1, v3, v1, v2 242; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 243; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 244; GCN-DAZ-SAFE-NEXT: v_mov_b32_e32 v2, 0x260 245; GCN-DAZ-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 246; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 247; GCN-DAZ-SAFE-NEXT: v_rcp_f32_e32 v0, v0 248; GCN-DAZ-SAFE-NEXT: s_waitcnt lgkmcnt(0) 249; GCN-DAZ-SAFE-NEXT: buffer_store_dword v0, off, s[0:3], 0 250; GCN-DAZ-SAFE-NEXT: s_endpgm 251; 252; SI-IEEE-SAFE-LABEL: rsq_f32_sgpr: 253; SI-IEEE-SAFE: ; %bb.0: 254; SI-IEEE-SAFE-NEXT: s_load_dword s0, s[4:5], 0xb 255; SI-IEEE-SAFE-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 256; SI-IEEE-SAFE-NEXT: v_mov_b32_e32 v0, 0xf800000 257; SI-IEEE-SAFE-NEXT: v_mov_b32_e32 v1, 0x4f800000 258; SI-IEEE-SAFE-NEXT: s_mov_b32 s7, 0xf000 259; SI-IEEE-SAFE-NEXT: s_waitcnt lgkmcnt(0) 260; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v1, s0, v1 261; SI-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, s0 262; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[0:1], s0, v0 263; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[0:1] 264; SI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v1, v0 265; SI-IEEE-SAFE-NEXT: s_mov_b32 s6, -1 266; SI-IEEE-SAFE-NEXT: v_add_i32_e32 v2, vcc, -1, v1 267; SI-IEEE-SAFE-NEXT: v_fma_f32 v3, -v2, v1, v0 268; SI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v3 269; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 270; SI-IEEE-SAFE-NEXT: v_add_i32_e32 v3, vcc, 1, v1 271; SI-IEEE-SAFE-NEXT: v_fma_f32 v1, -v3, v1, v0 272; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1 273; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc 274; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 275; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] 276; SI-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x260 277; SI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 278; 
SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 279; SI-IEEE-SAFE-NEXT: s_mov_b32 s0, 0x7f800000 280; SI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e32 v1, v0 281; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s0 282; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc 283; SI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v1, v1 284; SI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 285; SI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 286; SI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v1, v0 287; SI-IEEE-SAFE-NEXT: buffer_store_dword v0, off, s[4:7], 0 288; SI-IEEE-SAFE-NEXT: s_endpgm 289; 290; CI-IEEE-SAFE-LABEL: rsq_f32_sgpr: 291; CI-IEEE-SAFE: ; %bb.0: 292; CI-IEEE-SAFE-NEXT: s_load_dword s0, s[4:5], 0xb 293; CI-IEEE-SAFE-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 294; CI-IEEE-SAFE-NEXT: v_mov_b32_e32 v0, 0xf800000 295; CI-IEEE-SAFE-NEXT: v_mov_b32_e32 v1, 0x4f800000 296; CI-IEEE-SAFE-NEXT: s_mov_b32 s7, 0xf000 297; CI-IEEE-SAFE-NEXT: s_waitcnt lgkmcnt(0) 298; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v1, s0, v1 299; CI-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, s0 300; CI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[0:1], s0, v0 301; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[0:1] 302; CI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v1, v0 303; CI-IEEE-SAFE-NEXT: s_mov_b32 s6, -1 304; CI-IEEE-SAFE-NEXT: v_add_i32_e32 v2, vcc, -1, v1 305; CI-IEEE-SAFE-NEXT: v_fma_f32 v3, -v2, v1, v0 306; CI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v3 307; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 308; CI-IEEE-SAFE-NEXT: v_add_i32_e32 v3, vcc, 1, v1 309; CI-IEEE-SAFE-NEXT: v_fma_f32 v1, -v3, v1, v0 310; CI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v1 311; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc 312; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 313; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, v1, v2, s[0:1] 314; CI-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x260 315; CI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 316; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 317; CI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e32 v1, 
v0 318; CI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v1, v1 319; CI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 320; CI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 321; CI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v1, v0 322; CI-IEEE-SAFE-NEXT: buffer_store_dword v0, off, s[4:7], 0 323; CI-IEEE-SAFE-NEXT: s_endpgm 324; GCN-UNSAFE-LABEL: rsq_f32_sgpr: 325; GCN-UNSAFE: ; %bb.0: 326; GCN-UNSAFE-NEXT: s_load_dword s2, s[0:1], 0xb 327; GCN-UNSAFE-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 328; GCN-UNSAFE-NEXT: s_mov_b32 s3, 0xf000 329; GCN-UNSAFE-NEXT: s_waitcnt lgkmcnt(0) 330; GCN-UNSAFE-NEXT: v_rsq_f32_e32 v0, s2 331; GCN-UNSAFE-NEXT: s_mov_b32 s2, -1 332; GCN-UNSAFE-NEXT: buffer_store_dword v0, off, s[0:3], 0 333; GCN-UNSAFE-NEXT: s_endpgm 334 %sqrt = call contract float @llvm.sqrt.f32(float %val) nounwind readnone 335 %div = fdiv contract float 1.0, %sqrt, !fpmath !0 336 store float %div, ptr addrspace(1) %out, align 4 337 ret void 338} 339 340; Recognize that this is rsqrt(a) * rcp(b) * c, 341; not 1 / ( 1 / sqrt(a)) * rcp(b) * c. 342 343; NOTE: c * rcp( sqrt(a) * b ) is generated when we move rcp generation to AMDGPUCodeGenPrepare. 
344define amdgpu_kernel void @rsqrt_fmul(ptr addrspace(1) %out, ptr addrspace(1) %in) { 345; GCN-UNSAFE-LABEL: rsqrt_fmul: 346; GCN-UNSAFE: ; %bb.0: 347; GCN-UNSAFE-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 348; GCN-UNSAFE-NEXT: s_mov_b32 s7, 0xf000 349; GCN-UNSAFE-NEXT: s_mov_b32 s6, 0 350; GCN-UNSAFE-NEXT: v_lshlrev_b32_e32 v0, 2, v0 351; GCN-UNSAFE-NEXT: v_mov_b32_e32 v1, 0 352; GCN-UNSAFE-NEXT: s_waitcnt lgkmcnt(0) 353; GCN-UNSAFE-NEXT: s_mov_b64 s[8:9], s[2:3] 354; GCN-UNSAFE-NEXT: s_mov_b64 s[10:11], s[6:7] 355; GCN-UNSAFE-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 356; GCN-UNSAFE-NEXT: s_waitcnt vmcnt(0) 357; GCN-UNSAFE-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 offset:4 glc 358; GCN-UNSAFE-NEXT: s_waitcnt vmcnt(0) 359; GCN-UNSAFE-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 offset:8 glc 360; GCN-UNSAFE-NEXT: s_waitcnt vmcnt(0) 361; GCN-UNSAFE-NEXT: s_mov_b64 s[4:5], s[0:1] 362; GCN-UNSAFE-NEXT: v_sqrt_f32_e32 v2, v2 363; GCN-UNSAFE-NEXT: v_mul_f32_e32 v2, v2, v3 364; GCN-UNSAFE-NEXT: v_rcp_f32_e32 v2, v2 365; GCN-UNSAFE-NEXT: v_mul_f32_e32 v2, v4, v2 366; GCN-UNSAFE-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 367; GCN-UNSAFE-NEXT: s_endpgm 368; GCN-DAZ-UNSAFE-LABEL: rsqrt_fmul: 369; GCN-DAZ-UNSAFE: ; %bb.0: 370; GCN-DAZ-UNSAFE-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 371; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s7, 0xf000 372; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s6, 0 373; GCN-DAZ-UNSAFE-NEXT: v_lshlrev_b32_e32 v0, 2, v0 374; GCN-DAZ-UNSAFE-NEXT: v_mov_b32_e32 v1, 0 375; GCN-DAZ-UNSAFE-NEXT: s_waitcnt lgkmcnt(0) 376; GCN-DAZ-UNSAFE-NEXT: s_mov_b64 s[8:9], s[2:3] 377; GCN-DAZ-UNSAFE-NEXT: s_mov_b64 s[10:11], s[6:7] 378; GCN-DAZ-UNSAFE-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 379; GCN-DAZ-UNSAFE-NEXT: s_waitcnt vmcnt(0) 380; GCN-DAZ-UNSAFE-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 offset:4 glc 381; GCN-DAZ-UNSAFE-NEXT: s_waitcnt vmcnt(0) 382; GCN-DAZ-UNSAFE-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 
0 addr64 offset:8 glc 383; GCN-DAZ-UNSAFE-NEXT: s_waitcnt vmcnt(0) 384; GCN-DAZ-UNSAFE-NEXT: s_mov_b64 s[4:5], s[0:1] 385; GCN-DAZ-UNSAFE-NEXT: v_rsq_f32_e32 v2, v2 386; GCN-DAZ-UNSAFE-NEXT: v_rcp_f32_e32 v3, v3 387; GCN-DAZ-UNSAFE-NEXT: v_mul_f32_e32 v2, v2, v3 388; GCN-DAZ-UNSAFE-NEXT: v_mul_f32_e32 v2, v4, v2 389; GCN-DAZ-UNSAFE-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 390; GCN-DAZ-UNSAFE-NEXT: s_endpgm 391; 392; GCN-IEEE-UNSAFE-LABEL: rsqrt_fmul: 393; GCN-IEEE-UNSAFE: ; %bb.0: 394; GCN-IEEE-UNSAFE-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 395; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s7, 0xf000 396; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s6, 0 397; GCN-IEEE-UNSAFE-NEXT: v_lshlrev_b32_e32 v0, 2, v0 398; GCN-IEEE-UNSAFE-NEXT: v_mov_b32_e32 v1, 0 399; GCN-IEEE-UNSAFE-NEXT: s_waitcnt lgkmcnt(0) 400; GCN-IEEE-UNSAFE-NEXT: s_mov_b64 s[8:9], s[2:3] 401; GCN-IEEE-UNSAFE-NEXT: s_mov_b64 s[10:11], s[6:7] 402; GCN-IEEE-UNSAFE-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 403; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) 404; GCN-IEEE-UNSAFE-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 offset:4 glc 405; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) 406; GCN-IEEE-UNSAFE-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 offset:8 glc 407; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) 408; GCN-IEEE-UNSAFE-NEXT: s_mov_b64 s[4:5], s[0:1] 409; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v2, v2 410; GCN-IEEE-UNSAFE-NEXT: v_rcp_f32_e32 v3, v3 411; GCN-IEEE-UNSAFE-NEXT: v_mul_f32_e32 v2, v2, v3 412; GCN-IEEE-UNSAFE-NEXT: v_mul_f32_e32 v2, v4, v2 413; GCN-IEEE-UNSAFE-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 414; GCN-IEEE-UNSAFE-NEXT: s_endpgm 415; 416; GCN-DAZ-SAFE-LABEL: rsqrt_fmul: 417; GCN-DAZ-SAFE: ; %bb.0: 418; GCN-DAZ-SAFE-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 419; GCN-DAZ-SAFE-NEXT: s_mov_b32 s3, 0xf000 420; GCN-DAZ-SAFE-NEXT: s_mov_b32 s2, 0 421; GCN-DAZ-SAFE-NEXT: v_lshlrev_b32_e32 v0, 2, v0 422; GCN-DAZ-SAFE-NEXT: v_mov_b32_e32 v1, 0 423; GCN-DAZ-SAFE-NEXT: 
s_waitcnt lgkmcnt(0) 424; GCN-DAZ-SAFE-NEXT: s_mov_b64 s[8:9], s[6:7] 425; GCN-DAZ-SAFE-NEXT: s_mov_b64 s[10:11], s[2:3] 426; GCN-DAZ-SAFE-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 427; GCN-DAZ-SAFE-NEXT: s_waitcnt vmcnt(0) 428; GCN-DAZ-SAFE-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 offset:4 glc 429; GCN-DAZ-SAFE-NEXT: s_waitcnt vmcnt(0) 430; GCN-DAZ-SAFE-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 offset:8 glc 431; GCN-DAZ-SAFE-NEXT: s_waitcnt vmcnt(0) 432; GCN-DAZ-SAFE-NEXT: s_mov_b32 s0, 0xf800000 433; GCN-DAZ-SAFE-NEXT: v_mov_b32_e32 v6, 0x260 434; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v5, 0x4f800000, v2 435; GCN-DAZ-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2 436; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc 437; GCN-DAZ-SAFE-NEXT: v_rsq_f32_e32 v5, v2 438; GCN-DAZ-SAFE-NEXT: s_mov_b64 s[0:1], s[4:5] 439; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v7, v2, v5 440; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v5, 0.5, v5 441; GCN-DAZ-SAFE-NEXT: v_fma_f32 v8, -v5, v7, 0.5 442; GCN-DAZ-SAFE-NEXT: v_fma_f32 v7, v7, v8, v7 443; GCN-DAZ-SAFE-NEXT: v_fma_f32 v5, v5, v8, v5 444; GCN-DAZ-SAFE-NEXT: v_fma_f32 v8, -v7, v7, v2 445; GCN-DAZ-SAFE-NEXT: v_fma_f32 v5, v8, v5, v7 446; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v7, 0x37800000, v5 447; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc 448; GCN-DAZ-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v2, v6 449; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc 450; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v2, v2, v3 451; GCN-DAZ-SAFE-NEXT: v_div_scale_f32 v3, s[4:5], v2, v2, v4 452; GCN-DAZ-SAFE-NEXT: v_rcp_f32_e32 v5, v3 453; GCN-DAZ-SAFE-NEXT: v_div_scale_f32 v6, vcc, v4, v2, v4 454; GCN-DAZ-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 455; GCN-DAZ-SAFE-NEXT: v_fma_f32 v7, -v3, v5, 1.0 456; GCN-DAZ-SAFE-NEXT: v_fma_f32 v5, v7, v5, v5 457; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v7, v6, v5 458; GCN-DAZ-SAFE-NEXT: v_fma_f32 v8, -v3, v7, v6 459; GCN-DAZ-SAFE-NEXT: v_fma_f32 v7, v8, v5, v7 460; GCN-DAZ-SAFE-NEXT: v_fma_f32 
v3, -v3, v7, v6 461; GCN-DAZ-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 462; GCN-DAZ-SAFE-NEXT: v_div_fmas_f32 v3, v3, v5, v7 463; GCN-DAZ-SAFE-NEXT: v_div_fixup_f32 v2, v3, v2, v4 464; GCN-DAZ-SAFE-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 465; GCN-DAZ-SAFE-NEXT: s_endpgm 466; 467; GCN-IEEE-SAFE-LABEL: rsqrt_fmul: 468; GCN-IEEE-SAFE: ; %bb.0: 469; GCN-IEEE-SAFE-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 470; GCN-IEEE-SAFE-NEXT: s_mov_b32 s3, 0xf000 471; GCN-IEEE-SAFE-NEXT: s_mov_b32 s2, 0 472; GCN-IEEE-SAFE-NEXT: v_lshlrev_b32_e32 v0, 2, v0 473; GCN-IEEE-SAFE-NEXT: v_mov_b32_e32 v1, 0 474; GCN-IEEE-SAFE-NEXT: s_waitcnt lgkmcnt(0) 475; GCN-IEEE-SAFE-NEXT: s_mov_b64 s[8:9], s[6:7] 476; GCN-IEEE-SAFE-NEXT: s_mov_b64 s[10:11], s[2:3] 477; GCN-IEEE-SAFE-NEXT: buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc 478; GCN-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) 479; GCN-IEEE-SAFE-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 offset:4 glc 480; GCN-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) 481; GCN-IEEE-SAFE-NEXT: buffer_load_dword v4, v[0:1], s[8:11], 0 addr64 offset:8 glc 482; GCN-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) 483; GCN-IEEE-SAFE-NEXT: s_mov_b32 s0, 0xf800000 484; GCN-IEEE-SAFE-NEXT: v_mov_b32_e32 v6, 0x260 485; GCN-IEEE-SAFE-NEXT: v_mul_f32_e32 v5, 0x4f800000, v2 486; GCN-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s0, v2 487; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc 488; GCN-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v5, v2 489; GCN-IEEE-SAFE-NEXT: v_add_i32_e64 v7, s[0:1], -1, v5 490; GCN-IEEE-SAFE-NEXT: v_add_i32_e64 v8, s[0:1], 1, v5 491; GCN-IEEE-SAFE-NEXT: v_fma_f32 v9, -v7, v5, v2 492; GCN-IEEE-SAFE-NEXT: v_fma_f32 v10, -v8, v5, v2 493; GCN-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[0:1], 0, v9 494; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[0:1] 495; GCN-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[0:1], 0, v10 496; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[0:1] 497; GCN-IEEE-SAFE-NEXT: v_mul_f32_e32 v7, 0x37800000, v5 498; 
GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc 499; GCN-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v2, v6 500; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc 501; GCN-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, v2, v3 502; GCN-IEEE-SAFE-NEXT: v_div_scale_f32 v3, s[0:1], v2, v2, v4 503; GCN-IEEE-SAFE-NEXT: v_rcp_f32_e32 v5, v3 504; GCN-IEEE-SAFE-NEXT: v_div_scale_f32 v6, vcc, v4, v2, v4 505; GCN-IEEE-SAFE-NEXT: s_mov_b64 s[0:1], s[4:5] 506; GCN-IEEE-SAFE-NEXT: v_fma_f32 v7, -v3, v5, 1.0 507; GCN-IEEE-SAFE-NEXT: v_fma_f32 v5, v7, v5, v5 508; GCN-IEEE-SAFE-NEXT: v_mul_f32_e32 v7, v6, v5 509; GCN-IEEE-SAFE-NEXT: v_fma_f32 v8, -v3, v7, v6 510; GCN-IEEE-SAFE-NEXT: v_fma_f32 v7, v8, v5, v7 511; GCN-IEEE-SAFE-NEXT: v_fma_f32 v3, -v3, v7, v6 512; GCN-IEEE-SAFE-NEXT: v_div_fmas_f32 v3, v3, v5, v7 513; GCN-IEEE-SAFE-NEXT: v_div_fixup_f32 v2, v3, v2, v4 514; GCN-IEEE-SAFE-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 515; GCN-IEEE-SAFE-NEXT: s_endpgm 516 %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone 517 %out.gep = getelementptr float, ptr addrspace(1) %out, i32 %tid 518 %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid 519 %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 520 %gep.2 = getelementptr float, ptr addrspace(1) %gep.0, i32 2 521 522 %a = load volatile float, ptr addrspace(1) %gep.0 523 %b = load volatile float, ptr addrspace(1) %gep.1 524 %c = load volatile float, ptr addrspace(1) %gep.2 525 526 %x = call contract float @llvm.sqrt.f32(float %a) 527 %y = fmul contract float %x, %b 528 %z = fdiv contract float %c, %y 529 store float %z, ptr addrspace(1) %out.gep 530 ret void 531} 532 533define amdgpu_kernel void @neg_rsq_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) { 534; GCN-DAZ-UNSAFE-LABEL: neg_rsq_f32: 535; GCN-DAZ-UNSAFE: ; %bb.0: 536; GCN-DAZ-UNSAFE-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 537; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s7, 0xf000 538; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s6, -1 539; 
GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s10, s6 540; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s11, s7 541; GCN-DAZ-UNSAFE-NEXT: s_waitcnt lgkmcnt(0) 542; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s8, s2 543; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s9, s3 544; GCN-DAZ-UNSAFE-NEXT: buffer_load_dword v0, off, s[8:11], 0 545; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s4, s0 546; GCN-DAZ-UNSAFE-NEXT: s_mov_b32 s5, s1 547; GCN-DAZ-UNSAFE-NEXT: s_waitcnt vmcnt(0) 548; GCN-DAZ-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 549; GCN-DAZ-UNSAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 550; GCN-DAZ-UNSAFE-NEXT: buffer_store_dword v0, off, s[4:7], 0 551; GCN-DAZ-UNSAFE-NEXT: s_endpgm 552; 553; GCN-IEEE-UNSAFE-LABEL: neg_rsq_f32: 554; GCN-IEEE-UNSAFE: ; %bb.0: 555; GCN-IEEE-UNSAFE-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 556; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s7, 0xf000 557; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s6, -1 558; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s10, s6 559; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s11, s7 560; GCN-IEEE-UNSAFE-NEXT: s_waitcnt lgkmcnt(0) 561; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s8, s2 562; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s9, s3 563; GCN-IEEE-UNSAFE-NEXT: buffer_load_dword v0, off, s[8:11], 0 564; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s4, s0 565; GCN-IEEE-UNSAFE-NEXT: s_mov_b32 s5, s1 566; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) 567; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 568; GCN-IEEE-UNSAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 569; GCN-IEEE-UNSAFE-NEXT: buffer_store_dword v0, off, s[4:7], 0 570; GCN-IEEE-UNSAFE-NEXT: s_endpgm 571; 572; GCN-DAZ-SAFE-LABEL: neg_rsq_f32: 573; GCN-DAZ-SAFE: ; %bb.0: 574; GCN-DAZ-SAFE-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 575; GCN-DAZ-SAFE-NEXT: s_mov_b32 s7, 0xf000 576; GCN-DAZ-SAFE-NEXT: s_mov_b32 s6, -1 577; GCN-DAZ-SAFE-NEXT: s_mov_b32 s10, s6 578; GCN-DAZ-SAFE-NEXT: s_mov_b32 s11, s7 579; GCN-DAZ-SAFE-NEXT: s_waitcnt lgkmcnt(0) 580; GCN-DAZ-SAFE-NEXT: s_mov_b32 s8, s2 581; GCN-DAZ-SAFE-NEXT: s_mov_b32 s9, s3 582; GCN-DAZ-SAFE-NEXT: buffer_load_dword v0, off, s[8:11], 0 583; GCN-DAZ-SAFE-NEXT: 
s_mov_b32 s2, 0xf800000 584; GCN-DAZ-SAFE-NEXT: v_mov_b32_e32 v2, 0x260 585; GCN-DAZ-SAFE-NEXT: s_mov_b32 s4, s0 586; GCN-DAZ-SAFE-NEXT: s_mov_b32 s5, s1 587; GCN-DAZ-SAFE-NEXT: s_waitcnt vmcnt(0) 588; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 589; GCN-DAZ-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s2, v0 590; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 591; GCN-DAZ-SAFE-NEXT: v_rsq_f32_e32 v1, v0 592; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v3, v0, v1 593; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v1, 0.5, v1 594; GCN-DAZ-SAFE-NEXT: v_fma_f32 v4, -v1, v3, 0.5 595; GCN-DAZ-SAFE-NEXT: v_fma_f32 v3, v3, v4, v3 596; GCN-DAZ-SAFE-NEXT: v_fma_f32 v1, v1, v4, v1 597; GCN-DAZ-SAFE-NEXT: v_fma_f32 v4, -v3, v3, v0 598; GCN-DAZ-SAFE-NEXT: v_fma_f32 v1, v4, v1, v3 599; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v3, 0x37800000, v1 600; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 601; GCN-DAZ-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 602; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 603; GCN-DAZ-SAFE-NEXT: v_rcp_f32_e64 v0, -v0 604; GCN-DAZ-SAFE-NEXT: buffer_store_dword v0, off, s[4:7], 0 605; GCN-DAZ-SAFE-NEXT: s_endpgm 606; 607; SI-IEEE-SAFE-LABEL: neg_rsq_f32: 608; SI-IEEE-SAFE: ; %bb.0: 609; SI-IEEE-SAFE-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9 610; SI-IEEE-SAFE-NEXT: s_mov_b32 s7, 0xf000 611; SI-IEEE-SAFE-NEXT: s_mov_b32 s6, -1 612; SI-IEEE-SAFE-NEXT: s_mov_b32 s2, s6 613; SI-IEEE-SAFE-NEXT: s_mov_b32 s3, s7 614; SI-IEEE-SAFE-NEXT: s_waitcnt lgkmcnt(0) 615; SI-IEEE-SAFE-NEXT: s_mov_b32 s0, s10 616; SI-IEEE-SAFE-NEXT: s_mov_b32 s1, s11 617; SI-IEEE-SAFE-NEXT: buffer_load_dword v0, off, s[0:3], 0 618; SI-IEEE-SAFE-NEXT: s_mov_b32 s0, 0xf800000 619; SI-IEEE-SAFE-NEXT: v_mov_b32_e32 v1, 0x260 620; SI-IEEE-SAFE-NEXT: s_mov_b32 s2, 0x7f800000 621; SI-IEEE-SAFE-NEXT: s_mov_b32 s4, s8 622; SI-IEEE-SAFE-NEXT: s_mov_b32 s5, s9 623; SI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) 624; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 625; SI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0 
626; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 627; SI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v2, v0 628; SI-IEEE-SAFE-NEXT: v_add_i32_e32 v3, vcc, -1, v2 629; SI-IEEE-SAFE-NEXT: v_add_i32_e32 v4, vcc, 1, v2 630; SI-IEEE-SAFE-NEXT: v_fma_f32 v5, -v3, v2, v0 631; SI-IEEE-SAFE-NEXT: v_fma_f32 v6, -v4, v2, v0 632; SI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v5 633; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 634; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v6 635; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 636; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 637; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] 638; SI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 639; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 640; SI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v1, -v0 641; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[0:1], |v0|, s2 642; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, -v0, v1, s[0:1] 643; SI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v1, v1 644; SI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 645; SI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 646; SI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v1, v0 647; SI-IEEE-SAFE-NEXT: buffer_store_dword v0, off, s[4:7], 0 648; SI-IEEE-SAFE-NEXT: s_endpgm 649; 650; CI-IEEE-SAFE-LABEL: neg_rsq_f32: 651; CI-IEEE-SAFE: ; %bb.0: 652; CI-IEEE-SAFE-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9 653; CI-IEEE-SAFE-NEXT: s_mov_b32 s7, 0xf000 654; CI-IEEE-SAFE-NEXT: s_mov_b32 s6, -1 655; CI-IEEE-SAFE-NEXT: s_mov_b32 s2, s6 656; CI-IEEE-SAFE-NEXT: s_mov_b32 s3, s7 657; CI-IEEE-SAFE-NEXT: s_waitcnt lgkmcnt(0) 658; CI-IEEE-SAFE-NEXT: s_mov_b32 s0, s10 659; CI-IEEE-SAFE-NEXT: s_mov_b32 s1, s11 660; CI-IEEE-SAFE-NEXT: buffer_load_dword v0, off, s[0:3], 0 661; CI-IEEE-SAFE-NEXT: s_mov_b32 s0, 0xf800000 662; CI-IEEE-SAFE-NEXT: v_mov_b32_e32 v1, 0x260 663; CI-IEEE-SAFE-NEXT: s_mov_b32 s4, s8 664; CI-IEEE-SAFE-NEXT: s_mov_b32 s5, s9 665; CI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) 666; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 
0x4f800000, v0 667; CI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e64 s[0:1], s0, v0 668; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 669; CI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v2, v0 670; CI-IEEE-SAFE-NEXT: v_add_i32_e32 v3, vcc, -1, v2 671; CI-IEEE-SAFE-NEXT: v_add_i32_e32 v4, vcc, 1, v2 672; CI-IEEE-SAFE-NEXT: v_fma_f32 v5, -v3, v2, v0 673; CI-IEEE-SAFE-NEXT: v_fma_f32 v6, -v4, v2, v0 674; CI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e32 vcc, 0, v5 675; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 676; CI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e32 vcc, 0, v6 677; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 678; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 679; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[0:1] 680; CI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v1 681; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 682; CI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v1, -v0 683; CI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v1, v1 684; CI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 685; CI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 686; CI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v1, v0 687; CI-IEEE-SAFE-NEXT: buffer_store_dword v0, off, s[4:7], 0 688; CI-IEEE-SAFE-NEXT: s_endpgm 689; GCN-UNSAFE-LABEL: neg_rsq_f32: 690; GCN-UNSAFE: ; %bb.0: 691; GCN-UNSAFE-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 692; GCN-UNSAFE-NEXT: s_mov_b32 s7, 0xf000 693; GCN-UNSAFE-NEXT: s_mov_b32 s6, -1 694; GCN-UNSAFE-NEXT: s_mov_b32 s10, s6 695; GCN-UNSAFE-NEXT: s_mov_b32 s11, s7 696; GCN-UNSAFE-NEXT: s_waitcnt lgkmcnt(0) 697; GCN-UNSAFE-NEXT: s_mov_b32 s8, s2 698; GCN-UNSAFE-NEXT: s_mov_b32 s9, s3 699; GCN-UNSAFE-NEXT: buffer_load_dword v0, off, s[8:11], 0 700; GCN-UNSAFE-NEXT: s_mov_b32 s4, s0 701; GCN-UNSAFE-NEXT: s_mov_b32 s5, s1 702; GCN-UNSAFE-NEXT: s_waitcnt vmcnt(0) 703; GCN-UNSAFE-NEXT: v_sqrt_f32_e32 v0, v0 704; GCN-UNSAFE-NEXT: v_rcp_f32_e64 v0, -v0 705; GCN-UNSAFE-NEXT: buffer_store_dword v0, off, s[4:7], 0 706; GCN-UNSAFE-NEXT: s_endpgm 707 %val = load float, ptr addrspace(1) %in, align 
4
  %sqrt = call contract float @llvm.sqrt.f32(float %val)
  %div = fdiv contract float -1.0, %sqrt, !fpmath !0
  store float %div, ptr addrspace(1) %out, align 4
  ret void
}

; Kernel form of -1.0 / sqrt(-x): loads a float from %in, negates it, takes
; llvm.sqrt.f32 of the negated value, divides -1.0 by the result and stores the
; quotient to %out. Both the sqrt call and the fdiv carry the contract flag and
; the fdiv is tagged with !fpmath !0 (that metadata node is defined outside this
; part of the file).
; With -enable-unsafe-fp-math the checks expect v_rsq_f32 on the negated input
; followed by a sign-bit xor; without it they expect an expanded sqrt sequence
; followed by a negated reciprocal.
define amdgpu_kernel void @neg_rsq_neg_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) {
; GCN-DAZ-UNSAFE-LABEL: neg_rsq_neg_f32:
; GCN-DAZ-UNSAFE:       ; %bb.0:
; GCN-DAZ-UNSAFE-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s7, 0xf000
; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s6, -1
; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s10, s6
; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s11, s7
; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s8, s2
; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s9, s3
; GCN-DAZ-UNSAFE-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s4, s0
; GCN-DAZ-UNSAFE-NEXT:    s_mov_b32 s5, s1
; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
; GCN-DAZ-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
; GCN-DAZ-UNSAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-DAZ-UNSAFE-NEXT:    s_endpgm
;
; GCN-IEEE-UNSAFE-LABEL: neg_rsq_neg_f32:
; GCN-IEEE-UNSAFE:       ; %bb.0:
; GCN-IEEE-UNSAFE-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s7, 0xf000
; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s6, -1
; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s10, s6
; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s11, s7
; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s8, s2
; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s9, s3
; GCN-IEEE-UNSAFE-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s4, s0
; GCN-IEEE-UNSAFE-NEXT:    s_mov_b32 s5, s1
; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
; GCN-IEEE-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
; GCN-IEEE-UNSAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-IEEE-UNSAFE-NEXT:    s_endpgm
;
; GCN-DAZ-SAFE-LABEL: neg_rsq_neg_f32:
; GCN-DAZ-SAFE:       ; %bb.0:
; GCN-DAZ-SAFE-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s7, 0xf000
; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s6, -1
; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s10, s6
; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s11, s7
; GCN-DAZ-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s8, s2
; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s9, s3
; GCN-DAZ-SAFE-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s2, 0x8f800000
; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s4, s0
; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s5, s1
; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0)
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, 0xcf800000, v0
; GCN-DAZ-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s2, v0
; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v1, v0
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, v0, v1
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, 0.5, v1
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, -v1, v3, 0.5
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, v3, v4, v3
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v1, v1, v4, v1
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, -v3, v3, v0
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v1, v4, v1, v3
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v1
; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v0, -v0
; GCN-DAZ-SAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-DAZ-SAFE-NEXT:    s_endpgm
;
; SI-IEEE-SAFE-LABEL: neg_rsq_neg_f32:
; SI-IEEE-SAFE:       ; %bb.0:
; SI-IEEE-SAFE-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
; SI-IEEE-SAFE-NEXT:    s_mov_b32 s7, 0xf000
; SI-IEEE-SAFE-NEXT:    s_mov_b32 s6, -1
; SI-IEEE-SAFE-NEXT:    s_mov_b32 s2, s6
; SI-IEEE-SAFE-NEXT:    s_mov_b32 s3, s7
; SI-IEEE-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
; SI-IEEE-SAFE-NEXT:    s_mov_b32 s0, s10
; SI-IEEE-SAFE-NEXT:    s_mov_b32 s1, s11
; SI-IEEE-SAFE-NEXT:    buffer_load_dword v0, off, s[0:3], 0
; SI-IEEE-SAFE-NEXT:    s_mov_b32 s0, 0x8f800000
; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v1, 0x260
; SI-IEEE-SAFE-NEXT:    s_mov_b32 s2, 0x7f800000
; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, s8
; SI-IEEE-SAFE-NEXT:    s_mov_b32 s5, s9
; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0)
; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0xcf800000, v0
; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[0:1], s0, v0
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, s[0:1]
; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v0
; SI-IEEE-SAFE-NEXT:    v_add_i32_e32 v3, vcc, -1, v2
; SI-IEEE-SAFE-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
; SI-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v3, v2, v0
; SI-IEEE-SAFE-NEXT:    v_fma_f32 v6, -v4, v2, v0
; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v5
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v6
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v1, -v0
; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[0:1], |v0|, s2
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, -v0, v1, s[0:1]
; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
; SI-IEEE-SAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-IEEE-SAFE-NEXT:    s_endpgm
;
; CI-IEEE-SAFE-LABEL: neg_rsq_neg_f32:
; CI-IEEE-SAFE:       ; %bb.0:
; CI-IEEE-SAFE-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x9
; CI-IEEE-SAFE-NEXT:    s_mov_b32 s7, 0xf000
; CI-IEEE-SAFE-NEXT:    s_mov_b32 s6, -1
; CI-IEEE-SAFE-NEXT:    s_mov_b32 s2, s6
; CI-IEEE-SAFE-NEXT:    s_mov_b32 s3, s7
; CI-IEEE-SAFE-NEXT:    s_waitcnt lgkmcnt(0)
; CI-IEEE-SAFE-NEXT:    s_mov_b32 s0, s10
; CI-IEEE-SAFE-NEXT:    s_mov_b32 s1, s11
; CI-IEEE-SAFE-NEXT:    buffer_load_dword v0, off, s[0:3], 0
; CI-IEEE-SAFE-NEXT:    s_mov_b32 s0, 0x8f800000
; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v1, 0x260
; CI-IEEE-SAFE-NEXT:    s_mov_b32 s4, s8
; CI-IEEE-SAFE-NEXT:    s_mov_b32 s5, s9
; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0)
; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0xcf800000, v0
; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[0:1], s0, v0
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, s[0:1]
; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v0
; CI-IEEE-SAFE-NEXT:    v_add_i32_e32 v3, vcc, -1, v2
; CI-IEEE-SAFE-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
; CI-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v3, v2, v0
; CI-IEEE-SAFE-NEXT:    v_fma_f32 v6, -v4, v2, v0
; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v5
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v6
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v1, -v0
; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
; CI-IEEE-SAFE-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CI-IEEE-SAFE-NEXT:    s_endpgm
  %val = load float, ptr addrspace(1) %in, align 4
  %val.fneg = fneg float %val
  %sqrt = call contract float @llvm.sqrt.f32(float %val.fneg)
  %div = fdiv contract float -1.0, %sqrt, !fpmath !0
  store float %div, ptr addrspace(1) %out, align 4
  ret void
}

; Non-kernel variant of the same pattern: returns -1.0 / sqrt(-%val) directly,
; so the checks contain only the arithmetic and the s_setpc_b64 return.
; The tahiti (SI) IEEE checks contain a v_cmp_lt_f32 against 0x7f800000 and a
; v_cndmask around v_frexp_mant_f32 that the hawaii (CI) checks do not.
define float @v_neg_rsq_neg_f32(float %val) {
; GCN-DAZ-UNSAFE-LABEL: v_neg_rsq_neg_f32:
; GCN-DAZ-UNSAFE:       ; %bb.0:
; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
; GCN-DAZ-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
; GCN-DAZ-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GCN-IEEE-UNSAFE-LABEL: v_neg_rsq_neg_f32:
; GCN-IEEE-UNSAFE:       ; %bb.0:
; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
; GCN-IEEE-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GCN-DAZ-SAFE-LABEL: v_neg_rsq_neg_f32:
; GCN-DAZ-SAFE:       ; %bb.0:
; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s4, 0x8f800000
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, 0xcf800000, v0
; GCN-DAZ-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v1, v0
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, v0, v1
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v1, 0.5, v1
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, -v1, v2, 0.5
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v2, v3, v2
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, -v2, v2, v0
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v1, v1, v3, v1
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v1, v4, v1, v2
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v0, -v0
; GCN-DAZ-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; SI-IEEE-SAFE-LABEL: v_neg_rsq_neg_f32:
; SI-IEEE-SAFE:       ; %bb.0:
; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x8f800000
; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, 0xcf800000, v0
; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v1, v0
; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
; SI-IEEE-SAFE-NEXT:    v_fma_f32 v3, -v2, v1, v0
; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
; SI-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v3, v1, v0
; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x7f800000
; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v1, -v0
; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s4
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, -v0, v1, s[4:5]
; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
; SI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; CI-IEEE-SAFE-LABEL: v_neg_rsq_neg_f32:
; CI-IEEE-SAFE:       ; %bb.0:
; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x8f800000
; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v1, 0xcf800000, v0
; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v1, v0
; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v1
; CI-IEEE-SAFE-NEXT:    v_fma_f32 v3, -v2, v1, v0
; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v3
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v1, v2, s[4:5]
; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], 1, v1
; CI-IEEE-SAFE-NEXT:    v_fma_f32 v1, -v3, v1, v0
; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v1
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v3, s[4:5]
; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v2, 0x260
; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v2
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v1, -v0
; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v1, v1
; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v1, v0
; CI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
  %val.fneg = fneg float %val
  %sqrt = call contract float @llvm.sqrt.f32(float %val.fneg)
  %div = fdiv contract float -1.0, %sqrt, !fpmath !0
  ret float %div
}

; <2 x float> variant: -1.0 / sqrt(-%val) per element through llvm.sqrt.v2f32.
; The checks show the scalar sequence emitted once for v0 and once for v1.
define <2 x float> @v_neg_rsq_neg_v2f32(<2 x float> %val) {
; GCN-DAZ-UNSAFE-LABEL: v_neg_rsq_neg_v2f32:
; GCN-DAZ-UNSAFE:       ; %bb.0:
; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e64 v1, -v1
; GCN-DAZ-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
; GCN-DAZ-UNSAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
; GCN-DAZ-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GCN-IEEE-UNSAFE-LABEL: v_neg_rsq_neg_v2f32:
; GCN-IEEE-UNSAFE:       ; %bb.0:
; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e64 v1, -v1
; GCN-IEEE-UNSAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
; GCN-IEEE-UNSAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GCN-DAZ-SAFE-LABEL: v_neg_rsq_neg_v2f32:
; GCN-DAZ-SAFE:       ; %bb.0:
; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s4, 0x8f800000
; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s5, 0x4f800000
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e64 v2, -v1, s5
; GCN-DAZ-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v1
; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e64 v1, -v1, v2, vcc
; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v2, v1
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, v1, v2
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, 0.5, v2
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, -v2, v3, 0.5
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, v3, v4, v3
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v5, -v3, v3, v1
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v2, v4, v2
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v5, v2, v3
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e64 v3, -v0, s5
; GCN-DAZ-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v3, vcc
; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v3, v0
; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v4, 0x260
; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v4
; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[4:5]
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, v0, v3
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, 0.5, v3
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v5, -v3, v2, 0.5
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v2, v5, v2
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v6, -v2, v2, v0
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, v3, v5, v3
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v6, v3, v2
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v4
; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v0, -v0
; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v1, -v1
; GCN-DAZ-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; SI-IEEE-SAFE-LABEL: v_neg_rsq_neg_v2f32:
; SI-IEEE-SAFE:       ; %bb.0:
; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-IEEE-SAFE-NEXT:    s_mov_b32 s6, 0x8f800000
; SI-IEEE-SAFE-NEXT:    s_mov_b32 s7, 0x4f800000
; SI-IEEE-SAFE-NEXT:    v_mul_f32_e64 v2, -v1, s7
; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v1
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, -v1, v2, vcc
; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v1
; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
; SI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v3, v2, v1
; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v4
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[4:5]
; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v2
; SI-IEEE-SAFE-NEXT:    v_fma_f32 v2, -v4, v2, v1
; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v2
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
; SI-IEEE-SAFE-NEXT:    v_mul_f32_e64 v4, -v0, s7
; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v4, vcc
; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v4, v0
; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v3, 0x260
; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v3
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[4:5]
; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v4
; SI-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v2, v4, v0
; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v5
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v5, s[4:5], 1, v4
; SI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v5, v4, v0
; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v4
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v2
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v3
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-IEEE-SAFE-NEXT:    s_mov_b32 s6, 0x7f800000
; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v0
; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s6
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, -v0, v2, s[4:5]
; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v1
; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v1|, s6
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, -v1, v2, s[4:5]
; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v1, vcc, 0, v1
; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v1, v2, v1
; SI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; CI-IEEE-SAFE-LABEL: v_neg_rsq_neg_v2f32:
; CI-IEEE-SAFE:       ; %bb.0:
; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-IEEE-SAFE-NEXT:    s_mov_b32 s6, 0x8f800000
; CI-IEEE-SAFE-NEXT:    s_mov_b32 s7, 0x4f800000
; CI-IEEE-SAFE-NEXT:    v_mul_f32_e64 v2, -v1, s7
; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v1
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, -v1, v2, vcc
; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v1
; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
; CI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v3, v2, v1
; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v4
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[4:5]
; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v2
; CI-IEEE-SAFE-NEXT:    v_fma_f32 v2, -v4, v2, v1
; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v2
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
; CI-IEEE-SAFE-NEXT:    v_mul_f32_e64 v4, -v0, s7
; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s6, v0
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v4, vcc
; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v4, v0
; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v3, 0x260
; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e64 s[4:5], v1, v3
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[4:5]
; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v2, s[4:5], -1, v4
; CI-IEEE-SAFE-NEXT:    v_fma_f32 v5, -v2, v4, v0
; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v5
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v4, v2, s[4:5]
; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v5, s[4:5], 1, v4
; CI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v5, v4, v0
; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v4
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v2, v5, s[4:5]
; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v4, 0x37800000, v2
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v3
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v0
; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v1
; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v1, v1
; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v1, vcc, 0, v1
; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v1, v2, v1
; CI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
  %val.fneg = fneg <2 x float> %val
  %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val.fneg)
  %div = fdiv contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0
  ret <2 x float> %div
}

; Same -1.0 / sqrt(-%val0) pattern, with the quotient then multiplied by %val1.
; In the unsafe-math checks the negation appears as a source modifier on the
; v_mul_f32 (no separate xor); in the other checks the multiply takes the
; already-negated reciprocal unmodified.
define float @v_neg_rsq_neg_f32_foldable_user(float %val0, float %val1) {
; GCN-DAZ-UNSAFE-LABEL: v_neg_rsq_neg_f32_foldable_user:
; GCN-DAZ-UNSAFE:       ; %bb.0:
; GCN-DAZ-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAZ-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
; GCN-DAZ-UNSAFE-NEXT:    v_mul_f32_e64 v0, -v0, v1
; GCN-DAZ-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GCN-IEEE-UNSAFE-LABEL: v_neg_rsq_neg_f32_foldable_user:
; GCN-IEEE-UNSAFE:       ; %bb.0:
; GCN-IEEE-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-IEEE-UNSAFE-NEXT:    v_rsq_f32_e64 v0, -v0
; GCN-IEEE-UNSAFE-NEXT:    v_mul_f32_e64 v0, -v0, v1
; GCN-IEEE-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GCN-DAZ-SAFE-LABEL: v_neg_rsq_neg_f32_foldable_user:
; GCN-DAZ-SAFE:       ; %bb.0:
; GCN-DAZ-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAZ-SAFE-NEXT:    s_mov_b32 s4, 0x8f800000
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, 0xcf800000, v0
; GCN-DAZ-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, vcc
; GCN-DAZ-SAFE-NEXT:    v_rsq_f32_e32 v2, v0
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, v0, v2
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v2, 0.5, v2
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v4, -v2, v3, 0.5
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v3, v3, v4, v3
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v5, -v3, v3, v0
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v2, v4, v2
; GCN-DAZ-SAFE-NEXT:    v_fma_f32 v2, v5, v2, v3
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
; GCN-DAZ-SAFE-NEXT:    v_mov_b32_e32 v3, 0x260
; GCN-DAZ-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v3
; GCN-DAZ-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GCN-DAZ-SAFE-NEXT:    v_rcp_f32_e64 v0, -v0
; GCN-DAZ-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v1
; GCN-DAZ-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; SI-IEEE-SAFE-LABEL: v_neg_rsq_neg_f32_foldable_user:
; SI-IEEE-SAFE:       ; %bb.0:
; SI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x8f800000
; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0xcf800000, v0
; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, vcc
; SI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v0
; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
; SI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v3, v2, v0
; SI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v4
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[4:5]
; SI-IEEE-SAFE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v2
; SI-IEEE-SAFE-NEXT:    v_fma_f32 v2, -v4, v2, v0
; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v2
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
; SI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v3, 0x260
; SI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v3
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; SI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x7f800000
; SI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v0
; SI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], |v0|, s4
; SI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, -v0, v2, s[4:5]
; SI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
; SI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
; SI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; SI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
; SI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v1
; SI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; CI-IEEE-SAFE-LABEL: v_neg_rsq_neg_f32_foldable_user:
; CI-IEEE-SAFE:       ; %bb.0:
; CI-IEEE-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-IEEE-SAFE-NEXT:    s_mov_b32 s4, 0x8f800000
; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v2, 0xcf800000, v0
; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, vcc
; CI-IEEE-SAFE-NEXT:    v_sqrt_f32_e32 v2, v0
; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v3, s[4:5], -1, v2
; CI-IEEE-SAFE-NEXT:    v_fma_f32 v4, -v3, v2, v0
; CI-IEEE-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], 0, v4
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v3, v2, v3, s[4:5]
; CI-IEEE-SAFE-NEXT:    v_add_i32_e64 v4, s[4:5], 1, v2
; CI-IEEE-SAFE-NEXT:    v_fma_f32 v2, -v4, v2, v0
; CI-IEEE-SAFE-NEXT:    v_cmp_lt_f32_e64 s[4:5], 0, v2
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e64 v2, v3, v4, s[4:5]
; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v3, 0x37800000, v2
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
; CI-IEEE-SAFE-NEXT:    v_mov_b32_e32 v3, 0x260
; CI-IEEE-SAFE-NEXT:    v_cmp_class_f32_e32 vcc, v0, v3
; CI-IEEE-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; CI-IEEE-SAFE-NEXT:    v_frexp_mant_f32_e64 v2, -v0
; CI-IEEE-SAFE-NEXT:    v_rcp_f32_e32 v2, v2
; CI-IEEE-SAFE-NEXT:    v_frexp_exp_i32_f32_e32 v0, v0
; CI-IEEE-SAFE-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
; CI-IEEE-SAFE-NEXT:    v_ldexp_f32_e32 v0, v2, v0
; CI-IEEE-SAFE-NEXT:    v_mul_f32_e32 v0, v0, v1
; CI-IEEE-SAFE-NEXT:    s_setpc_b64 s[30:31]
  %val0.neg = fneg float %val0
  %sqrt = call contract float @llvm.sqrt.f32(float %val0.neg)
  %div = fdiv contract float -1.0, %sqrt, !fpmath !0
  %user = fmul contract float %div, %val1
  ret float %user
}

define <2
x float> @v_neg_rsq_neg_v2f32_foldable_user(<2 x float> %val0, <2 x float> %val1) { 1274; GCN-DAZ-UNSAFE-LABEL: v_neg_rsq_neg_v2f32_foldable_user: 1275; GCN-DAZ-UNSAFE: ; %bb.0: 1276; GCN-DAZ-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1277; GCN-DAZ-UNSAFE-NEXT: v_rsq_f32_e64 v0, -v0 1278; GCN-DAZ-UNSAFE-NEXT: v_rsq_f32_e64 v1, -v1 1279; GCN-DAZ-UNSAFE-NEXT: v_mul_f32_e64 v0, -v0, v2 1280; GCN-DAZ-UNSAFE-NEXT: v_mul_f32_e64 v1, -v1, v3 1281; GCN-DAZ-UNSAFE-NEXT: s_setpc_b64 s[30:31] 1282; 1283; GCN-IEEE-UNSAFE-LABEL: v_neg_rsq_neg_v2f32_foldable_user: 1284; GCN-IEEE-UNSAFE: ; %bb.0: 1285; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1286; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e64 v0, -v0 1287; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e64 v1, -v1 1288; GCN-IEEE-UNSAFE-NEXT: v_mul_f32_e64 v0, -v0, v2 1289; GCN-IEEE-UNSAFE-NEXT: v_mul_f32_e64 v1, -v1, v3 1290; GCN-IEEE-UNSAFE-NEXT: s_setpc_b64 s[30:31] 1291; 1292; GCN-DAZ-SAFE-LABEL: v_neg_rsq_neg_v2f32_foldable_user: 1293; GCN-DAZ-SAFE: ; %bb.0: 1294; GCN-DAZ-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1295; GCN-DAZ-SAFE-NEXT: s_mov_b32 s4, 0x8f800000 1296; GCN-DAZ-SAFE-NEXT: s_mov_b32 s5, 0x4f800000 1297; GCN-DAZ-SAFE-NEXT: v_mul_f32_e64 v4, -v1, s5 1298; GCN-DAZ-SAFE-NEXT: v_cmp_lt_f32_e32 vcc, s4, v1 1299; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e64 v1, -v1, v4, vcc 1300; GCN-DAZ-SAFE-NEXT: v_rsq_f32_e32 v4, v1 1301; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v5, v1, v4 1302; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v4, 0.5, v4 1303; GCN-DAZ-SAFE-NEXT: v_fma_f32 v6, -v4, v5, 0.5 1304; GCN-DAZ-SAFE-NEXT: v_fma_f32 v5, v5, v6, v5 1305; GCN-DAZ-SAFE-NEXT: v_fma_f32 v7, -v5, v5, v1 1306; GCN-DAZ-SAFE-NEXT: v_fma_f32 v4, v4, v6, v4 1307; GCN-DAZ-SAFE-NEXT: v_fma_f32 v4, v7, v4, v5 1308; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v5, 0x37800000, v4 1309; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc 1310; GCN-DAZ-SAFE-NEXT: v_mul_f32_e64 v5, -v0, s5 1311; GCN-DAZ-SAFE-NEXT: v_cmp_lt_f32_e32 vcc, s4, v0 1312; GCN-DAZ-SAFE-NEXT: 
v_cndmask_b32_e64 v0, -v0, v5, vcc 1313; GCN-DAZ-SAFE-NEXT: v_rsq_f32_e32 v5, v0 1314; GCN-DAZ-SAFE-NEXT: v_mov_b32_e32 v6, 0x260 1315; GCN-DAZ-SAFE-NEXT: v_cmp_class_f32_e64 s[4:5], v1, v6 1316; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[4:5] 1317; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v4, v0, v5 1318; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v5, 0.5, v5 1319; GCN-DAZ-SAFE-NEXT: v_fma_f32 v7, -v5, v4, 0.5 1320; GCN-DAZ-SAFE-NEXT: v_fma_f32 v4, v4, v7, v4 1321; GCN-DAZ-SAFE-NEXT: v_fma_f32 v8, -v4, v4, v0 1322; GCN-DAZ-SAFE-NEXT: v_fma_f32 v5, v5, v7, v5 1323; GCN-DAZ-SAFE-NEXT: v_fma_f32 v4, v8, v5, v4 1324; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v5, 0x37800000, v4 1325; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc 1326; GCN-DAZ-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v6 1327; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 1328; GCN-DAZ-SAFE-NEXT: v_rcp_f32_e64 v0, -v0 1329; GCN-DAZ-SAFE-NEXT: v_rcp_f32_e64 v1, -v1 1330; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v0, v0, v2 1331; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v1, v1, v3 1332; GCN-DAZ-SAFE-NEXT: s_setpc_b64 s[30:31] 1333; 1334; SI-IEEE-SAFE-LABEL: v_neg_rsq_neg_v2f32_foldable_user: 1335; SI-IEEE-SAFE: ; %bb.0: 1336; SI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1337; SI-IEEE-SAFE-NEXT: s_mov_b32 s6, 0x8f800000 1338; SI-IEEE-SAFE-NEXT: s_mov_b32 s7, 0x4f800000 1339; SI-IEEE-SAFE-NEXT: v_mul_f32_e64 v4, -v1, s7 1340; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e32 vcc, s6, v1 1341; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, -v1, v4, vcc 1342; SI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v4, v1 1343; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v5, s[4:5], -1, v4 1344; SI-IEEE-SAFE-NEXT: v_fma_f32 v6, -v5, v4, v1 1345; SI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v6 1346; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[4:5] 1347; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v4 1348; SI-IEEE-SAFE-NEXT: v_fma_f32 v4, -v6, v4, v1 1349; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v4 1350; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v4, v5, v6, 
s[4:5] 1351; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v5, 0x37800000, v4 1352; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc 1353; SI-IEEE-SAFE-NEXT: v_mul_f32_e64 v6, -v0, s7 1354; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 1355; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v6, vcc 1356; SI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v6, v0 1357; SI-IEEE-SAFE-NEXT: v_mov_b32_e32 v5, 0x260 1358; SI-IEEE-SAFE-NEXT: v_cmp_class_f32_e64 s[4:5], v1, v5 1359; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[4:5] 1360; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v4, s[4:5], -1, v6 1361; SI-IEEE-SAFE-NEXT: v_fma_f32 v7, -v4, v6, v0 1362; SI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v7 1363; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[4:5] 1364; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v7, s[4:5], 1, v6 1365; SI-IEEE-SAFE-NEXT: v_fma_f32 v6, -v7, v6, v0 1366; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v6 1367; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[4:5] 1368; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v6, 0x37800000, v4 1369; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 1370; SI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v5 1371; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 1372; SI-IEEE-SAFE-NEXT: s_mov_b32 s6, 0x7f800000 1373; SI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v4, -v0 1374; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s6 1375; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v4, -v0, v4, s[4:5] 1376; SI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v4, v4 1377; SI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 1378; SI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 1379; SI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v4, v0 1380; SI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v4, -v1 1381; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v1|, s6 1382; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v4, -v1, v4, s[4:5] 1383; SI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v4, v4 1384; SI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 1385; SI-IEEE-SAFE-NEXT: v_sub_i32_e32 v1, vcc, 0, v1 1386; SI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 
v1, v4, v1 1387; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v0, v0, v2 1388; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v1, v1, v3 1389; SI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 1390; 1391; CI-IEEE-SAFE-LABEL: v_neg_rsq_neg_v2f32_foldable_user: 1392; CI-IEEE-SAFE: ; %bb.0: 1393; CI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1394; CI-IEEE-SAFE-NEXT: s_mov_b32 s6, 0x8f800000 1395; CI-IEEE-SAFE-NEXT: s_mov_b32 s7, 0x4f800000 1396; CI-IEEE-SAFE-NEXT: v_mul_f32_e64 v4, -v1, s7 1397; CI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e32 vcc, s6, v1 1398; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, -v1, v4, vcc 1399; CI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v4, v1 1400; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v5, s[4:5], -1, v4 1401; CI-IEEE-SAFE-NEXT: v_fma_f32 v6, -v5, v4, v1 1402; CI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v6 1403; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[4:5] 1404; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v4 1405; CI-IEEE-SAFE-NEXT: v_fma_f32 v4, -v6, v4, v1 1406; CI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v4 1407; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v4, v5, v6, s[4:5] 1408; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v5, 0x37800000, v4 1409; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc 1410; CI-IEEE-SAFE-NEXT: v_mul_f32_e64 v6, -v0, s7 1411; CI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e32 vcc, s6, v0 1412; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v6, vcc 1413; CI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v6, v0 1414; CI-IEEE-SAFE-NEXT: v_mov_b32_e32 v5, 0x260 1415; CI-IEEE-SAFE-NEXT: v_cmp_class_f32_e64 s[4:5], v1, v5 1416; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[4:5] 1417; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v4, s[4:5], -1, v6 1418; CI-IEEE-SAFE-NEXT: v_fma_f32 v7, -v4, v6, v0 1419; CI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v7 1420; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[4:5] 1421; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v7, s[4:5], 1, v6 1422; CI-IEEE-SAFE-NEXT: v_fma_f32 v6, -v7, v6, v0 1423; CI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v6 1424; CI-IEEE-SAFE-NEXT: 
v_cndmask_b32_e64 v4, v4, v7, s[4:5] 1425; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v6, 0x37800000, v4 1426; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 1427; CI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v5 1428; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 1429; CI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v4, -v0 1430; CI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v4, v4 1431; CI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 1432; CI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 1433; CI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v4, v0 1434; CI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v4, -v1 1435; CI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v4, v4 1436; CI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 1437; CI-IEEE-SAFE-NEXT: v_sub_i32_e32 v1, vcc, 0, v1 1438; CI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v1, v4, v1 1439; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v0, v0, v2 1440; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v1, v1, v3 1441; CI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 1442 %val0.fneg = fneg <2 x float> %val0 1443 %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val0.fneg) 1444 %div = fdiv contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0 1445 %user = fmul contract <2 x float> %div, %val1 1446 ret <2 x float> %user 1447} 1448 1449define float @v_neg_rsq_f32(float %val) { 1450; GCN-DAZ-UNSAFE-LABEL: v_neg_rsq_f32: 1451; GCN-DAZ-UNSAFE: ; %bb.0: 1452; GCN-DAZ-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1453; GCN-DAZ-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 1454; GCN-DAZ-UNSAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 1455; GCN-DAZ-UNSAFE-NEXT: s_setpc_b64 s[30:31] 1456; 1457; GCN-IEEE-UNSAFE-LABEL: v_neg_rsq_f32: 1458; GCN-IEEE-UNSAFE: ; %bb.0: 1459; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1460; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 1461; GCN-IEEE-UNSAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 1462; GCN-IEEE-UNSAFE-NEXT: s_setpc_b64 s[30:31] 1463; 1464; GCN-DAZ-SAFE-LABEL: v_neg_rsq_f32: 1465; GCN-DAZ-SAFE: ; %bb.0: 1466; GCN-DAZ-SAFE-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 1467; GCN-DAZ-SAFE-NEXT: s_mov_b32 s4, 0xf800000 1468; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 1469; GCN-DAZ-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1470; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1471; GCN-DAZ-SAFE-NEXT: v_rsq_f32_e32 v1, v0 1472; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v2, v0, v1 1473; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v1, 0.5, v1 1474; GCN-DAZ-SAFE-NEXT: v_fma_f32 v3, -v1, v2, 0.5 1475; GCN-DAZ-SAFE-NEXT: v_fma_f32 v2, v2, v3, v2 1476; GCN-DAZ-SAFE-NEXT: v_fma_f32 v4, -v2, v2, v0 1477; GCN-DAZ-SAFE-NEXT: v_fma_f32 v1, v1, v3, v1 1478; GCN-DAZ-SAFE-NEXT: v_fma_f32 v1, v4, v1, v2 1479; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 1480; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1481; GCN-DAZ-SAFE-NEXT: v_mov_b32_e32 v2, 0x260 1482; GCN-DAZ-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1483; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1484; GCN-DAZ-SAFE-NEXT: v_rcp_f32_e64 v0, -v0 1485; GCN-DAZ-SAFE-NEXT: s_setpc_b64 s[30:31] 1486; 1487; SI-IEEE-SAFE-LABEL: v_neg_rsq_f32: 1488; SI-IEEE-SAFE: ; %bb.0: 1489; SI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1490; SI-IEEE-SAFE-NEXT: s_mov_b32 s4, 0xf800000 1491; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 1492; SI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1493; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1494; SI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v1, v0 1495; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 1496; SI-IEEE-SAFE-NEXT: v_fma_f32 v3, -v2, v1, v0 1497; SI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 1498; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 1499; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 1500; SI-IEEE-SAFE-NEXT: v_fma_f32 v1, -v3, v1, v0 1501; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 1502; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 1503; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 1504; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v2, 
vcc 1505; SI-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x260 1506; SI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1507; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1508; SI-IEEE-SAFE-NEXT: s_mov_b32 s4, 0x7f800000 1509; SI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v1, -v0 1510; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 1511; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, -v0, v1, s[4:5] 1512; SI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v1, v1 1513; SI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 1514; SI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 1515; SI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v1, v0 1516; SI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 1517; 1518; CI-IEEE-SAFE-LABEL: v_neg_rsq_f32: 1519; CI-IEEE-SAFE: ; %bb.0: 1520; CI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1521; CI-IEEE-SAFE-NEXT: s_mov_b32 s4, 0xf800000 1522; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 1523; CI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1524; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1525; CI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v1, v0 1526; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 1527; CI-IEEE-SAFE-NEXT: v_fma_f32 v3, -v2, v1, v0 1528; CI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 1529; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 1530; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 1531; CI-IEEE-SAFE-NEXT: v_fma_f32 v1, -v3, v1, v0 1532; CI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 1533; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 1534; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 1535; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1536; CI-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x260 1537; CI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 1538; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1539; CI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v1, -v0 1540; CI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v1, v1 1541; CI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 1542; CI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, 
v0 1543; CI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v1, v0 1544; CI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 1545 %sqrt = call contract float @llvm.sqrt.f32(float %val) 1546 %div = fdiv contract float -1.0, %sqrt, !fpmath !0 1547 ret float %div 1548} 1549 1550define <2 x float> @v_neg_rsq_v2f32(<2 x float> %val) { 1551; GCN-DAZ-UNSAFE-LABEL: v_neg_rsq_v2f32: 1552; GCN-DAZ-UNSAFE: ; %bb.0: 1553; GCN-DAZ-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1554; GCN-DAZ-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 1555; GCN-DAZ-UNSAFE-NEXT: v_rsq_f32_e32 v1, v1 1556; GCN-DAZ-UNSAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 1557; GCN-DAZ-UNSAFE-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 1558; GCN-DAZ-UNSAFE-NEXT: s_setpc_b64 s[30:31] 1559; 1560; GCN-IEEE-UNSAFE-LABEL: v_neg_rsq_v2f32: 1561; GCN-IEEE-UNSAFE: ; %bb.0: 1562; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1563; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 1564; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v1, v1 1565; GCN-IEEE-UNSAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 1566; GCN-IEEE-UNSAFE-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 1567; GCN-IEEE-UNSAFE-NEXT: s_setpc_b64 s[30:31] 1568; 1569; GCN-DAZ-SAFE-LABEL: v_neg_rsq_v2f32: 1570; GCN-DAZ-SAFE: ; %bb.0: 1571; GCN-DAZ-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1572; GCN-DAZ-SAFE-NEXT: s_mov_b32 s4, 0xf800000 1573; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v1 1574; GCN-DAZ-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 1575; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1576; GCN-DAZ-SAFE-NEXT: v_rsq_f32_e32 v2, v1 1577; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v3, v1, v2 1578; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v2, 0.5, v2 1579; GCN-DAZ-SAFE-NEXT: v_fma_f32 v4, -v2, v3, 0.5 1580; GCN-DAZ-SAFE-NEXT: v_fma_f32 v3, v3, v4, v3 1581; GCN-DAZ-SAFE-NEXT: v_fma_f32 v5, -v3, v3, v1 1582; GCN-DAZ-SAFE-NEXT: v_fma_f32 v2, v2, v4, v2 1583; GCN-DAZ-SAFE-NEXT: v_fma_f32 v2, v5, v2, v3 1584; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 1585; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 
v2, v2, v3, vcc 1586; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v3, 0x4f800000, v0 1587; GCN-DAZ-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1588; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1589; GCN-DAZ-SAFE-NEXT: v_rsq_f32_e32 v3, v0 1590; GCN-DAZ-SAFE-NEXT: v_mov_b32_e32 v4, 0x260 1591; GCN-DAZ-SAFE-NEXT: v_cmp_class_f32_e64 s[4:5], v1, v4 1592; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[4:5] 1593; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v2, v0, v3 1594; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v3, 0.5, v3 1595; GCN-DAZ-SAFE-NEXT: v_fma_f32 v5, -v3, v2, 0.5 1596; GCN-DAZ-SAFE-NEXT: v_fma_f32 v2, v2, v5, v2 1597; GCN-DAZ-SAFE-NEXT: v_fma_f32 v6, -v2, v2, v0 1598; GCN-DAZ-SAFE-NEXT: v_fma_f32 v3, v3, v5, v3 1599; GCN-DAZ-SAFE-NEXT: v_fma_f32 v2, v6, v3, v2 1600; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 1601; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1602; GCN-DAZ-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v4 1603; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 1604; GCN-DAZ-SAFE-NEXT: v_rcp_f32_e64 v0, -v0 1605; GCN-DAZ-SAFE-NEXT: v_rcp_f32_e64 v1, -v1 1606; GCN-DAZ-SAFE-NEXT: s_setpc_b64 s[30:31] 1607; 1608; SI-IEEE-SAFE-LABEL: v_neg_rsq_v2f32: 1609; SI-IEEE-SAFE: ; %bb.0: 1610; SI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1611; SI-IEEE-SAFE-NEXT: s_mov_b32 s6, 0xf800000 1612; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v1 1613; SI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v1 1614; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1615; SI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v2, v1 1616; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2 1617; SI-IEEE-SAFE-NEXT: v_fma_f32 v4, -v3, v2, v1 1618; SI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4 1619; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5] 1620; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v2 1621; SI-IEEE-SAFE-NEXT: v_fma_f32 v2, -v4, v2, v1 1622; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2 1623; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5] 1624; 
SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 1625; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1626; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v4, 0x4f800000, v0 1627; SI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0 1628; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1629; SI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v4, v0 1630; SI-IEEE-SAFE-NEXT: v_mov_b32_e32 v3, 0x260 1631; SI-IEEE-SAFE-NEXT: v_cmp_class_f32_e64 s[4:5], v1, v3 1632; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[4:5] 1633; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v4 1634; SI-IEEE-SAFE-NEXT: v_fma_f32 v5, -v2, v4, v0 1635; SI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5 1636; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5] 1637; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v5, s[4:5], 1, v4 1638; SI-IEEE-SAFE-NEXT: v_fma_f32 v4, -v5, v4, v0 1639; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v4 1640; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5] 1641; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v4, 0x37800000, v2 1642; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 1643; SI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v3 1644; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 1645; SI-IEEE-SAFE-NEXT: s_mov_b32 s6, 0x7f800000 1646; SI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v2, -v0 1647; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s6 1648; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, -v0, v2, s[4:5] 1649; SI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v2, v2 1650; SI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 1651; SI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 1652; SI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v2, v0 1653; SI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v2, -v1 1654; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v1|, s6 1655; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, -v1, v2, s[4:5] 1656; SI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v2, v2 1657; SI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 1658; SI-IEEE-SAFE-NEXT: v_sub_i32_e32 v1, vcc, 0, v1 1659; SI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v1, 
v2, v1 1660; SI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 1661; 1662; CI-IEEE-SAFE-LABEL: v_neg_rsq_v2f32: 1663; CI-IEEE-SAFE: ; %bb.0: 1664; CI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1665; CI-IEEE-SAFE-NEXT: s_mov_b32 s6, 0xf800000 1666; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v1 1667; CI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v1 1668; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1669; CI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v2, v1 1670; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2 1671; CI-IEEE-SAFE-NEXT: v_fma_f32 v4, -v3, v2, v1 1672; CI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4 1673; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5] 1674; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v2 1675; CI-IEEE-SAFE-NEXT: v_fma_f32 v2, -v4, v2, v1 1676; CI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2 1677; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5] 1678; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 1679; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1680; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v4, 0x4f800000, v0 1681; CI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0 1682; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1683; CI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v4, v0 1684; CI-IEEE-SAFE-NEXT: v_mov_b32_e32 v3, 0x260 1685; CI-IEEE-SAFE-NEXT: v_cmp_class_f32_e64 s[4:5], v1, v3 1686; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[4:5] 1687; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v4 1688; CI-IEEE-SAFE-NEXT: v_fma_f32 v5, -v2, v4, v0 1689; CI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v5 1690; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5] 1691; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v5, s[4:5], 1, v4 1692; CI-IEEE-SAFE-NEXT: v_fma_f32 v4, -v5, v4, v0 1693; CI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v4 1694; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5] 1695; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v4, 0x37800000, v2 1696; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 1697; 
CI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v3 1698; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 1699; CI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v2, -v0 1700; CI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v2, v2 1701; CI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 1702; CI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 1703; CI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v2, v0 1704; CI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v2, -v1 1705; CI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v2, v2 1706; CI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 1707; CI-IEEE-SAFE-NEXT: v_sub_i32_e32 v1, vcc, 0, v1 1708; CI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v1, v2, v1 1709; CI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 1710 %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val) 1711 %div = fdiv contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0 1712 ret <2 x float> %div 1713} 1714 1715define float @v_neg_rsq_f32_foldable_user(float %val0, float %val1) { 1716; GCN-DAZ-UNSAFE-LABEL: v_neg_rsq_f32_foldable_user: 1717; GCN-DAZ-UNSAFE: ; %bb.0: 1718; GCN-DAZ-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1719; GCN-DAZ-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 1720; GCN-DAZ-UNSAFE-NEXT: v_mul_f32_e64 v0, -v0, v1 1721; GCN-DAZ-UNSAFE-NEXT: s_setpc_b64 s[30:31] 1722; 1723; GCN-IEEE-UNSAFE-LABEL: v_neg_rsq_f32_foldable_user: 1724; GCN-IEEE-UNSAFE: ; %bb.0: 1725; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1726; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 1727; GCN-IEEE-UNSAFE-NEXT: v_mul_f32_e64 v0, -v0, v1 1728; GCN-IEEE-UNSAFE-NEXT: s_setpc_b64 s[30:31] 1729; 1730; GCN-DAZ-SAFE-LABEL: v_neg_rsq_f32_foldable_user: 1731; GCN-DAZ-SAFE: ; %bb.0: 1732; GCN-DAZ-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1733; GCN-DAZ-SAFE-NEXT: s_mov_b32 s4, 0xf800000 1734; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 1735; GCN-DAZ-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1736; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1737; GCN-DAZ-SAFE-NEXT: v_rsq_f32_e32 v2, v0 1738; 
GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v3, v0, v2 1739; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v2, 0.5, v2 1740; GCN-DAZ-SAFE-NEXT: v_fma_f32 v4, -v2, v3, 0.5 1741; GCN-DAZ-SAFE-NEXT: v_fma_f32 v3, v3, v4, v3 1742; GCN-DAZ-SAFE-NEXT: v_fma_f32 v5, -v3, v3, v0 1743; GCN-DAZ-SAFE-NEXT: v_fma_f32 v2, v2, v4, v2 1744; GCN-DAZ-SAFE-NEXT: v_fma_f32 v2, v5, v2, v3 1745; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 1746; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1747; GCN-DAZ-SAFE-NEXT: v_mov_b32_e32 v3, 0x260 1748; GCN-DAZ-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v3 1749; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 1750; GCN-DAZ-SAFE-NEXT: v_rcp_f32_e64 v0, -v0 1751; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v0, v0, v1 1752; GCN-DAZ-SAFE-NEXT: s_setpc_b64 s[30:31] 1753; 1754; SI-IEEE-SAFE-LABEL: v_neg_rsq_f32_foldable_user: 1755; SI-IEEE-SAFE: ; %bb.0: 1756; SI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1757; SI-IEEE-SAFE-NEXT: s_mov_b32 s4, 0xf800000 1758; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 1759; SI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1760; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1761; SI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v2, v0 1762; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2 1763; SI-IEEE-SAFE-NEXT: v_fma_f32 v4, -v3, v2, v0 1764; SI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4 1765; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5] 1766; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v2 1767; SI-IEEE-SAFE-NEXT: v_fma_f32 v2, -v4, v2, v0 1768; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2 1769; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5] 1770; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 1771; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1772; SI-IEEE-SAFE-NEXT: v_mov_b32_e32 v3, 0x260 1773; SI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v3 1774; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 1775; SI-IEEE-SAFE-NEXT: s_mov_b32 s4, 0x7f800000 1776; 
SI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v2, -v0 1777; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 1778; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, -v0, v2, s[4:5] 1779; SI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v2, v2 1780; SI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 1781; SI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 1782; SI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v2, v0 1783; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v0, v0, v1 1784; SI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 1785; 1786; CI-IEEE-SAFE-LABEL: v_neg_rsq_f32_foldable_user: 1787; CI-IEEE-SAFE: ; %bb.0: 1788; CI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1789; CI-IEEE-SAFE-NEXT: s_mov_b32 s4, 0xf800000 1790; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 0x4f800000, v0 1791; CI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1792; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1793; CI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v2, v0 1794; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v3, s[4:5], -1, v2 1795; CI-IEEE-SAFE-NEXT: v_fma_f32 v4, -v3, v2, v0 1796; CI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v4 1797; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v3, v2, v3, s[4:5] 1798; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v4, s[4:5], 1, v2 1799; CI-IEEE-SAFE-NEXT: v_fma_f32 v2, -v4, v2, v0 1800; CI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v2 1801; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v3, v4, s[4:5] 1802; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v3, 0x37800000, v2 1803; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1804; CI-IEEE-SAFE-NEXT: v_mov_b32_e32 v3, 0x260 1805; CI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v3 1806; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 1807; CI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v2, -v0 1808; CI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v2, v2 1809; CI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 1810; CI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 1811; CI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v2, v0 1812; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v0, v0, v1 1813; CI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 1814 %sqrt 
= call contract float @llvm.sqrt.f32(float %val0) 1815 %div = fdiv contract float -1.0, %sqrt, !fpmath !0 1816 %user = fmul contract float %div, %val1 1817 ret float %user 1818} 1819 1820define <2 x float> @v_neg_rsq_v2f32_foldable_user(<2 x float> %val0, <2 x float> %val1) { 1821; GCN-DAZ-UNSAFE-LABEL: v_neg_rsq_v2f32_foldable_user: 1822; GCN-DAZ-UNSAFE: ; %bb.0: 1823; GCN-DAZ-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1824; GCN-DAZ-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 1825; GCN-DAZ-UNSAFE-NEXT: v_rsq_f32_e32 v1, v1 1826; GCN-DAZ-UNSAFE-NEXT: v_mul_f32_e64 v0, -v0, v2 1827; GCN-DAZ-UNSAFE-NEXT: v_mul_f32_e64 v1, -v1, v3 1828; GCN-DAZ-UNSAFE-NEXT: s_setpc_b64 s[30:31] 1829; 1830; GCN-IEEE-UNSAFE-LABEL: v_neg_rsq_v2f32_foldable_user: 1831; GCN-IEEE-UNSAFE: ; %bb.0: 1832; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1833; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 1834; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v1, v1 1835; GCN-IEEE-UNSAFE-NEXT: v_mul_f32_e64 v0, -v0, v2 1836; GCN-IEEE-UNSAFE-NEXT: v_mul_f32_e64 v1, -v1, v3 1837; GCN-IEEE-UNSAFE-NEXT: s_setpc_b64 s[30:31] 1838; 1839; GCN-DAZ-SAFE-LABEL: v_neg_rsq_v2f32_foldable_user: 1840; GCN-DAZ-SAFE: ; %bb.0: 1841; GCN-DAZ-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1842; GCN-DAZ-SAFE-NEXT: s_mov_b32 s4, 0xf800000 1843; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v4, 0x4f800000, v1 1844; GCN-DAZ-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v1 1845; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 1846; GCN-DAZ-SAFE-NEXT: v_rsq_f32_e32 v4, v1 1847; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v5, v1, v4 1848; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v4, 0.5, v4 1849; GCN-DAZ-SAFE-NEXT: v_fma_f32 v6, -v4, v5, 0.5 1850; GCN-DAZ-SAFE-NEXT: v_fma_f32 v5, v5, v6, v5 1851; GCN-DAZ-SAFE-NEXT: v_fma_f32 v7, -v5, v5, v1 1852; GCN-DAZ-SAFE-NEXT: v_fma_f32 v4, v4, v6, v4 1853; GCN-DAZ-SAFE-NEXT: v_fma_f32 v4, v7, v4, v5 1854; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v5, 0x37800000, v4 1855; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc 
1856; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v5, 0x4f800000, v0 1857; GCN-DAZ-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 1858; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 1859; GCN-DAZ-SAFE-NEXT: v_rsq_f32_e32 v5, v0 1860; GCN-DAZ-SAFE-NEXT: v_mov_b32_e32 v6, 0x260 1861; GCN-DAZ-SAFE-NEXT: v_cmp_class_f32_e64 s[4:5], v1, v6 1862; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[4:5] 1863; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v4, v0, v5 1864; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v5, 0.5, v5 1865; GCN-DAZ-SAFE-NEXT: v_fma_f32 v7, -v5, v4, 0.5 1866; GCN-DAZ-SAFE-NEXT: v_fma_f32 v4, v4, v7, v4 1867; GCN-DAZ-SAFE-NEXT: v_fma_f32 v8, -v4, v4, v0 1868; GCN-DAZ-SAFE-NEXT: v_fma_f32 v5, v5, v7, v5 1869; GCN-DAZ-SAFE-NEXT: v_fma_f32 v4, v8, v5, v4 1870; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v5, 0x37800000, v4 1871; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc 1872; GCN-DAZ-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v6 1873; GCN-DAZ-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 1874; GCN-DAZ-SAFE-NEXT: v_rcp_f32_e64 v0, -v0 1875; GCN-DAZ-SAFE-NEXT: v_rcp_f32_e64 v1, -v1 1876; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v0, v0, v2 1877; GCN-DAZ-SAFE-NEXT: v_mul_f32_e32 v1, v1, v3 1878; GCN-DAZ-SAFE-NEXT: s_setpc_b64 s[30:31] 1879; 1880; SI-IEEE-SAFE-LABEL: v_neg_rsq_v2f32_foldable_user: 1881; SI-IEEE-SAFE: ; %bb.0: 1882; SI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1883; SI-IEEE-SAFE-NEXT: s_mov_b32 s6, 0xf800000 1884; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v4, 0x4f800000, v1 1885; SI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v1 1886; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 1887; SI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v4, v1 1888; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v5, s[4:5], -1, v4 1889; SI-IEEE-SAFE-NEXT: v_fma_f32 v6, -v5, v4, v1 1890; SI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v6 1891; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[4:5] 1892; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v4 1893; SI-IEEE-SAFE-NEXT: v_fma_f32 v4, -v6, v4, v1 1894; SI-IEEE-SAFE-NEXT: 
v_cmp_lt_f32_e64 s[4:5], 0, v4 1895; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v4, v5, v6, s[4:5] 1896; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v5, 0x37800000, v4 1897; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc 1898; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v6, 0x4f800000, v0 1899; SI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0 1900; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1901; SI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v6, v0 1902; SI-IEEE-SAFE-NEXT: v_mov_b32_e32 v5, 0x260 1903; SI-IEEE-SAFE-NEXT: v_cmp_class_f32_e64 s[4:5], v1, v5 1904; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[4:5] 1905; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v4, s[4:5], -1, v6 1906; SI-IEEE-SAFE-NEXT: v_fma_f32 v7, -v4, v6, v0 1907; SI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v7 1908; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[4:5] 1909; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v7, s[4:5], 1, v6 1910; SI-IEEE-SAFE-NEXT: v_fma_f32 v6, -v7, v6, v0 1911; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v6 1912; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[4:5] 1913; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v6, 0x37800000, v4 1914; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 1915; SI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v5 1916; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 1917; SI-IEEE-SAFE-NEXT: s_mov_b32 s6, 0x7f800000 1918; SI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v4, -v0 1919; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s6 1920; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v4, -v0, v4, s[4:5] 1921; SI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v4, v4 1922; SI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 1923; SI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 1924; SI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v4, v0 1925; SI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v4, -v1 1926; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], |v1|, s6 1927; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v4, -v1, v4, s[4:5] 1928; SI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v4, v4 1929; SI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 
1930; SI-IEEE-SAFE-NEXT: v_sub_i32_e32 v1, vcc, 0, v1 1931; SI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v1, v4, v1 1932; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v0, v0, v2 1933; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v1, v1, v3 1934; SI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 1935; 1936; CI-IEEE-SAFE-LABEL: v_neg_rsq_v2f32_foldable_user: 1937; CI-IEEE-SAFE: ; %bb.0: 1938; CI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1939; CI-IEEE-SAFE-NEXT: s_mov_b32 s6, 0xf800000 1940; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v4, 0x4f800000, v1 1941; CI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v1 1942; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 1943; CI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v4, v1 1944; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v5, s[4:5], -1, v4 1945; CI-IEEE-SAFE-NEXT: v_fma_f32 v6, -v5, v4, v1 1946; CI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v6 1947; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v5, v4, v5, s[4:5] 1948; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v6, s[4:5], 1, v4 1949; CI-IEEE-SAFE-NEXT: v_fma_f32 v4, -v6, v4, v1 1950; CI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v4 1951; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v4, v5, v6, s[4:5] 1952; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v5, 0x37800000, v4 1953; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc 1954; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v6, 0x4f800000, v0 1955; CI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s6, v0 1956; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 1957; CI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v6, v0 1958; CI-IEEE-SAFE-NEXT: v_mov_b32_e32 v5, 0x260 1959; CI-IEEE-SAFE-NEXT: v_cmp_class_f32_e64 s[4:5], v1, v5 1960; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[4:5] 1961; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v4, s[4:5], -1, v6 1962; CI-IEEE-SAFE-NEXT: v_fma_f32 v7, -v4, v6, v0 1963; CI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v7 1964; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[4:5] 1965; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v7, s[4:5], 1, v6 1966; CI-IEEE-SAFE-NEXT: v_fma_f32 v6, -v7, v6, v0 1967; CI-IEEE-SAFE-NEXT: 
v_cmp_lt_f32_e64 s[4:5], 0, v6 1968; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v4, v4, v7, s[4:5] 1969; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v6, 0x37800000, v4 1970; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 1971; CI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v5 1972; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 1973; CI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v4, -v0 1974; CI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v4, v4 1975; CI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 1976; CI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 1977; CI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v4, v0 1978; CI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e64 v4, -v1 1979; CI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v4, v4 1980; CI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 1981; CI-IEEE-SAFE-NEXT: v_sub_i32_e32 v1, vcc, 0, v1 1982; CI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v1, v4, v1 1983; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v0, v0, v2 1984; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v1, v1, v3 1985; CI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 1986 %sqrt = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %val0) 1987 %div = fdiv contract <2 x float> <float -1.0, float -1.0>, %sqrt, !fpmath !0 1988 %user = fmul contract <2 x float> %div, %val1 1989 ret <2 x float> %user 1990} 1991 1992define float @v_rsq_f32(float %val) { 1993; GCN-DAZ-LABEL: v_rsq_f32: 1994; GCN-DAZ: ; %bb.0: 1995; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1996; GCN-DAZ-NEXT: v_rsq_f32_e32 v0, v0 1997; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 1998; 1999; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32: 2000; GCN-IEEE-UNSAFE: ; %bb.0: 2001; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2002; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 2003; GCN-IEEE-UNSAFE-NEXT: s_setpc_b64 s[30:31] 2004; 2005; GCN-IEEE-SAFE-LABEL: v_rsq_f32: 2006; GCN-IEEE-SAFE: ; %bb.0: 2007; GCN-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2008; GCN-IEEE-SAFE-NEXT: s_mov_b32 s4, 0x800000 2009; GCN-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2010; GCN-IEEE-SAFE-NEXT: 
v_cndmask_b32_e64 v1, 0, 24, vcc 2011; GCN-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v0, v1 2012; GCN-IEEE-SAFE-NEXT: v_rsq_f32_e32 v0, v0 2013; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, 0, 12, vcc 2014; GCN-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v0, v1 2015; GCN-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 2016 %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1 2017 %div = fdiv contract float 1.0, %sqrt, !fpmath !1 2018 ret float %div 2019} 2020 2021define { float, float } @v_rsq_f32_multi_use(float %val) { 2022; GCN-DAZ-UNSAFE-LABEL: v_rsq_f32_multi_use: 2023; GCN-DAZ-UNSAFE: ; %bb.0: 2024; GCN-DAZ-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2025; GCN-DAZ-UNSAFE-NEXT: v_sqrt_f32_e32 v2, v0 2026; GCN-DAZ-UNSAFE-NEXT: v_rsq_f32_e32 v1, v0 2027; GCN-DAZ-UNSAFE-NEXT: v_mov_b32_e32 v0, v2 2028; GCN-DAZ-UNSAFE-NEXT: s_setpc_b64 s[30:31] 2029; 2030; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_multi_use: 2031; GCN-IEEE-UNSAFE: ; %bb.0: 2032; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2033; GCN-IEEE-UNSAFE-NEXT: v_sqrt_f32_e32 v2, v0 2034; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v1, v0 2035; GCN-IEEE-UNSAFE-NEXT: v_mov_b32_e32 v0, v2 2036; GCN-IEEE-UNSAFE-NEXT: s_setpc_b64 s[30:31] 2037; 2038; GCN-DAZ-SAFE-LABEL: v_rsq_f32_multi_use: 2039; GCN-DAZ-SAFE: ; %bb.0: 2040; GCN-DAZ-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2041; GCN-DAZ-SAFE-NEXT: v_sqrt_f32_e32 v0, v0 2042; GCN-DAZ-SAFE-NEXT: v_rcp_f32_e32 v1, v0 2043; GCN-DAZ-SAFE-NEXT: s_setpc_b64 s[30:31] 2044; 2045; SI-IEEE-SAFE-LABEL: v_rsq_f32_multi_use: 2046; SI-IEEE-SAFE: ; %bb.0: 2047; SI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2048; SI-IEEE-SAFE-NEXT: s_mov_b32 s4, 0xf800000 2049; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 2050; SI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2051; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2052; SI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v1, v0 2053; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 2054; SI-IEEE-SAFE-NEXT: v_fma_f32 
v3, -v2, v1, v0 2055; SI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 2056; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 2057; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 2058; SI-IEEE-SAFE-NEXT: v_fma_f32 v1, -v3, v1, v0 2059; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 2060; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 2061; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 2062; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2063; SI-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x260 2064; SI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 2065; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 2066; SI-IEEE-SAFE-NEXT: s_mov_b32 s4, 0x7f800000 2067; SI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e32 v1, v0 2068; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 2069; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc 2070; SI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v1, v1 2071; SI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v2, v0 2072; SI-IEEE-SAFE-NEXT: v_sub_i32_e32 v2, vcc, 0, v2 2073; SI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v1, v1, v2 2074; SI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 2075; 2076; CI-IEEE-SAFE-LABEL: v_rsq_f32_multi_use: 2077; CI-IEEE-SAFE: ; %bb.0: 2078; CI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2079; CI-IEEE-SAFE-NEXT: s_mov_b32 s4, 0xf800000 2080; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 2081; CI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2082; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2083; CI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v1, v0 2084; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 2085; CI-IEEE-SAFE-NEXT: v_fma_f32 v3, -v2, v1, v0 2086; CI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 2087; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 2088; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 2089; CI-IEEE-SAFE-NEXT: v_fma_f32 v1, -v3, v1, v0 2090; CI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 2091; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 2092; 
CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 2093; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2094; CI-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x260 2095; CI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 2096; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 2097; CI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e32 v1, v0 2098; CI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v1, v1 2099; CI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v2, v0 2100; CI-IEEE-SAFE-NEXT: v_sub_i32_e32 v2, vcc, 0, v2 2101; CI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v1, v1, v2 2102; CI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 2103 %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1 2104 %insert.0 = insertvalue { float, float } poison, float %sqrt, 0 2105 %div = fdiv contract float 1.0, %sqrt, !fpmath !1 2106 %insert.1 = insertvalue { float, float } %insert.0, float %div, 1 2107 ret { float, float } %insert.1 2108} 2109 2110define float @v_rsq_f32_missing_contract0(float %val) { 2111; GCN-DAZ-UNSAFE-LABEL: v_rsq_f32_missing_contract0: 2112; GCN-DAZ-UNSAFE: ; %bb.0: 2113; GCN-DAZ-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2114; GCN-DAZ-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 2115; GCN-DAZ-UNSAFE-NEXT: s_setpc_b64 s[30:31] 2116; 2117; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_missing_contract0: 2118; GCN-IEEE-UNSAFE: ; %bb.0: 2119; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2120; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 2121; GCN-IEEE-UNSAFE-NEXT: s_setpc_b64 s[30:31] 2122; 2123; GCN-DAZ-SAFE-LABEL: v_rsq_f32_missing_contract0: 2124; GCN-DAZ-SAFE: ; %bb.0: 2125; GCN-DAZ-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2126; GCN-DAZ-SAFE-NEXT: v_sqrt_f32_e32 v0, v0 2127; GCN-DAZ-SAFE-NEXT: v_rcp_f32_e32 v0, v0 2128; GCN-DAZ-SAFE-NEXT: s_setpc_b64 s[30:31] 2129; 2130; SI-IEEE-SAFE-LABEL: v_rsq_f32_missing_contract0: 2131; SI-IEEE-SAFE: ; %bb.0: 2132; SI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2133; SI-IEEE-SAFE-NEXT: s_mov_b32 s4, 0xf800000 2134; SI-IEEE-SAFE-NEXT: 
v_mul_f32_e32 v1, 0x4f800000, v0 2135; SI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2136; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2137; SI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v1, v0 2138; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 2139; SI-IEEE-SAFE-NEXT: v_fma_f32 v3, -v2, v1, v0 2140; SI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 2141; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 2142; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 2143; SI-IEEE-SAFE-NEXT: v_fma_f32 v1, -v3, v1, v0 2144; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 2145; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 2146; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 2147; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2148; SI-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x260 2149; SI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 2150; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 2151; SI-IEEE-SAFE-NEXT: s_mov_b32 s4, 0x7f800000 2152; SI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e32 v1, v0 2153; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 2154; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc 2155; SI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v1, v1 2156; SI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2157; SI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 2158; SI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v1, v0 2159; SI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 2160; 2161; CI-IEEE-SAFE-LABEL: v_rsq_f32_missing_contract0: 2162; CI-IEEE-SAFE: ; %bb.0: 2163; CI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2164; CI-IEEE-SAFE-NEXT: s_mov_b32 s4, 0xf800000 2165; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 2166; CI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2167; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2168; CI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v1, v0 2169; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 2170; CI-IEEE-SAFE-NEXT: v_fma_f32 v3, -v2, v1, v0 2171; CI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 2172; 
CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 2173; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 2174; CI-IEEE-SAFE-NEXT: v_fma_f32 v1, -v3, v1, v0 2175; CI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 2176; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 2177; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 2178; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2179; CI-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x260 2180; CI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 2181; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 2182; CI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e32 v1, v0 2183; CI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v1, v1 2184; CI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2185; CI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 2186; CI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v1, v0 2187; CI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 2188 %sqrt = call float @llvm.sqrt.f32(float %val), !fpmath !1 2189 %div = fdiv contract float 1.0, %sqrt, !fpmath !1 2190 ret float %div 2191} 2192 2193define float @v_rsq_f32_missing_contract1(float %val) { 2194; GCN-DAZ-UNSAFE-LABEL: v_rsq_f32_missing_contract1: 2195; GCN-DAZ-UNSAFE: ; %bb.0: 2196; GCN-DAZ-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2197; GCN-DAZ-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 2198; GCN-DAZ-UNSAFE-NEXT: s_setpc_b64 s[30:31] 2199; 2200; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_missing_contract1: 2201; GCN-IEEE-UNSAFE: ; %bb.0: 2202; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2203; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 2204; GCN-IEEE-UNSAFE-NEXT: s_setpc_b64 s[30:31] 2205; 2206; GCN-DAZ-SAFE-LABEL: v_rsq_f32_missing_contract1: 2207; GCN-DAZ-SAFE: ; %bb.0: 2208; GCN-DAZ-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2209; GCN-DAZ-SAFE-NEXT: v_sqrt_f32_e32 v0, v0 2210; GCN-DAZ-SAFE-NEXT: v_rcp_f32_e32 v0, v0 2211; GCN-DAZ-SAFE-NEXT: s_setpc_b64 s[30:31] 2212; 2213; SI-IEEE-SAFE-LABEL: v_rsq_f32_missing_contract1: 2214; SI-IEEE-SAFE: ; %bb.0: 2215; 
SI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2216; SI-IEEE-SAFE-NEXT: s_mov_b32 s4, 0xf800000 2217; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 2218; SI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2219; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2220; SI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v1, v0 2221; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v2, s[4:5], -1, v1 2222; SI-IEEE-SAFE-NEXT: v_fma_f32 v3, -v2, v1, v0 2223; SI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 2224; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 2225; SI-IEEE-SAFE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 2226; SI-IEEE-SAFE-NEXT: v_fma_f32 v1, -v3, v1, v0 2227; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 2228; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 2229; SI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 2230; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2231; SI-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x260 2232; SI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 2233; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 2234; SI-IEEE-SAFE-NEXT: s_mov_b32 s4, 0x7f800000 2235; SI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e32 v1, v0 2236; SI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 2237; SI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc 2238; SI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v1, v1 2239; SI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2240; SI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 2241; SI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v1, v0 2242; SI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 2243; 2244; CI-IEEE-SAFE-LABEL: v_rsq_f32_missing_contract1: 2245; CI-IEEE-SAFE: ; %bb.0: 2246; CI-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2247; CI-IEEE-SAFE-NEXT: s_mov_b32 s4, 0xf800000 2248; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v1, 0x4f800000, v0 2249; CI-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2250; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2251; CI-IEEE-SAFE-NEXT: v_sqrt_f32_e32 v1, v0 2252; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v2, 
s[4:5], -1, v1 2253; CI-IEEE-SAFE-NEXT: v_fma_f32 v3, -v2, v1, v0 2254; CI-IEEE-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], 0, v3 2255; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, v1, v2, s[4:5] 2256; CI-IEEE-SAFE-NEXT: v_add_i32_e64 v3, s[4:5], 1, v1 2257; CI-IEEE-SAFE-NEXT: v_fma_f32 v1, -v3, v1, v0 2258; CI-IEEE-SAFE-NEXT: v_cmp_lt_f32_e64 s[4:5], 0, v1 2259; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, v2, v3, s[4:5] 2260; CI-IEEE-SAFE-NEXT: v_mul_f32_e32 v2, 0x37800000, v1 2261; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2262; CI-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x260 2263; CI-IEEE-SAFE-NEXT: v_cmp_class_f32_e32 vcc, v0, v2 2264; CI-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 2265; CI-IEEE-SAFE-NEXT: v_frexp_mant_f32_e32 v1, v0 2266; CI-IEEE-SAFE-NEXT: v_rcp_f32_e32 v1, v1 2267; CI-IEEE-SAFE-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2268; CI-IEEE-SAFE-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 2269; CI-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v1, v0 2270; CI-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 2271 %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1 2272 %div = fdiv float 1.0, %sqrt, !fpmath !1 2273 ret float %div 2274} 2275 2276; Test that we contract into FMA for an fadd user after introducing 2277; the fmul. 
2278define float @v_rsq_f32_contractable_user(float %val0, float %val1) { 2279; GCN-DAZ-LABEL: v_rsq_f32_contractable_user: 2280; GCN-DAZ: ; %bb.0: 2281; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2282; GCN-DAZ-NEXT: v_rsq_f32_e32 v0, v0 2283; GCN-DAZ-NEXT: v_add_f32_e32 v0, v0, v1 2284; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 2285; 2286; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_contractable_user: 2287; GCN-IEEE-UNSAFE: ; %bb.0: 2288; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2289; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 2290; GCN-IEEE-UNSAFE-NEXT: v_add_f32_e32 v0, v0, v1 2291; GCN-IEEE-UNSAFE-NEXT: s_setpc_b64 s[30:31] 2292; 2293; GCN-IEEE-SAFE-LABEL: v_rsq_f32_contractable_user: 2294; GCN-IEEE-SAFE: ; %bb.0: 2295; GCN-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2296; GCN-IEEE-SAFE-NEXT: s_mov_b32 s4, 0x800000 2297; GCN-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2298; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, 0, 24, vcc 2299; GCN-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2300; GCN-IEEE-SAFE-NEXT: v_rsq_f32_e32 v0, v0 2301; GCN-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x45800000 2302; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc 2303; GCN-IEEE-SAFE-NEXT: v_fma_f32 v0, v0, v2, v1 2304; GCN-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 2305 %sqrt = call contract float @llvm.sqrt.f32(float %val0), !fpmath !1 2306 %div = fdiv contract float 1.0, %sqrt, !fpmath !1 2307 %add = fadd contract float %div, %val1 2308 ret float %add 2309} 2310 2311; Named as the missing-contract-on-the-fdiv variant. NOTE(review): the fdiv below still carries contract, so this IR is identical to v_rsq_f32_contractable_user; confirm the intent, and regenerate the assertions if the flag is dropped. 2312define float @v_rsq_f32_contractable_user_missing_contract0(float %val0, float %val1) { 2313; GCN-DAZ-LABEL: v_rsq_f32_contractable_user_missing_contract0: 2314; GCN-DAZ: ; %bb.0: 2315; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2316; GCN-DAZ-NEXT: v_rsq_f32_e32 v0, v0 2317; GCN-DAZ-NEXT: v_add_f32_e32 v0, v0, v1 2318; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 2319; 2320; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_contractable_user_missing_contract0: 2321; GCN-IEEE-UNSAFE:
; %bb.0: 2322; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2323; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 2324; GCN-IEEE-UNSAFE-NEXT: v_add_f32_e32 v0, v0, v1 2325; GCN-IEEE-UNSAFE-NEXT: s_setpc_b64 s[30:31] 2326; 2327; GCN-IEEE-SAFE-LABEL: v_rsq_f32_contractable_user_missing_contract0: 2328; GCN-IEEE-SAFE: ; %bb.0: 2329; GCN-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2330; GCN-IEEE-SAFE-NEXT: s_mov_b32 s4, 0x800000 2331; GCN-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2332; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, 0, 24, vcc 2333; GCN-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2334; GCN-IEEE-SAFE-NEXT: v_rsq_f32_e32 v0, v0 2335; GCN-IEEE-SAFE-NEXT: v_mov_b32_e32 v2, 0x45800000 2336; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc 2337; GCN-IEEE-SAFE-NEXT: v_fma_f32 v0, v0, v2, v1 2338; GCN-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 2339 %sqrt = call contract float @llvm.sqrt.f32(float %val0), !fpmath !1 2340 %div = fdiv contract float 1.0, %sqrt, !fpmath !1 2341 %add = fadd contract float %div, %val1 2342 ret float %add 2343} 2344 2345; Missing contract on the fadd: the IEEE-safe assertions keep the second ldexp and a separate v_add_f32 instead of a single v_fma_f32. 2346define float @v_rsq_f32_contractable_user_missing_contract1(float %val0, float %val1) { 2347; GCN-DAZ-LABEL: v_rsq_f32_contractable_user_missing_contract1: 2348; GCN-DAZ: ; %bb.0: 2349; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2350; GCN-DAZ-NEXT: v_rsq_f32_e32 v0, v0 2351; GCN-DAZ-NEXT: v_add_f32_e32 v0, v0, v1 2352; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 2353; 2354; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_contractable_user_missing_contract1: 2355; GCN-IEEE-UNSAFE: ; %bb.0: 2356; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2357; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 2358; GCN-IEEE-UNSAFE-NEXT: v_add_f32_e32 v0, v0, v1 2359; GCN-IEEE-UNSAFE-NEXT: s_setpc_b64 s[30:31] 2360; 2361; GCN-IEEE-SAFE-LABEL: v_rsq_f32_contractable_user_missing_contract1: 2362; GCN-IEEE-SAFE: ; %bb.0: 2363; GCN-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2364; GCN-IEEE-SAFE-NEXT: s_mov_b32 s4, 0x800000 2365; GCN-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2366; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, 0, 24, vcc 2367; GCN-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2368; GCN-IEEE-SAFE-NEXT: v_rsq_f32_e32 v0, v0 2369; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v2, 0, 12, vcc 2370; GCN-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v0, v2 2371; GCN-IEEE-SAFE-NEXT: v_add_f32_e32 v0, v0, v1 2372; GCN-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 2373 %sqrt = call contract float @llvm.sqrt.f32(float %val0), !fpmath !1 2374 %div = fdiv contract float 1.0, %sqrt, !fpmath !1 2375 %add = fadd float %div, %val1 2376 ret float %add 2377} 2378; Input is marked nofpclass(sub); the IEEE-mode assertions show a bare v_rsq_f32 with no ldexp scaling around it. 2379define float @v_rsq_f32_known_never_denormal(float nofpclass(sub) %val) { 2380; GCN-DAZ-LABEL: v_rsq_f32_known_never_denormal: 2381; GCN-DAZ: ; %bb.0: 2382; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2383; GCN-DAZ-NEXT: v_rsq_f32_e32 v0, v0 2384; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 2385; 2386; GCN-IEEE-LABEL: v_rsq_f32_known_never_denormal: 2387; GCN-IEEE: ; %bb.0: 2388; GCN-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2389; GCN-IEEE-NEXT: v_rsq_f32_e32 v0, v0 2390; GCN-IEEE-NEXT: s_setpc_b64 s[30:31] 2391 %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1 2392 %div = fdiv contract float 1.0, %sqrt, !fpmath !1 2393 ret float %div 2394} 2395; Input is only marked nofpclass(psub); the IEEE-safe assertions still show the ldexp scaling before and after v_rsq_f32. 2396define float @v_rsq_f32_known_never_posdenormal(float nofpclass(psub) %val) { 2397; GCN-DAZ-LABEL: v_rsq_f32_known_never_posdenormal: 2398; GCN-DAZ: ; %bb.0: 2399; GCN-DAZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2400; GCN-DAZ-NEXT: v_rsq_f32_e32 v0, v0 2401; GCN-DAZ-NEXT: s_setpc_b64 s[30:31] 2402; 2403; GCN-IEEE-UNSAFE-LABEL: v_rsq_f32_known_never_posdenormal: 2404; GCN-IEEE-UNSAFE: ; %bb.0: 2405; GCN-IEEE-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2406; GCN-IEEE-UNSAFE-NEXT: v_rsq_f32_e32 v0, v0 2407; GCN-IEEE-UNSAFE-NEXT: s_setpc_b64 s[30:31] 2408; 2409; GCN-IEEE-SAFE-LABEL: v_rsq_f32_known_never_posdenormal: 2410;
GCN-IEEE-SAFE: ; %bb.0: 2411; GCN-IEEE-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2412; GCN-IEEE-SAFE-NEXT: s_mov_b32 s4, 0x800000 2413; GCN-IEEE-SAFE-NEXT: v_cmp_gt_f32_e32 vcc, s4, v0 2414; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, 0, 24, vcc 2415; GCN-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v0, v1 2416; GCN-IEEE-SAFE-NEXT: v_rsq_f32_e32 v0, v0 2417; GCN-IEEE-SAFE-NEXT: v_cndmask_b32_e64 v1, 0, 12, vcc 2418; GCN-IEEE-SAFE-NEXT: v_ldexp_f32_e32 v0, v0, v1 2419; GCN-IEEE-SAFE-NEXT: s_setpc_b64 s[30:31] 2420 %sqrt = call contract float @llvm.sqrt.f32(float %val), !fpmath !1 2421 %div = fdiv contract float 1.0, %sqrt, !fpmath !1 2422 ret float %div 2423} 2424; Metadata nodes referenced through !fpmath by the sqrt/fdiv instructions in the tests above. 2425!0 = !{float 2.500000e+00} 2426!1 = !{float 1.000000e+00} 2427 2428attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } 2429;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 2430; CI-DAZ-SAFE: {{.*}} 2431; CI-DAZ-UNSAFE: {{.*}} 2432; CI-IEEE-UNSAFE: {{.*}} 2433; SI-DAZ-SAFE: {{.*}} 2434; SI-DAZ-UNSAFE: {{.*}} 2435; SI-IEEE-UNSAFE: {{.*}} 2436