1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 2; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX67,GFX6,GFX6-FASTFMA %s 3; RUN: llc -mtriple=amdgcn -mcpu=pitcairn < %s | FileCheck -check-prefixes=GCN,GFX678,GFX67,GFX6,GFX6-SLOWFMA %s 4; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX67,GFX7 %s 5; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s 6; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX10 %s 7; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX11 %s 8; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s 9 10; These tests check that fdiv is expanded correctly and also test that the 11; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate 12; instruction groups. 13 14; These tests check fdiv lowering using unsafe_fp_math, coarse fp div, and IEEE754 fp div. 
15 16define amdgpu_kernel void @s_fdiv_f32_ninf(ptr addrspace(1) %out, float %a, float %b) #0 { 17; GFX6-FASTFMA-LABEL: s_fdiv_f32_ninf: 18; GFX6-FASTFMA: ; %bb.0: ; %entry 19; GFX6-FASTFMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 20; GFX6-FASTFMA-NEXT: s_mov_b32 s7, 0xf000 21; GFX6-FASTFMA-NEXT: s_mov_b32 s6, -1 22; GFX6-FASTFMA-NEXT: s_waitcnt lgkmcnt(0) 23; GFX6-FASTFMA-NEXT: v_mov_b32_e32 v1, s2 24; GFX6-FASTFMA-NEXT: s_mov_b32 s4, s0 25; GFX6-FASTFMA-NEXT: s_mov_b32 s5, s1 26; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[0:1], s3, s3, v1 27; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 28; GFX6-FASTFMA-NEXT: v_mov_b32_e32 v0, s3 29; GFX6-FASTFMA-NEXT: v_div_scale_f32 v0, vcc, s2, v0, s2 30; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 31; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 32; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 33; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v0, v3 34; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v4, v0 35; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v3, v4 36; GFX6-FASTFMA-NEXT: v_fma_f32 v0, -v2, v4, v0 37; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 38; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v0, v0, v3, v4 39; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v0, s3, v1 40; GFX6-FASTFMA-NEXT: buffer_store_dword v0, off, s[4:7], 0 41; GFX6-FASTFMA-NEXT: s_endpgm 42; 43; GFX6-SLOWFMA-LABEL: s_fdiv_f32_ninf: 44; GFX6-SLOWFMA: ; %bb.0: ; %entry 45; GFX6-SLOWFMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 46; GFX6-SLOWFMA-NEXT: s_mov_b32 s7, 0xf000 47; GFX6-SLOWFMA-NEXT: s_mov_b32 s6, -1 48; GFX6-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0) 49; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v0, s2 50; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, s[4:5], s3, s3, v0 51; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v2, s3 52; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, s2, v2, s2 53; GFX6-SLOWFMA-NEXT: s_mov_b32 s4, s0 54; GFX6-SLOWFMA-NEXT: s_mov_b32 s5, s1 55; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 56; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 57; 
GFX6-SLOWFMA-NEXT: v_fma_f32 v4, -v1, v3, 1.0 58; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, v4, v3, v3 59; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v4, v2, v3 60; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v4, v2 61; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v3, v4 62; GFX6-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v4, v2 63; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 64; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v4 65; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v1, s3, v0 66; GFX6-SLOWFMA-NEXT: buffer_store_dword v0, off, s[4:7], 0 67; GFX6-SLOWFMA-NEXT: s_endpgm 68; 69; GFX7-LABEL: s_fdiv_f32_ninf: 70; GFX7: ; %bb.0: ; %entry 71; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 72; GFX7-NEXT: s_mov_b32 s7, 0xf000 73; GFX7-NEXT: s_mov_b32 s6, -1 74; GFX7-NEXT: s_waitcnt lgkmcnt(0) 75; GFX7-NEXT: v_mov_b32_e32 v1, s2 76; GFX7-NEXT: s_mov_b32 s4, s0 77; GFX7-NEXT: s_mov_b32 s5, s1 78; GFX7-NEXT: v_div_scale_f32 v2, s[0:1], s3, s3, v1 79; GFX7-NEXT: v_rcp_f32_e32 v3, v2 80; GFX7-NEXT: v_mov_b32_e32 v0, s3 81; GFX7-NEXT: v_div_scale_f32 v0, vcc, s2, v0, s2 82; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 83; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 84; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 85; GFX7-NEXT: v_mul_f32_e32 v4, v0, v3 86; GFX7-NEXT: v_fma_f32 v5, -v2, v4, v0 87; GFX7-NEXT: v_fma_f32 v4, v5, v3, v4 88; GFX7-NEXT: v_fma_f32 v0, -v2, v4, v0 89; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 90; GFX7-NEXT: v_div_fmas_f32 v0, v0, v3, v4 91; GFX7-NEXT: v_div_fixup_f32 v0, v0, s3, v1 92; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 93; GFX7-NEXT: s_endpgm 94; 95; GFX8-LABEL: s_fdiv_f32_ninf: 96; GFX8: ; %bb.0: ; %entry 97; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 98; GFX8-NEXT: s_waitcnt lgkmcnt(0) 99; GFX8-NEXT: v_mov_b32_e32 v0, s2 100; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], s3, s3, v0 101; GFX8-NEXT: v_mov_b32_e32 v2, s3 102; GFX8-NEXT: v_div_scale_f32 v2, vcc, s2, v2, s2 103; GFX8-NEXT: v_rcp_f32_e32 v3, v1 104; GFX8-NEXT: s_setreg_imm32_b32 
hwreg(HW_REG_MODE, 4, 2), 3 105; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0 106; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3 107; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3 108; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2 109; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4 110; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2 111; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 112; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 113; GFX8-NEXT: v_div_fixup_f32 v2, v1, s3, v0 114; GFX8-NEXT: v_mov_b32_e32 v0, s0 115; GFX8-NEXT: v_mov_b32_e32 v1, s1 116; GFX8-NEXT: flat_store_dword v[0:1], v2 117; GFX8-NEXT: s_endpgm 118; 119; GFX10-LABEL: s_fdiv_f32_ninf: 120; GFX10: ; %bb.0: ; %entry 121; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 122; GFX10-NEXT: s_waitcnt lgkmcnt(0) 123; GFX10-NEXT: v_div_scale_f32 v0, s4, s3, s3, s2 124; GFX10-NEXT: v_div_scale_f32 v2, vcc_lo, s2, s3, s2 125; GFX10-NEXT: v_rcp_f32_e32 v1, v0 126; GFX10-NEXT: s_denorm_mode 15 127; GFX10-NEXT: v_fma_f32 v3, -v0, v1, 1.0 128; GFX10-NEXT: v_fmac_f32_e32 v1, v3, v1 129; GFX10-NEXT: v_mul_f32_e32 v3, v2, v1 130; GFX10-NEXT: v_fma_f32 v4, -v0, v3, v2 131; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v1 132; GFX10-NEXT: v_fma_f32 v0, -v0, v3, v2 133; GFX10-NEXT: s_denorm_mode 12 134; GFX10-NEXT: v_div_fmas_f32 v0, v0, v1, v3 135; GFX10-NEXT: v_mov_b32_e32 v1, 0 136; GFX10-NEXT: v_div_fixup_f32 v0, v0, s3, s2 137; GFX10-NEXT: global_store_dword v1, v0, s[0:1] 138; GFX10-NEXT: s_endpgm 139; 140; GFX11-LABEL: s_fdiv_f32_ninf: 141; GFX11: ; %bb.0: ; %entry 142; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 143; GFX11-NEXT: s_waitcnt lgkmcnt(0) 144; GFX11-NEXT: v_div_scale_f32 v0, null, s3, s3, s2 145; GFX11-NEXT: v_div_scale_f32 v2, vcc_lo, s2, s3, s2 146; GFX11-NEXT: v_rcp_f32_e32 v1, v0 147; GFX11-NEXT: s_denorm_mode 15 148; GFX11-NEXT: s_waitcnt_depctr 0xfff 149; GFX11-NEXT: v_fma_f32 v3, -v0, v1, 1.0 150; GFX11-NEXT: v_fmac_f32_e32 v1, v3, v1 151; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1 152; GFX11-NEXT: v_fma_f32 v4, -v0, v3, v2 153; GFX11-NEXT: 
v_fmac_f32_e32 v3, v4, v1 154; GFX11-NEXT: v_fma_f32 v0, -v0, v3, v2 155; GFX11-NEXT: s_denorm_mode 12 156; GFX11-NEXT: v_div_fmas_f32 v0, v0, v1, v3 157; GFX11-NEXT: v_mov_b32_e32 v1, 0 158; GFX11-NEXT: v_div_fixup_f32 v0, v0, s3, s2 159; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 160; GFX11-NEXT: s_endpgm 161; 162; EG-LABEL: s_fdiv_f32_ninf: 163; EG: ; %bb.0: ; %entry 164; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] 165; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 166; EG-NEXT: CF_END 167; EG-NEXT: PAD 168; EG-NEXT: ALU clause starting at 4: 169; EG-NEXT: RECIP_IEEE * T0.X, KC0[2].W, 170; EG-NEXT: MUL_IEEE T0.X, KC0[2].Z, PS, 171; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 172; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 173entry: 174 %fdiv = fdiv ninf float %a, %b 175 store float %fdiv, ptr addrspace(1) %out 176 ret void 177} 178 179define amdgpu_kernel void @s_fdiv_f32_ieee(ptr addrspace(1) %out, float %a, float %b) #1 { 180; GFX6-FASTFMA-LABEL: s_fdiv_f32_ieee: 181; GFX6-FASTFMA: ; %bb.0: ; %entry 182; GFX6-FASTFMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 183; GFX6-FASTFMA-NEXT: s_mov_b32 s7, 0xf000 184; GFX6-FASTFMA-NEXT: s_mov_b32 s6, -1 185; GFX6-FASTFMA-NEXT: s_waitcnt lgkmcnt(0) 186; GFX6-FASTFMA-NEXT: v_mov_b32_e32 v0, s2 187; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], s3, s3, v0 188; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1 189; GFX6-FASTFMA-NEXT: v_mov_b32_e32 v3, s3 190; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, s2, v3, s2 191; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0 192; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2 193; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2 194; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3 195; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4 196; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3 197; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v1, v1, v2, v4 198; GFX6-FASTFMA-NEXT: s_mov_b32 s4, s0 199; GFX6-FASTFMA-NEXT: s_mov_b32 s5, s1 200; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v1, s3, v0 201; GFX6-FASTFMA-NEXT: 
buffer_store_dword v0, off, s[4:7], 0 202; GFX6-FASTFMA-NEXT: s_endpgm 203; 204; GFX6-SLOWFMA-LABEL: s_fdiv_f32_ieee: 205; GFX6-SLOWFMA: ; %bb.0: ; %entry 206; GFX6-SLOWFMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 207; GFX6-SLOWFMA-NEXT: s_mov_b32 s7, 0xf000 208; GFX6-SLOWFMA-NEXT: s_mov_b32 s6, -1 209; GFX6-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0) 210; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v0, s2 211; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, s[4:5], s3, s3, v0 212; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v2, s3 213; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, s2, v2, s2 214; GFX6-SLOWFMA-NEXT: s_mov_b32 s4, s0 215; GFX6-SLOWFMA-NEXT: s_mov_b32 s5, s1 216; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 217; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, -v1, v3, 1.0 218; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, v4, v3, v3 219; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v4, v2, v3 220; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v4, v2 221; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v3, v4 222; GFX6-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v4, v2 223; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v4 224; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v1, s3, v0 225; GFX6-SLOWFMA-NEXT: buffer_store_dword v0, off, s[4:7], 0 226; GFX6-SLOWFMA-NEXT: s_endpgm 227; 228; GFX7-LABEL: s_fdiv_f32_ieee: 229; GFX7: ; %bb.0: ; %entry 230; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 231; GFX7-NEXT: s_mov_b32 s7, 0xf000 232; GFX7-NEXT: s_mov_b32 s6, -1 233; GFX7-NEXT: s_waitcnt lgkmcnt(0) 234; GFX7-NEXT: v_mov_b32_e32 v0, s2 235; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], s3, s3, v0 236; GFX7-NEXT: v_rcp_f32_e32 v2, v1 237; GFX7-NEXT: v_mov_b32_e32 v3, s3 238; GFX7-NEXT: v_div_scale_f32 v3, vcc, s2, v3, s2 239; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 240; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 241; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 242; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 243; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 244; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3 245; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4 246; GFX7-NEXT: s_mov_b32 s4, s0 247; GFX7-NEXT: s_mov_b32 s5, s1 248; 
GFX7-NEXT: v_div_fixup_f32 v0, v1, s3, v0 249; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 250; GFX7-NEXT: s_endpgm 251; 252; GFX8-LABEL: s_fdiv_f32_ieee: 253; GFX8: ; %bb.0: ; %entry 254; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 255; GFX8-NEXT: s_waitcnt lgkmcnt(0) 256; GFX8-NEXT: v_mov_b32_e32 v0, s2 257; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], s3, s3, v0 258; GFX8-NEXT: v_mov_b32_e32 v2, s3 259; GFX8-NEXT: v_div_scale_f32 v2, vcc, s2, v2, s2 260; GFX8-NEXT: v_rcp_f32_e32 v3, v1 261; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0 262; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3 263; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3 264; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2 265; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4 266; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2 267; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 268; GFX8-NEXT: v_div_fixup_f32 v2, v1, s3, v0 269; GFX8-NEXT: v_mov_b32_e32 v0, s0 270; GFX8-NEXT: v_mov_b32_e32 v1, s1 271; GFX8-NEXT: flat_store_dword v[0:1], v2 272; GFX8-NEXT: s_endpgm 273; 274; GFX10-LABEL: s_fdiv_f32_ieee: 275; GFX10: ; %bb.0: ; %entry 276; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 277; GFX10-NEXT: s_waitcnt lgkmcnt(0) 278; GFX10-NEXT: v_div_scale_f32 v0, s4, s3, s3, s2 279; GFX10-NEXT: v_rcp_f32_e32 v1, v0 280; GFX10-NEXT: v_fma_f32 v2, -v0, v1, 1.0 281; GFX10-NEXT: v_fmac_f32_e32 v1, v2, v1 282; GFX10-NEXT: v_div_scale_f32 v2, vcc_lo, s2, s3, s2 283; GFX10-NEXT: v_mul_f32_e32 v3, v2, v1 284; GFX10-NEXT: v_fma_f32 v4, -v0, v3, v2 285; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v1 286; GFX10-NEXT: v_fma_f32 v0, -v0, v3, v2 287; GFX10-NEXT: v_div_fmas_f32 v0, v0, v1, v3 288; GFX10-NEXT: v_mov_b32_e32 v1, 0 289; GFX10-NEXT: v_div_fixup_f32 v0, v0, s3, s2 290; GFX10-NEXT: global_store_dword v1, v0, s[0:1] 291; GFX10-NEXT: s_endpgm 292; 293; GFX11-LABEL: s_fdiv_f32_ieee: 294; GFX11: ; %bb.0: ; %entry 295; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 296; GFX11-NEXT: s_waitcnt lgkmcnt(0) 297; GFX11-NEXT: v_div_scale_f32 v0, null, s3, s3, s2 298; GFX11-NEXT: v_rcp_f32_e32 
v1, v0 299; GFX11-NEXT: s_waitcnt_depctr 0xfff 300; GFX11-NEXT: v_fma_f32 v2, -v0, v1, 1.0 301; GFX11-NEXT: v_fmac_f32_e32 v1, v2, v1 302; GFX11-NEXT: v_div_scale_f32 v2, vcc_lo, s2, s3, s2 303; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1 304; GFX11-NEXT: v_fma_f32 v4, -v0, v3, v2 305; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v1 306; GFX11-NEXT: v_fma_f32 v0, -v0, v3, v2 307; GFX11-NEXT: v_div_fmas_f32 v0, v0, v1, v3 308; GFX11-NEXT: v_mov_b32_e32 v1, 0 309; GFX11-NEXT: v_div_fixup_f32 v0, v0, s3, s2 310; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 311; GFX11-NEXT: s_endpgm 312; 313; EG-LABEL: s_fdiv_f32_ieee: 314; EG: ; %bb.0: ; %entry 315; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] 316; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 317; EG-NEXT: CF_END 318; EG-NEXT: PAD 319; EG-NEXT: ALU clause starting at 4: 320; EG-NEXT: RECIP_IEEE * T0.X, KC0[2].W, 321; EG-NEXT: MUL_IEEE T0.X, KC0[2].Z, PS, 322; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 323; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 324entry: 325 %fdiv = fdiv float %a, %b 326 store float %fdiv, ptr addrspace(1) %out 327 ret void 328} 329 330define amdgpu_kernel void @s_fdiv_25ulp_f32(ptr addrspace(1) %out, float %a, float %b) #0 { 331; GFX67-LABEL: s_fdiv_25ulp_f32: 332; GFX67: ; %bb.0: ; %entry 333; GFX67-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 334; GFX67-NEXT: v_mov_b32_e32 v0, 0x6f800000 335; GFX67-NEXT: v_mov_b32_e32 v1, 0x2f800000 336; GFX67-NEXT: s_mov_b32 s7, 0xf000 337; GFX67-NEXT: s_mov_b32 s6, -1 338; GFX67-NEXT: s_waitcnt lgkmcnt(0) 339; GFX67-NEXT: v_cmp_gt_f32_e64 vcc, |s3|, v0 340; GFX67-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc 341; GFX67-NEXT: v_mul_f32_e32 v1, s3, v0 342; GFX67-NEXT: v_rcp_f32_e32 v1, v1 343; GFX67-NEXT: s_mov_b32 s4, s0 344; GFX67-NEXT: s_mov_b32 s5, s1 345; GFX67-NEXT: v_mul_f32_e32 v1, s2, v1 346; GFX67-NEXT: v_mul_f32_e32 v0, v0, v1 347; GFX67-NEXT: buffer_store_dword v0, off, s[4:7], 0 348; GFX67-NEXT: s_endpgm 349; 350; GFX8-LABEL: s_fdiv_25ulp_f32: 351; GFX8: ; %bb.0: ; %entry 
352; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 353; GFX8-NEXT: v_mov_b32_e32 v0, 0x6f800000 354; GFX8-NEXT: v_mov_b32_e32 v1, 0x2f800000 355; GFX8-NEXT: s_waitcnt lgkmcnt(0) 356; GFX8-NEXT: v_cmp_gt_f32_e64 vcc, |s3|, v0 357; GFX8-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc 358; GFX8-NEXT: v_mul_f32_e32 v1, s3, v0 359; GFX8-NEXT: v_rcp_f32_e32 v1, v1 360; GFX8-NEXT: v_mul_f32_e32 v1, s2, v1 361; GFX8-NEXT: v_mul_f32_e32 v2, v0, v1 362; GFX8-NEXT: v_mov_b32_e32 v0, s0 363; GFX8-NEXT: v_mov_b32_e32 v1, s1 364; GFX8-NEXT: flat_store_dword v[0:1], v2 365; GFX8-NEXT: s_endpgm 366; 367; GFX10-LABEL: s_fdiv_25ulp_f32: 368; GFX10: ; %bb.0: ; %entry 369; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 370; GFX10-NEXT: v_mov_b32_e32 v2, 0 371; GFX10-NEXT: s_waitcnt lgkmcnt(0) 372; GFX10-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |s3| 373; GFX10-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x2f800000, s4 374; GFX10-NEXT: v_mul_f32_e32 v1, s3, v0 375; GFX10-NEXT: v_rcp_f32_e32 v1, v1 376; GFX10-NEXT: v_mul_f32_e32 v1, s2, v1 377; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 378; GFX10-NEXT: global_store_dword v2, v0, s[0:1] 379; GFX10-NEXT: s_endpgm 380; 381; GFX11-LABEL: s_fdiv_25ulp_f32: 382; GFX11: ; %bb.0: ; %entry 383; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 384; GFX11-NEXT: v_mov_b32_e32 v2, 0 385; GFX11-NEXT: s_waitcnt lgkmcnt(0) 386; GFX11-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |s3| 387; GFX11-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x2f800000, s4 388; GFX11-NEXT: v_mul_f32_e32 v1, s3, v0 389; GFX11-NEXT: v_rcp_f32_e32 v1, v1 390; GFX11-NEXT: s_waitcnt_depctr 0xfff 391; GFX11-NEXT: v_mul_f32_e32 v1, s2, v1 392; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 393; GFX11-NEXT: global_store_b32 v2, v0, s[0:1] 394; GFX11-NEXT: s_endpgm 395; 396; EG-LABEL: s_fdiv_25ulp_f32: 397; EG: ; %bb.0: ; %entry 398; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] 399; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 400; EG-NEXT: CF_END 401; EG-NEXT: PAD 402; EG-NEXT: ALU clause starting at 4: 403; EG-NEXT: RECIP_IEEE * 
T0.X, KC0[2].W, 404; EG-NEXT: MUL_IEEE T0.X, KC0[2].Z, PS, 405; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 406; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 407entry: 408 %fdiv = fdiv float %a, %b, !fpmath !0 409 store float %fdiv, ptr addrspace(1) %out 410 ret void 411} 412 413; With attribute group #1 the !fpmath-annotated fdiv is expected to use the frexp/rcp/ldexp expansion rather than the scaled rcp/mul sequence checked for s_fdiv_25ulp_f32. 414define amdgpu_kernel void @s_fdiv_25ulp_ieee_f32(ptr addrspace(1) %out, float %a, float %b) #1 { 415; GFX6-LABEL: s_fdiv_25ulp_ieee_f32: 416; GFX6: ; %bb.0: ; %entry 417; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 418; GFX6-NEXT: v_mov_b32_e32 v0, 0x7f800000 419; GFX6-NEXT: s_mov_b32 s7, 0xf000 420; GFX6-NEXT: s_mov_b32 s6, -1 421; GFX6-NEXT: s_waitcnt lgkmcnt(0) 422; GFX6-NEXT: v_frexp_mant_f32_e32 v1, s3 423; GFX6-NEXT: v_mov_b32_e32 v2, s3 424; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |s3|, v0 425; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 426; GFX6-NEXT: v_rcp_f32_e32 v1, v1 427; GFX6-NEXT: v_frexp_mant_f32_e32 v3, s2 428; GFX6-NEXT: v_mov_b32_e32 v4, s2 429; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |s2|, v0 430; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v2, s3 431; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc 432; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v3, s2 433; GFX6-NEXT: v_mul_f32_e32 v0, v0, v1 434; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v3, v2 435; GFX6-NEXT: s_mov_b32 s4, s0 436; GFX6-NEXT: s_mov_b32 s5, s1 437; GFX6-NEXT: v_ldexp_f32_e32 v0, v0, v1 438; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 439; GFX6-NEXT: s_endpgm 440; 441; GFX7-LABEL: s_fdiv_25ulp_ieee_f32: 442; GFX7: ; %bb.0: ; %entry 443; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 444; GFX7-NEXT: s_mov_b32 s7, 0xf000 445; GFX7-NEXT: s_mov_b32 s6, -1 446; GFX7-NEXT: s_waitcnt lgkmcnt(0) 447; GFX7-NEXT: v_frexp_mant_f32_e32 v0, s3 448; GFX7-NEXT: v_rcp_f32_e32 v0, v0 449; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, s3 450; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v2, s2 451; GFX7-NEXT: v_frexp_mant_f32_e32 v3, s2 452; GFX7-NEXT: v_mul_f32_e32 v0, v3, v0 453; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v2, v1 454; GFX7-NEXT: s_mov_b32 s4, s0 
455; GFX7-NEXT: s_mov_b32 s5, s1 456; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 457; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 458; GFX7-NEXT: s_endpgm 459; 460; GFX8-LABEL: s_fdiv_25ulp_ieee_f32: 461; GFX8: ; %bb.0: ; %entry 462; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 463; GFX8-NEXT: s_waitcnt lgkmcnt(0) 464; GFX8-NEXT: v_frexp_mant_f32_e32 v1, s3 465; GFX8-NEXT: v_rcp_f32_e32 v1, v1 466; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v0, s3 467; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v2, s2 468; GFX8-NEXT: v_frexp_mant_f32_e32 v3, s2 469; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v2, v0 470; GFX8-NEXT: v_mul_f32_e32 v1, v3, v1 471; GFX8-NEXT: v_ldexp_f32 v2, v1, v0 472; GFX8-NEXT: v_mov_b32_e32 v0, s0 473; GFX8-NEXT: v_mov_b32_e32 v1, s1 474; GFX8-NEXT: flat_store_dword v[0:1], v2 475; GFX8-NEXT: s_endpgm 476; 477; GFX10-LABEL: s_fdiv_25ulp_ieee_f32: 478; GFX10: ; %bb.0: ; %entry 479; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 480; GFX10-NEXT: s_waitcnt lgkmcnt(0) 481; GFX10-NEXT: v_frexp_mant_f32_e32 v0, s3 482; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, s3 483; GFX10-NEXT: v_frexp_mant_f32_e32 v2, s2 484; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v3, s2 485; GFX10-NEXT: v_rcp_f32_e32 v0, v0 486; GFX10-NEXT: v_sub_nc_u32_e32 v1, v3, v1 487; GFX10-NEXT: v_mul_f32_e32 v0, v2, v0 488; GFX10-NEXT: v_mov_b32_e32 v2, 0 489; GFX10-NEXT: v_ldexp_f32 v0, v0, v1 490; GFX10-NEXT: global_store_dword v2, v0, s[0:1] 491; GFX10-NEXT: s_endpgm 492; 493; GFX11-LABEL: s_fdiv_25ulp_ieee_f32: 494; GFX11: ; %bb.0: ; %entry 495; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 496; GFX11-NEXT: s_waitcnt lgkmcnt(0) 497; GFX11-NEXT: v_frexp_mant_f32_e32 v0, s3 498; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, s3 499; GFX11-NEXT: v_frexp_mant_f32_e32 v2, s2 500; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v3, s2 501; GFX11-NEXT: v_rcp_f32_e32 v0, v0 502; GFX11-NEXT: v_sub_nc_u32_e32 v1, v3, v1 503; GFX11-NEXT: s_waitcnt_depctr 0xfff 504; GFX11-NEXT: v_mul_f32_e32 v0, v2, v0 505; GFX11-NEXT: v_mov_b32_e32 v2, 0 506; 
GFX11-NEXT: v_ldexp_f32 v0, v0, v1 507; GFX11-NEXT: global_store_b32 v2, v0, s[0:1] 508; GFX11-NEXT: s_endpgm 509; 510; EG-LABEL: s_fdiv_25ulp_ieee_f32: 511; EG: ; %bb.0: ; %entry 512; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] 513; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 514; EG-NEXT: CF_END 515; EG-NEXT: PAD 516; EG-NEXT: ALU clause starting at 4: 517; EG-NEXT: RECIP_IEEE * T0.X, KC0[2].W, 518; EG-NEXT: MUL_IEEE T0.X, KC0[2].Z, PS, 519; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 520; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 521entry: 522 %fdiv = fdiv float %a, %b, !fpmath !0 523 store float %fdiv, ptr addrspace(1) %out 524 ret void 525} 526 527define amdgpu_kernel void @s_fdiv_fast_ieee_f32(ptr addrspace(1) %out, float %a, float %b) #1 { 528; GFX67-LABEL: s_fdiv_fast_ieee_f32: 529; GFX67: ; %bb.0: ; %entry 530; GFX67-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 531; GFX67-NEXT: s_mov_b32 s7, 0xf000 532; GFX67-NEXT: s_mov_b32 s6, -1 533; GFX67-NEXT: s_waitcnt lgkmcnt(0) 534; GFX67-NEXT: v_rcp_f32_e32 v0, s3 535; GFX67-NEXT: s_mov_b32 s4, s0 536; GFX67-NEXT: s_mov_b32 s5, s1 537; GFX67-NEXT: v_mul_f32_e32 v0, s2, v0 538; GFX67-NEXT: buffer_store_dword v0, off, s[4:7], 0 539; GFX67-NEXT: s_endpgm 540; 541; GFX8-LABEL: s_fdiv_fast_ieee_f32: 542; GFX8: ; %bb.0: ; %entry 543; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 544; GFX8-NEXT: s_waitcnt lgkmcnt(0) 545; GFX8-NEXT: v_rcp_f32_e32 v0, s3 546; GFX8-NEXT: v_mul_f32_e32 v2, s2, v0 547; GFX8-NEXT: v_mov_b32_e32 v0, s0 548; GFX8-NEXT: v_mov_b32_e32 v1, s1 549; GFX8-NEXT: flat_store_dword v[0:1], v2 550; GFX8-NEXT: s_endpgm 551; 552; GFX10-LABEL: s_fdiv_fast_ieee_f32: 553; GFX10: ; %bb.0: ; %entry 554; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 555; GFX10-NEXT: v_mov_b32_e32 v1, 0 556; GFX10-NEXT: s_waitcnt lgkmcnt(0) 557; GFX10-NEXT: v_rcp_f32_e32 v0, s3 558; GFX10-NEXT: v_mul_f32_e32 v0, s2, v0 559; GFX10-NEXT: global_store_dword v1, v0, s[0:1] 560; GFX10-NEXT: s_endpgm 561; 562; GFX11-LABEL: 
s_fdiv_fast_ieee_f32: 563; GFX11: ; %bb.0: ; %entry 564; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 565; GFX11-NEXT: s_waitcnt lgkmcnt(0) 566; GFX11-NEXT: v_rcp_f32_e32 v0, s3 567; GFX11-NEXT: s_waitcnt_depctr 0xfff 568; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mul_f32 v0, s2, v0 569; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 570; GFX11-NEXT: s_endpgm 571; 572; EG-LABEL: s_fdiv_fast_ieee_f32: 573; EG: ; %bb.0: ; %entry 574; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] 575; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 576; EG-NEXT: CF_END 577; EG-NEXT: PAD 578; EG-NEXT: ALU clause starting at 4: 579; EG-NEXT: RECIP_IEEE * T0.X, KC0[2].W, 580; EG-NEXT: MUL_IEEE T0.X, PS, KC0[2].Z, 581; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 582; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 583entry: 584 %fdiv = fdiv fast float %a, %b 585 store float %fdiv, ptr addrspace(1) %out 586 ret void 587} 588 589define amdgpu_kernel void @s_fdiv_f32_fast_math(ptr addrspace(1) %out, float %a, float %b) #0 { 590; GFX67-LABEL: s_fdiv_f32_fast_math: 591; GFX67: ; %bb.0: ; %entry 592; GFX67-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 593; GFX67-NEXT: s_mov_b32 s7, 0xf000 594; GFX67-NEXT: s_mov_b32 s6, -1 595; GFX67-NEXT: s_waitcnt lgkmcnt(0) 596; GFX67-NEXT: v_rcp_f32_e32 v0, s3 597; GFX67-NEXT: s_mov_b32 s4, s0 598; GFX67-NEXT: s_mov_b32 s5, s1 599; GFX67-NEXT: v_mul_f32_e32 v0, s2, v0 600; GFX67-NEXT: buffer_store_dword v0, off, s[4:7], 0 601; GFX67-NEXT: s_endpgm 602; 603; GFX8-LABEL: s_fdiv_f32_fast_math: 604; GFX8: ; %bb.0: ; %entry 605; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 606; GFX8-NEXT: s_waitcnt lgkmcnt(0) 607; GFX8-NEXT: v_rcp_f32_e32 v0, s3 608; GFX8-NEXT: v_mul_f32_e32 v2, s2, v0 609; GFX8-NEXT: v_mov_b32_e32 v0, s0 610; GFX8-NEXT: v_mov_b32_e32 v1, s1 611; GFX8-NEXT: flat_store_dword v[0:1], v2 612; GFX8-NEXT: s_endpgm 613; 614; GFX10-LABEL: s_fdiv_f32_fast_math: 615; GFX10: ; %bb.0: ; %entry 616; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 617; GFX10-NEXT: 
v_mov_b32_e32 v1, 0 618; GFX10-NEXT: s_waitcnt lgkmcnt(0) 619; GFX10-NEXT: v_rcp_f32_e32 v0, s3 620; GFX10-NEXT: v_mul_f32_e32 v0, s2, v0 621; GFX10-NEXT: global_store_dword v1, v0, s[0:1] 622; GFX10-NEXT: s_endpgm 623; 624; GFX11-LABEL: s_fdiv_f32_fast_math: 625; GFX11: ; %bb.0: ; %entry 626; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 627; GFX11-NEXT: s_waitcnt lgkmcnt(0) 628; GFX11-NEXT: v_rcp_f32_e32 v0, s3 629; GFX11-NEXT: s_waitcnt_depctr 0xfff 630; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mul_f32 v0, s2, v0 631; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 632; GFX11-NEXT: s_endpgm 633; 634; EG-LABEL: s_fdiv_f32_fast_math: 635; EG: ; %bb.0: ; %entry 636; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] 637; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 638; EG-NEXT: CF_END 639; EG-NEXT: PAD 640; EG-NEXT: ALU clause starting at 4: 641; EG-NEXT: RECIP_IEEE * T0.X, KC0[2].W, 642; EG-NEXT: MUL_IEEE T0.X, PS, KC0[2].Z, 643; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 644; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 645entry: 646 %fdiv = fdiv fast float %a, %b 647 store float %fdiv, ptr addrspace(1) %out 648 ret void 649} 650 651define amdgpu_kernel void @s_fdiv_ulp25_f32_fast_math(ptr addrspace(1) %out, float %a, float %b) #0 { 652; GFX67-LABEL: s_fdiv_ulp25_f32_fast_math: 653; GFX67: ; %bb.0: ; %entry 654; GFX67-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 655; GFX67-NEXT: s_mov_b32 s7, 0xf000 656; GFX67-NEXT: s_mov_b32 s6, -1 657; GFX67-NEXT: s_waitcnt lgkmcnt(0) 658; GFX67-NEXT: v_rcp_f32_e32 v0, s3 659; GFX67-NEXT: s_mov_b32 s4, s0 660; GFX67-NEXT: s_mov_b32 s5, s1 661; GFX67-NEXT: v_mul_f32_e32 v0, s2, v0 662; GFX67-NEXT: buffer_store_dword v0, off, s[4:7], 0 663; GFX67-NEXT: s_endpgm 664; 665; GFX8-LABEL: s_fdiv_ulp25_f32_fast_math: 666; GFX8: ; %bb.0: ; %entry 667; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 668; GFX8-NEXT: s_waitcnt lgkmcnt(0) 669; GFX8-NEXT: v_rcp_f32_e32 v0, s3 670; GFX8-NEXT: v_mul_f32_e32 v2, s2, v0 671; GFX8-NEXT: v_mov_b32_e32 v0, s0 
672; GFX8-NEXT: v_mov_b32_e32 v1, s1 673; GFX8-NEXT: flat_store_dword v[0:1], v2 674; GFX8-NEXT: s_endpgm 675; 676; GFX10-LABEL: s_fdiv_ulp25_f32_fast_math: 677; GFX10: ; %bb.0: ; %entry 678; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 679; GFX10-NEXT: v_mov_b32_e32 v1, 0 680; GFX10-NEXT: s_waitcnt lgkmcnt(0) 681; GFX10-NEXT: v_rcp_f32_e32 v0, s3 682; GFX10-NEXT: v_mul_f32_e32 v0, s2, v0 683; GFX10-NEXT: global_store_dword v1, v0, s[0:1] 684; GFX10-NEXT: s_endpgm 685; 686; GFX11-LABEL: s_fdiv_ulp25_f32_fast_math: 687; GFX11: ; %bb.0: ; %entry 688; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 689; GFX11-NEXT: s_waitcnt lgkmcnt(0) 690; GFX11-NEXT: v_rcp_f32_e32 v0, s3 691; GFX11-NEXT: s_waitcnt_depctr 0xfff 692; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mul_f32 v0, s2, v0 693; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 694; GFX11-NEXT: s_endpgm 695; 696; EG-LABEL: s_fdiv_ulp25_f32_fast_math: 697; EG: ; %bb.0: ; %entry 698; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] 699; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 700; EG-NEXT: CF_END 701; EG-NEXT: PAD 702; EG-NEXT: ALU clause starting at 4: 703; EG-NEXT: RECIP_IEEE * T0.X, KC0[2].W, 704; EG-NEXT: MUL_IEEE T0.X, PS, KC0[2].Z, 705; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 706; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 707entry: 708 %fdiv = fdiv fast float %a, %b, !fpmath !0 709 store float %fdiv, ptr addrspace(1) %out 710 ret void 711} 712 713define amdgpu_kernel void @s_fdiv_f32_arcp_daz(ptr addrspace(1) %out, float %a, float %b) #0 { 714; GFX6-FASTFMA-LABEL: s_fdiv_f32_arcp_daz: 715; GFX6-FASTFMA: ; %bb.0: ; %entry 716; GFX6-FASTFMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 717; GFX6-FASTFMA-NEXT: s_mov_b32 s7, 0xf000 718; GFX6-FASTFMA-NEXT: s_mov_b32 s6, -1 719; GFX6-FASTFMA-NEXT: s_waitcnt lgkmcnt(0) 720; GFX6-FASTFMA-NEXT: v_mov_b32_e32 v1, s2 721; GFX6-FASTFMA-NEXT: s_mov_b32 s4, s0 722; GFX6-FASTFMA-NEXT: s_mov_b32 s5, s1 723; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[0:1], s3, s3, v1 724; 
GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 725; GFX6-FASTFMA-NEXT: v_mov_b32_e32 v0, s3 726; GFX6-FASTFMA-NEXT: v_div_scale_f32 v0, vcc, s2, v0, s2 727; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 728; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 729; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 730; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v0, v3 731; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v4, v0 732; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v3, v4 733; GFX6-FASTFMA-NEXT: v_fma_f32 v0, -v2, v4, v0 734; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 735; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v0, v0, v3, v4 736; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v0, s3, v1 737; GFX6-FASTFMA-NEXT: buffer_store_dword v0, off, s[4:7], 0 738; GFX6-FASTFMA-NEXT: s_endpgm 739; 740; GFX6-SLOWFMA-LABEL: s_fdiv_f32_arcp_daz: 741; GFX6-SLOWFMA: ; %bb.0: ; %entry 742; GFX6-SLOWFMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 743; GFX6-SLOWFMA-NEXT: s_mov_b32 s7, 0xf000 744; GFX6-SLOWFMA-NEXT: s_mov_b32 s6, -1 745; GFX6-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0) 746; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v0, s2 747; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, s[4:5], s3, s3, v0 748; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v2, s3 749; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, s2, v2, s2 750; GFX6-SLOWFMA-NEXT: s_mov_b32 s4, s0 751; GFX6-SLOWFMA-NEXT: s_mov_b32 s5, s1 752; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 753; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 754; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, -v1, v3, 1.0 755; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, v4, v3, v3 756; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v4, v2, v3 757; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v4, v2 758; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v3, v4 759; GFX6-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v4, v2 760; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 761; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v4 762; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v1, s3, v0 763; GFX6-SLOWFMA-NEXT: buffer_store_dword 
v0, off, s[4:7], 0 764; GFX6-SLOWFMA-NEXT: s_endpgm 765; 766; GFX7-LABEL: s_fdiv_f32_arcp_daz: 767; GFX7: ; %bb.0: ; %entry 768; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 769; GFX7-NEXT: s_mov_b32 s7, 0xf000 770; GFX7-NEXT: s_mov_b32 s6, -1 771; GFX7-NEXT: s_waitcnt lgkmcnt(0) 772; GFX7-NEXT: v_mov_b32_e32 v1, s2 773; GFX7-NEXT: s_mov_b32 s4, s0 774; GFX7-NEXT: s_mov_b32 s5, s1 775; GFX7-NEXT: v_div_scale_f32 v2, s[0:1], s3, s3, v1 776; GFX7-NEXT: v_rcp_f32_e32 v3, v2 777; GFX7-NEXT: v_mov_b32_e32 v0, s3 778; GFX7-NEXT: v_div_scale_f32 v0, vcc, s2, v0, s2 779; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 780; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 781; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 782; GFX7-NEXT: v_mul_f32_e32 v4, v0, v3 783; GFX7-NEXT: v_fma_f32 v5, -v2, v4, v0 784; GFX7-NEXT: v_fma_f32 v4, v5, v3, v4 785; GFX7-NEXT: v_fma_f32 v0, -v2, v4, v0 786; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 787; GFX7-NEXT: v_div_fmas_f32 v0, v0, v3, v4 788; GFX7-NEXT: v_div_fixup_f32 v0, v0, s3, v1 789; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 790; GFX7-NEXT: s_endpgm 791; 792; GFX8-LABEL: s_fdiv_f32_arcp_daz: 793; GFX8: ; %bb.0: ; %entry 794; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 795; GFX8-NEXT: s_waitcnt lgkmcnt(0) 796; GFX8-NEXT: v_mov_b32_e32 v0, s2 797; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], s3, s3, v0 798; GFX8-NEXT: v_mov_b32_e32 v2, s3 799; GFX8-NEXT: v_div_scale_f32 v2, vcc, s2, v2, s2 800; GFX8-NEXT: v_rcp_f32_e32 v3, v1 801; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 802; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0 803; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3 804; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3 805; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2 806; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4 807; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2 808; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 809; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 810; GFX8-NEXT: v_div_fixup_f32 v2, v1, s3, v0 811; GFX8-NEXT: v_mov_b32_e32 v0, s0 
812; GFX8-NEXT: v_mov_b32_e32 v1, s1 813; GFX8-NEXT: flat_store_dword v[0:1], v2 814; GFX8-NEXT: s_endpgm 815; 816; GFX10-LABEL: s_fdiv_f32_arcp_daz: 817; GFX10: ; %bb.0: ; %entry 818; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 819; GFX10-NEXT: s_waitcnt lgkmcnt(0) 820; GFX10-NEXT: v_div_scale_f32 v0, s4, s3, s3, s2 821; GFX10-NEXT: v_div_scale_f32 v2, vcc_lo, s2, s3, s2 822; GFX10-NEXT: v_rcp_f32_e32 v1, v0 823; GFX10-NEXT: s_denorm_mode 15 824; GFX10-NEXT: v_fma_f32 v3, -v0, v1, 1.0 825; GFX10-NEXT: v_fmac_f32_e32 v1, v3, v1 826; GFX10-NEXT: v_mul_f32_e32 v3, v2, v1 827; GFX10-NEXT: v_fma_f32 v4, -v0, v3, v2 828; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v1 829; GFX10-NEXT: v_fma_f32 v0, -v0, v3, v2 830; GFX10-NEXT: s_denorm_mode 12 831; GFX10-NEXT: v_div_fmas_f32 v0, v0, v1, v3 832; GFX10-NEXT: v_mov_b32_e32 v1, 0 833; GFX10-NEXT: v_div_fixup_f32 v0, v0, s3, s2 834; GFX10-NEXT: global_store_dword v1, v0, s[0:1] 835; GFX10-NEXT: s_endpgm 836; 837; GFX11-LABEL: s_fdiv_f32_arcp_daz: 838; GFX11: ; %bb.0: ; %entry 839; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 840; GFX11-NEXT: s_waitcnt lgkmcnt(0) 841; GFX11-NEXT: v_div_scale_f32 v0, null, s3, s3, s2 842; GFX11-NEXT: v_div_scale_f32 v2, vcc_lo, s2, s3, s2 843; GFX11-NEXT: v_rcp_f32_e32 v1, v0 844; GFX11-NEXT: s_denorm_mode 15 845; GFX11-NEXT: s_waitcnt_depctr 0xfff 846; GFX11-NEXT: v_fma_f32 v3, -v0, v1, 1.0 847; GFX11-NEXT: v_fmac_f32_e32 v1, v3, v1 848; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1 849; GFX11-NEXT: v_fma_f32 v4, -v0, v3, v2 850; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v1 851; GFX11-NEXT: v_fma_f32 v0, -v0, v3, v2 852; GFX11-NEXT: s_denorm_mode 12 853; GFX11-NEXT: v_div_fmas_f32 v0, v0, v1, v3 854; GFX11-NEXT: v_mov_b32_e32 v1, 0 855; GFX11-NEXT: v_div_fixup_f32 v0, v0, s3, s2 856; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 857; GFX11-NEXT: s_endpgm 858; 859; EG-LABEL: s_fdiv_f32_arcp_daz: 860; EG: ; %bb.0: ; %entry 861; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] 862; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, 
T1.X, 1 863; EG-NEXT: CF_END 864; EG-NEXT: PAD 865; EG-NEXT: ALU clause starting at 4: 866; EG-NEXT: RECIP_IEEE * T0.X, KC0[2].W, 867; EG-NEXT: MUL_IEEE T0.X, KC0[2].Z, PS, 868; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 869; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 870entry: 871 %fdiv = fdiv arcp float %a, %b 872 store float %fdiv, ptr addrspace(1) %out 873 ret void 874} 875 ; Scalar float fdiv carrying both the arcp and ninf fast-math flags, operands taken from kernel arguments. The autogenerated checks for this function expect a reciprocal followed by a multiply on every run line (v_rcp_f32 + v_mul_f32 on the amdgcn targets, RECIP_IEEE + MUL_IEEE on r600) and no v_div_scale / v_div_fmas / v_div_fixup sequence. Regenerate the checks with utils/update_llc_test_checks.py rather than editing them by hand. 876define amdgpu_kernel void @s_fdiv_f32_arcp_ninf(ptr addrspace(1) %out, float %a, float %b) #0 { 877; GFX67-LABEL: s_fdiv_f32_arcp_ninf: 878; GFX67: ; %bb.0: ; %entry 879; GFX67-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 880; GFX67-NEXT: s_mov_b32 s7, 0xf000 881; GFX67-NEXT: s_mov_b32 s6, -1 882; GFX67-NEXT: s_waitcnt lgkmcnt(0) 883; GFX67-NEXT: v_rcp_f32_e32 v0, s3 884; GFX67-NEXT: s_mov_b32 s4, s0 885; GFX67-NEXT: s_mov_b32 s5, s1 886; GFX67-NEXT: v_mul_f32_e32 v0, s2, v0 887; GFX67-NEXT: buffer_store_dword v0, off, s[4:7], 0 888; GFX67-NEXT: s_endpgm 889; 890; GFX8-LABEL: s_fdiv_f32_arcp_ninf: 891; GFX8: ; %bb.0: ; %entry 892; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 893; GFX8-NEXT: s_waitcnt lgkmcnt(0) 894; GFX8-NEXT: v_rcp_f32_e32 v0, s3 895; GFX8-NEXT: v_mul_f32_e32 v2, s2, v0 896; GFX8-NEXT: v_mov_b32_e32 v0, s0 897; GFX8-NEXT: v_mov_b32_e32 v1, s1 898; GFX8-NEXT: flat_store_dword v[0:1], v2 899; GFX8-NEXT: s_endpgm 900; 901; GFX10-LABEL: s_fdiv_f32_arcp_ninf: 902; GFX10: ; %bb.0: ; %entry 903; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 904; GFX10-NEXT: v_mov_b32_e32 v1, 0 905; GFX10-NEXT: s_waitcnt lgkmcnt(0) 906; GFX10-NEXT: v_rcp_f32_e32 v0, s3 907; GFX10-NEXT: v_mul_f32_e32 v0, s2, v0 908; GFX10-NEXT: global_store_dword v1, v0, s[0:1] 909; GFX10-NEXT: s_endpgm 910; 911; GFX11-LABEL: s_fdiv_f32_arcp_ninf: 912; GFX11: ; %bb.0: ; %entry 913; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 914; GFX11-NEXT: s_waitcnt lgkmcnt(0) 915; GFX11-NEXT: v_rcp_f32_e32 v0, s3 916; GFX11-NEXT: s_waitcnt_depctr 0xfff 917; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mul_f32 v0, s2, v0 918; 
GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 919; GFX11-NEXT: s_endpgm 920; 921; EG-LABEL: s_fdiv_f32_arcp_ninf: 922; EG: ; %bb.0: ; %entry 923; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] 924; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 925; EG-NEXT: CF_END 926; EG-NEXT: PAD 927; EG-NEXT: ALU clause starting at 4: 928; EG-NEXT: RECIP_IEEE * T0.X, KC0[2].W, 929; EG-NEXT: MUL_IEEE T0.X, PS, KC0[2].Z, 930; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 931; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 932entry: 933 %fdiv = fdiv arcp ninf float %a, %b 934 store float %fdiv, ptr addrspace(1) %out 935 ret void 936} 937 ; fdiv on <2 x float> kernel arguments with no fast-math flags on the instruction. The autogenerated amdgcn checks expect a full v_div_scale_f32 / v_rcp_f32 / v_fma_f32 / v_div_fmas_f32 / v_div_fixup_f32 expansion for each of the two elements; the r600 checks expect RECIP_IEEE + MUL_IEEE for each element. Regenerate the checks with utils/update_llc_test_checks.py rather than editing them by hand. 938define amdgpu_kernel void @s_fdiv_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 { 939; GFX6-FASTFMA-LABEL: s_fdiv_v2f32: 940; GFX6-FASTFMA: ; %bb.0: ; %entry 941; GFX6-FASTFMA-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xb 942; GFX6-FASTFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 943; GFX6-FASTFMA-NEXT: s_mov_b32 s3, 0xf000 944; GFX6-FASTFMA-NEXT: s_mov_b32 s2, -1 945; GFX6-FASTFMA-NEXT: s_waitcnt lgkmcnt(0) 946; GFX6-FASTFMA-NEXT: v_mov_b32_e32 v1, s9 947; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], s11, s11, v1 948; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 949; GFX6-FASTFMA-NEXT: v_mov_b32_e32 v0, s11 950; GFX6-FASTFMA-NEXT: v_div_scale_f32 v0, vcc, s9, v0, s9 951; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 952; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 953; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 954; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v0, v3 955; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v4, v0 956; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v3, v4 957; GFX6-FASTFMA-NEXT: v_fma_f32 v0, -v2, v4, v0 958; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 959; GFX6-FASTFMA-NEXT: v_mov_b32_e32 v2, s8 960; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v0, v0, v3, v4 961; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], s10, s10, v2 962; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3 963; GFX6-FASTFMA-NEXT: v_div_fixup_f32 
v1, v0, s11, v1 964; GFX6-FASTFMA-NEXT: v_mov_b32_e32 v0, s10 965; GFX6-FASTFMA-NEXT: v_div_scale_f32 v0, vcc, s8, v0, s8 966; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 967; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v3, v4, 1.0 968; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v4, v4 969; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v0, v4 970; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v3, v5, v0 971; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v4, v5 972; GFX6-FASTFMA-NEXT: v_fma_f32 v0, -v3, v5, v0 973; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 974; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v0, v0, v4, v5 975; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v0, s10, v2 976; GFX6-FASTFMA-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 977; GFX6-FASTFMA-NEXT: s_endpgm 978; 979; GFX6-SLOWFMA-LABEL: s_fdiv_v2f32: 980; GFX6-SLOWFMA: ; %bb.0: ; %entry 981; GFX6-SLOWFMA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb 982; GFX6-SLOWFMA-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 983; GFX6-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0) 984; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v0, s1 985; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, s[6:7], s3, s3, v0 986; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v2, s3 987; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, s1, v2, s1 988; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v4, s0 989; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 990; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 991; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v3, 1.0 992; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, v5, v3, v3 993; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v2, v3 994; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v1, v5, v2 995; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v3, v5 996; GFX6-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v5, v2 997; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 998; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[6:7], s2, s2, v4 999; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v5 1000; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v3, s2 1001; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, s0, v3, s0 1002; 
GFX6-SLOWFMA-NEXT: s_mov_b32 s7, 0xf000 1003; GFX6-SLOWFMA-NEXT: s_mov_b32 s6, -1 1004; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v5, v2 1005; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v1, v1, s3, v0 1006; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1007; GFX6-SLOWFMA-NEXT: v_fma_f32 v0, -v2, v5, 1.0 1008; GFX6-SLOWFMA-NEXT: v_fma_f32 v0, v0, v5, v5 1009; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v0 1010; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 1011; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v0, v5 1012; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 1013; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1014; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v0, v2, v0, v5 1015; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v0, s2, v4 1016; GFX6-SLOWFMA-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1017; GFX6-SLOWFMA-NEXT: s_endpgm 1018; 1019; GFX7-LABEL: s_fdiv_v2f32: 1020; GFX7: ; %bb.0: ; %entry 1021; GFX7-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0xb 1022; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 1023; GFX7-NEXT: s_mov_b32 s3, 0xf000 1024; GFX7-NEXT: s_mov_b32 s2, -1 1025; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1026; GFX7-NEXT: v_mov_b32_e32 v1, s9 1027; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], s11, s11, v1 1028; GFX7-NEXT: v_rcp_f32_e32 v3, v2 1029; GFX7-NEXT: v_mov_b32_e32 v0, s11 1030; GFX7-NEXT: v_div_scale_f32 v0, vcc, s9, v0, s9 1031; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1032; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 1033; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 1034; GFX7-NEXT: v_mul_f32_e32 v4, v0, v3 1035; GFX7-NEXT: v_fma_f32 v5, -v2, v4, v0 1036; GFX7-NEXT: v_fma_f32 v4, v5, v3, v4 1037; GFX7-NEXT: v_fma_f32 v0, -v2, v4, v0 1038; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1039; GFX7-NEXT: v_mov_b32_e32 v2, s8 1040; GFX7-NEXT: v_div_fmas_f32 v0, v0, v3, v4 1041; GFX7-NEXT: v_div_scale_f32 v3, s[4:5], s10, s10, v2 1042; GFX7-NEXT: v_rcp_f32_e32 v4, v3 1043; GFX7-NEXT: v_div_fixup_f32 v1, v0, s11, v1 1044; GFX7-NEXT: v_mov_b32_e32 
v0, s10 1045; GFX7-NEXT: v_div_scale_f32 v0, vcc, s8, v0, s8 1046; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1047; GFX7-NEXT: v_fma_f32 v5, -v3, v4, 1.0 1048; GFX7-NEXT: v_fma_f32 v4, v5, v4, v4 1049; GFX7-NEXT: v_mul_f32_e32 v5, v0, v4 1050; GFX7-NEXT: v_fma_f32 v6, -v3, v5, v0 1051; GFX7-NEXT: v_fma_f32 v5, v6, v4, v5 1052; GFX7-NEXT: v_fma_f32 v0, -v3, v5, v0 1053; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1054; GFX7-NEXT: v_div_fmas_f32 v0, v0, v4, v5 1055; GFX7-NEXT: v_div_fixup_f32 v0, v0, s10, v2 1056; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1057; GFX7-NEXT: s_endpgm 1058; 1059; GFX8-LABEL: s_fdiv_v2f32: 1060; GFX8: ; %bb.0: ; %entry 1061; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c 1062; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1063; GFX8-NEXT: v_mov_b32_e32 v0, s1 1064; GFX8-NEXT: v_div_scale_f32 v1, s[6:7], s3, s3, v0 1065; GFX8-NEXT: v_mov_b32_e32 v2, s3 1066; GFX8-NEXT: v_div_scale_f32 v2, vcc, s1, v2, s1 1067; GFX8-NEXT: v_mov_b32_e32 v4, s0 1068; GFX8-NEXT: v_rcp_f32_e32 v3, v1 1069; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1070; GFX8-NEXT: v_fma_f32 v5, -v1, v3, 1.0 1071; GFX8-NEXT: v_fma_f32 v3, v5, v3, v3 1072; GFX8-NEXT: v_mul_f32_e32 v5, v2, v3 1073; GFX8-NEXT: v_fma_f32 v6, -v1, v5, v2 1074; GFX8-NEXT: v_fma_f32 v5, v6, v3, v5 1075; GFX8-NEXT: v_fma_f32 v1, -v1, v5, v2 1076; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1077; GFX8-NEXT: v_div_scale_f32 v2, s[6:7], s2, s2, v4 1078; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v5 1079; GFX8-NEXT: v_mov_b32_e32 v3, s2 1080; GFX8-NEXT: v_div_scale_f32 v3, vcc, s0, v3, s0 1081; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1082; GFX8-NEXT: v_rcp_f32_e32 v5, v2 1083; GFX8-NEXT: v_div_fixup_f32 v1, v1, s3, v0 1084; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1085; GFX8-NEXT: v_fma_f32 v0, -v2, v5, 1.0 1086; GFX8-NEXT: v_fma_f32 v0, v0, v5, v5 1087; GFX8-NEXT: v_mul_f32_e32 v5, v3, v0 1088; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 
1089; GFX8-NEXT: v_fma_f32 v5, v6, v0, v5 1090; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 1091; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1092; GFX8-NEXT: v_div_fmas_f32 v0, v2, v0, v5 1093; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1094; GFX8-NEXT: v_mov_b32_e32 v3, s1 1095; GFX8-NEXT: v_mov_b32_e32 v2, s0 1096; GFX8-NEXT: v_div_fixup_f32 v0, v0, s2, v4 1097; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1098; GFX8-NEXT: s_endpgm 1099; 1100; GFX10-LABEL: s_fdiv_v2f32: 1101; GFX10: ; %bb.0: ; %entry 1102; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c 1103; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1104; GFX10-NEXT: v_div_scale_f32 v0, s6, s3, s3, s1 1105; GFX10-NEXT: v_div_scale_f32 v2, vcc_lo, s1, s3, s1 1106; GFX10-NEXT: v_rcp_f32_e32 v1, v0 1107; GFX10-NEXT: s_denorm_mode 15 1108; GFX10-NEXT: v_fma_f32 v3, -v0, v1, 1.0 1109; GFX10-NEXT: v_fmac_f32_e32 v1, v3, v1 1110; GFX10-NEXT: v_mul_f32_e32 v3, v2, v1 1111; GFX10-NEXT: v_fma_f32 v4, -v0, v3, v2 1112; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v1 1113; GFX10-NEXT: v_fma_f32 v0, -v0, v3, v2 1114; GFX10-NEXT: s_denorm_mode 12 1115; GFX10-NEXT: v_div_scale_f32 v2, s6, s2, s2, s0 1116; GFX10-NEXT: v_div_fmas_f32 v0, v0, v1, v3 1117; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 1118; GFX10-NEXT: v_rcp_f32_e32 v3, v2 1119; GFX10-NEXT: v_div_fixup_f32 v1, v0, s3, s1 1120; GFX10-NEXT: v_div_scale_f32 v0, vcc_lo, s0, s2, s0 1121; GFX10-NEXT: s_denorm_mode 15 1122; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 1123; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 1124; GFX10-NEXT: v_mul_f32_e32 v4, v0, v3 1125; GFX10-NEXT: v_fma_f32 v5, -v2, v4, v0 1126; GFX10-NEXT: v_fmac_f32_e32 v4, v5, v3 1127; GFX10-NEXT: v_fma_f32 v0, -v2, v4, v0 1128; GFX10-NEXT: s_denorm_mode 12 1129; GFX10-NEXT: v_mov_b32_e32 v2, 0 1130; GFX10-NEXT: v_div_fmas_f32 v0, v0, v3, v4 1131; GFX10-NEXT: v_div_fixup_f32 v0, v0, s2, s0 1132; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1133; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] 1134; GFX10-NEXT: s_endpgm 1135; 1136; GFX11-LABEL: 
s_fdiv_v2f32: 1137; GFX11: ; %bb.0: ; %entry 1138; GFX11-NEXT: s_clause 0x1 1139; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c 1140; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 1141; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1142; GFX11-NEXT: v_div_scale_f32 v0, null, s3, s3, s1 1143; GFX11-NEXT: v_div_scale_f32 v2, vcc_lo, s1, s3, s1 1144; GFX11-NEXT: v_rcp_f32_e32 v1, v0 1145; GFX11-NEXT: s_denorm_mode 15 1146; GFX11-NEXT: s_waitcnt_depctr 0xfff 1147; GFX11-NEXT: v_fma_f32 v3, -v0, v1, 1.0 1148; GFX11-NEXT: v_fmac_f32_e32 v1, v3, v1 1149; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1 1150; GFX11-NEXT: v_fma_f32 v4, -v0, v3, v2 1151; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v1 1152; GFX11-NEXT: v_fma_f32 v0, -v0, v3, v2 1153; GFX11-NEXT: s_denorm_mode 12 1154; GFX11-NEXT: v_div_scale_f32 v2, null, s2, s2, s0 1155; GFX11-NEXT: v_div_fmas_f32 v0, v0, v1, v3 1156; GFX11-NEXT: v_rcp_f32_e32 v3, v2 1157; GFX11-NEXT: v_div_fixup_f32 v1, v0, s3, s1 1158; GFX11-NEXT: v_div_scale_f32 v0, vcc_lo, s0, s2, s0 1159; GFX11-NEXT: s_denorm_mode 15 1160; GFX11-NEXT: s_waitcnt_depctr 0xfff 1161; GFX11-NEXT: v_fma_f32 v4, -v2, v3, 1.0 1162; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 1163; GFX11-NEXT: v_mul_f32_e32 v4, v0, v3 1164; GFX11-NEXT: v_fma_f32 v5, -v2, v4, v0 1165; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v3 1166; GFX11-NEXT: v_fma_f32 v0, -v2, v4, v0 1167; GFX11-NEXT: v_mov_b32_e32 v2, 0 1168; GFX11-NEXT: s_denorm_mode 12 1169; GFX11-NEXT: v_div_fmas_f32 v0, v0, v3, v4 1170; GFX11-NEXT: v_div_fixup_f32 v0, v0, s2, s0 1171; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5] 1172; GFX11-NEXT: s_endpgm 1173; 1174; EG-LABEL: s_fdiv_v2f32: 1175; EG: ; %bb.0: ; %entry 1176; EG-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] 1177; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1178; EG-NEXT: CF_END 1179; EG-NEXT: PAD 1180; EG-NEXT: ALU clause starting at 4: 1181; EG-NEXT: RECIP_IEEE * T0.X, KC0[3].Z, 1182; EG-NEXT: MUL_IEEE T0.Y, KC0[3].X, PS, 1183; EG-NEXT: RECIP_IEEE * T0.X, KC0[3].Y, 1184; EG-NEXT: MUL_IEEE T0.X, 
KC0[2].W, PS, 1185; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1186; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1187entry: 1188 %fdiv = fdiv <2 x float> %a, %b 1189 store <2 x float> %fdiv, ptr addrspace(1) %out 1190 ret void 1191} 1192 ; fdiv arcp on <2 x float> kernel arguments with !fpmath !0 attached to the instruction (metadata node !0 is defined outside the visible part of the file; the function name suggests a 2.5 ulp bound - TODO confirm against the !0 definition). The autogenerated checks expect v_rcp_f32 + v_mul_f32 per element on the amdgcn targets and RECIP_IEEE + MUL_IEEE per element on r600. Regenerate the checks with utils/update_llc_test_checks.py rather than editing them by hand. 1193define amdgpu_kernel void @s_fdiv_ulp25_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 { 1194; GFX67-LABEL: s_fdiv_ulp25_v2f32: 1195; GFX67: ; %bb.0: ; %entry 1196; GFX67-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb 1197; GFX67-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 1198; GFX67-NEXT: s_mov_b32 s7, 0xf000 1199; GFX67-NEXT: s_mov_b32 s6, -1 1200; GFX67-NEXT: s_waitcnt lgkmcnt(0) 1201; GFX67-NEXT: v_rcp_f32_e32 v0, s2 1202; GFX67-NEXT: v_rcp_f32_e32 v1, s3 1203; GFX67-NEXT: v_mul_f32_e32 v0, s0, v0 1204; GFX67-NEXT: v_mul_f32_e32 v1, s1, v1 1205; GFX67-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1206; GFX67-NEXT: s_endpgm 1207; 1208; GFX8-LABEL: s_fdiv_ulp25_v2f32: 1209; GFX8: ; %bb.0: ; %entry 1210; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c 1211; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 1212; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1213; GFX8-NEXT: v_rcp_f32_e32 v0, s2 1214; GFX8-NEXT: v_rcp_f32_e32 v1, s3 1215; GFX8-NEXT: v_mov_b32_e32 v2, s4 1216; GFX8-NEXT: v_mov_b32_e32 v3, s5 1217; GFX8-NEXT: v_mul_f32_e32 v0, s0, v0 1218; GFX8-NEXT: v_mul_f32_e32 v1, s1, v1 1219; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1220; GFX8-NEXT: s_endpgm 1221; 1222; GFX10-LABEL: s_fdiv_ulp25_v2f32: 1223; GFX10: ; %bb.0: ; %entry 1224; GFX10-NEXT: s_clause 0x1 1225; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c 1226; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 1227; GFX10-NEXT: v_mov_b32_e32 v2, 0 1228; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1229; GFX10-NEXT: v_rcp_f32_e32 v0, s2 1230; GFX10-NEXT: v_rcp_f32_e32 v1, s3 1231; GFX10-NEXT: v_mul_f32_e32 v0, s0, v0 1232; GFX10-NEXT: v_mul_f32_e32 v1, s1, v1 1233; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] 1234; GFX10-NEXT: s_endpgm 1235; 1236; GFX11-LABEL: 
s_fdiv_ulp25_v2f32: 1237; GFX11: ; %bb.0: ; %entry 1238; GFX11-NEXT: s_clause 0x1 1239; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c 1240; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 1241; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1242; GFX11-NEXT: v_rcp_f32_e32 v0, s2 1243; GFX11-NEXT: v_rcp_f32_e32 v1, s3 1244; GFX11-NEXT: v_mov_b32_e32 v2, 0 1245; GFX11-NEXT: s_waitcnt_depctr 0xfff 1246; GFX11-NEXT: v_dual_mul_f32 v0, s0, v0 :: v_dual_mul_f32 v1, s1, v1 1247; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5] 1248; GFX11-NEXT: s_endpgm 1249; 1250; EG-LABEL: s_fdiv_ulp25_v2f32: 1251; EG: ; %bb.0: ; %entry 1252; EG-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] 1253; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1254; EG-NEXT: CF_END 1255; EG-NEXT: PAD 1256; EG-NEXT: ALU clause starting at 4: 1257; EG-NEXT: RECIP_IEEE * T0.X, KC0[3].Z, 1258; EG-NEXT: MUL_IEEE T0.Y, KC0[3].X, PS, 1259; EG-NEXT: RECIP_IEEE * T0.X, KC0[3].Y, 1260; EG-NEXT: MUL_IEEE T0.X, KC0[2].W, PS, 1261; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1262; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1263entry: 1264 %fdiv = fdiv arcp <2 x float> %a, %b, !fpmath !0 1265 store <2 x float> %fdiv, ptr addrspace(1) %out 1266 ret void 1267} 1268 ; fdiv with the fast flag on <2 x float> kernel arguments. The autogenerated checks expect v_rcp_f32 + v_mul_f32 per element on the amdgcn targets and RECIP_IEEE + MUL_IEEE per element on r600, with no v_div_scale / v_div_fmas / v_div_fixup sequence. Regenerate the checks with utils/update_llc_test_checks.py rather than editing them by hand. 1269define amdgpu_kernel void @s_fdiv_v2f32_fast_math(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 { 1270; GFX67-LABEL: s_fdiv_v2f32_fast_math: 1271; GFX67: ; %bb.0: ; %entry 1272; GFX67-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb 1273; GFX67-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 1274; GFX67-NEXT: s_mov_b32 s7, 0xf000 1275; GFX67-NEXT: s_mov_b32 s6, -1 1276; GFX67-NEXT: s_waitcnt lgkmcnt(0) 1277; GFX67-NEXT: v_rcp_f32_e32 v0, s3 1278; GFX67-NEXT: v_rcp_f32_e32 v2, s2 1279; GFX67-NEXT: v_mul_f32_e32 v1, s1, v0 1280; GFX67-NEXT: v_mul_f32_e32 v0, s0, v2 1281; GFX67-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1282; GFX67-NEXT: s_endpgm 1283; 1284; GFX8-LABEL: s_fdiv_v2f32_fast_math: 1285; GFX8: ; %bb.0: ; %entry 1286; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c 
1287; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 1288; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1289; GFX8-NEXT: v_rcp_f32_e32 v0, s3 1290; GFX8-NEXT: v_rcp_f32_e32 v2, s2 1291; GFX8-NEXT: v_mul_f32_e32 v1, s1, v0 1292; GFX8-NEXT: v_mul_f32_e32 v0, s0, v2 1293; GFX8-NEXT: v_mov_b32_e32 v2, s4 1294; GFX8-NEXT: v_mov_b32_e32 v3, s5 1295; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1296; GFX8-NEXT: s_endpgm 1297; 1298; GFX10-LABEL: s_fdiv_v2f32_fast_math: 1299; GFX10: ; %bb.0: ; %entry 1300; GFX10-NEXT: s_clause 0x1 1301; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c 1302; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 1303; GFX10-NEXT: v_mov_b32_e32 v3, 0 1304; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1305; GFX10-NEXT: v_rcp_f32_e32 v0, s3 1306; GFX10-NEXT: v_rcp_f32_e32 v2, s2 1307; GFX10-NEXT: v_mul_f32_e32 v1, s1, v0 1308; GFX10-NEXT: v_mul_f32_e32 v0, s0, v2 1309; GFX10-NEXT: global_store_dwordx2 v3, v[0:1], s[6:7] 1310; GFX10-NEXT: s_endpgm 1311; 1312; GFX11-LABEL: s_fdiv_v2f32_fast_math: 1313; GFX11: ; %bb.0: ; %entry 1314; GFX11-NEXT: s_clause 0x1 1315; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c 1316; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 1317; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1318; GFX11-NEXT: v_rcp_f32_e32 v0, s3 1319; GFX11-NEXT: v_rcp_f32_e32 v2, s2 1320; GFX11-NEXT: v_mov_b32_e32 v3, 0 1321; GFX11-NEXT: s_waitcnt_depctr 0xfff 1322; GFX11-NEXT: v_dual_mul_f32 v1, s1, v0 :: v_dual_mul_f32 v0, s0, v2 1323; GFX11-NEXT: global_store_b64 v3, v[0:1], s[4:5] 1324; GFX11-NEXT: s_endpgm 1325; 1326; EG-LABEL: s_fdiv_v2f32_fast_math: 1327; EG: ; %bb.0: ; %entry 1328; EG-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] 1329; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1330; EG-NEXT: CF_END 1331; EG-NEXT: PAD 1332; EG-NEXT: ALU clause starting at 4: 1333; EG-NEXT: RECIP_IEEE * T0.X, KC0[3].Z, 1334; EG-NEXT: MUL_IEEE T0.Y, PS, KC0[3].X, 1335; EG-NEXT: RECIP_IEEE * T0.X, KC0[3].Y, 1336; EG-NEXT: MUL_IEEE T0.X, PS, KC0[2].W, 1337; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 
1338; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1339entry: 1340 %fdiv = fdiv fast <2 x float> %a, %b 1341 store <2 x float> %fdiv, ptr addrspace(1) %out 1342 ret void 1343} 1344 ; fdiv with the arcp and ninf flags on <2 x float> kernel arguments. The autogenerated checks expect v_rcp_f32 + v_mul_f32 per element on the amdgcn targets and RECIP_IEEE + MUL_IEEE per element on r600, with no v_div_scale / v_div_fmas / v_div_fixup sequence. Regenerate the checks with utils/update_llc_test_checks.py rather than editing them by hand. 1345define amdgpu_kernel void @s_fdiv_v2f32_arcp_math(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 { 1346; GFX67-LABEL: s_fdiv_v2f32_arcp_math: 1347; GFX67: ; %bb.0: ; %entry 1348; GFX67-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb 1349; GFX67-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 1350; GFX67-NEXT: s_mov_b32 s7, 0xf000 1351; GFX67-NEXT: s_mov_b32 s6, -1 1352; GFX67-NEXT: s_waitcnt lgkmcnt(0) 1353; GFX67-NEXT: v_rcp_f32_e32 v0, s3 1354; GFX67-NEXT: v_rcp_f32_e32 v2, s2 1355; GFX67-NEXT: v_mul_f32_e32 v1, s1, v0 1356; GFX67-NEXT: v_mul_f32_e32 v0, s0, v2 1357; GFX67-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1358; GFX67-NEXT: s_endpgm 1359; 1360; GFX8-LABEL: s_fdiv_v2f32_arcp_math: 1361; GFX8: ; %bb.0: ; %entry 1362; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c 1363; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 1364; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1365; GFX8-NEXT: v_rcp_f32_e32 v0, s3 1366; GFX8-NEXT: v_rcp_f32_e32 v2, s2 1367; GFX8-NEXT: v_mul_f32_e32 v1, s1, v0 1368; GFX8-NEXT: v_mul_f32_e32 v0, s0, v2 1369; GFX8-NEXT: v_mov_b32_e32 v2, s4 1370; GFX8-NEXT: v_mov_b32_e32 v3, s5 1371; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1372; GFX8-NEXT: s_endpgm 1373; 1374; GFX10-LABEL: s_fdiv_v2f32_arcp_math: 1375; GFX10: ; %bb.0: ; %entry 1376; GFX10-NEXT: s_clause 0x1 1377; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c 1378; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 1379; GFX10-NEXT: v_mov_b32_e32 v3, 0 1380; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1381; GFX10-NEXT: v_rcp_f32_e32 v0, s3 1382; GFX10-NEXT: v_rcp_f32_e32 v2, s2 1383; GFX10-NEXT: v_mul_f32_e32 v1, s1, v0 1384; GFX10-NEXT: v_mul_f32_e32 v0, s0, v2 1385; GFX10-NEXT: global_store_dwordx2 v3, v[0:1], s[6:7] 1386; GFX10-NEXT: s_endpgm 1387; 1388; GFX11-LABEL: s_fdiv_v2f32_arcp_math: 1389; GFX11: ; %bb.0: ; %entry 
1390; GFX11-NEXT: s_clause 0x1 1391; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c 1392; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 1393; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1394; GFX11-NEXT: v_rcp_f32_e32 v0, s3 1395; GFX11-NEXT: v_rcp_f32_e32 v2, s2 1396; GFX11-NEXT: v_mov_b32_e32 v3, 0 1397; GFX11-NEXT: s_waitcnt_depctr 0xfff 1398; GFX11-NEXT: v_dual_mul_f32 v1, s1, v0 :: v_dual_mul_f32 v0, s0, v2 1399; GFX11-NEXT: global_store_b64 v3, v[0:1], s[4:5] 1400; GFX11-NEXT: s_endpgm 1401; 1402; EG-LABEL: s_fdiv_v2f32_arcp_math: 1403; EG: ; %bb.0: ; %entry 1404; EG-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] 1405; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1406; EG-NEXT: CF_END 1407; EG-NEXT: PAD 1408; EG-NEXT: ALU clause starting at 4: 1409; EG-NEXT: RECIP_IEEE * T0.X, KC0[3].Z, 1410; EG-NEXT: MUL_IEEE T0.Y, PS, KC0[3].X, 1411; EG-NEXT: RECIP_IEEE * T0.X, KC0[3].Y, 1412; EG-NEXT: MUL_IEEE T0.X, PS, KC0[2].W, 1413; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1414; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1415entry: 1416 %fdiv = fdiv arcp ninf <2 x float> %a, %b 1417 store <2 x float> %fdiv, ptr addrspace(1) %out 1418 ret void 1419} 1420 1421define amdgpu_kernel void @s_fdiv_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 1422; GFX6-FASTFMA-LABEL: s_fdiv_v4f32: 1423; GFX6-FASTFMA: ; %bb.0: 1424; GFX6-FASTFMA-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9 1425; GFX6-FASTFMA-NEXT: s_waitcnt lgkmcnt(0) 1426; GFX6-FASTFMA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 1427; GFX6-FASTFMA-NEXT: s_mov_b32 s11, 0xf000 1428; GFX6-FASTFMA-NEXT: s_mov_b32 s10, -1 1429; GFX6-FASTFMA-NEXT: s_waitcnt lgkmcnt(0) 1430; GFX6-FASTFMA-NEXT: v_mov_b32_e32 v1, s3 1431; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[12:13], s7, s7, v1 1432; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 1433; GFX6-FASTFMA-NEXT: v_mov_b32_e32 v0, s7 1434; GFX6-FASTFMA-NEXT: v_div_scale_f32 v0, vcc, s3, v0, s3 1435; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1436; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, 
v3, 1.0 1437; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 1438; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v0, v3 1439; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v4, v0 1440; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v3, v4 1441; GFX6-FASTFMA-NEXT: v_fma_f32 v0, -v2, v4, v0 1442; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1443; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v0, v0, v3, v4 1444; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v3, v0, s7, v1 1445; GFX6-FASTFMA-NEXT: v_mov_b32_e32 v1, s2 1446; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[12:13], s6, s6, v1 1447; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v2 1448; GFX6-FASTFMA-NEXT: v_mov_b32_e32 v0, s6 1449; GFX6-FASTFMA-NEXT: v_div_scale_f32 v0, vcc, s2, v0, s2 1450; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1451; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 1452; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v4, v4 1453; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v0, v4 1454; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v0 1455; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v4, v5 1456; GFX6-FASTFMA-NEXT: v_fma_f32 v0, -v2, v5, v0 1457; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1458; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v0, v0, v4, v5 1459; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v2, v0, s6, v1 1460; GFX6-FASTFMA-NEXT: v_mov_b32_e32 v1, s1 1461; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, s[2:3], s5, s5, v1 1462; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v5, v4 1463; GFX6-FASTFMA-NEXT: v_mov_b32_e32 v0, s5 1464; GFX6-FASTFMA-NEXT: v_div_scale_f32 v0, vcc, s1, v0, s1 1465; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1466; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v4, v5, 1.0 1467; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v5, v5 1468; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v6, v0, v5 1469; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v4, v6, v0 1470; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v5, v6 1471; GFX6-FASTFMA-NEXT: v_fma_f32 v0, -v4, v6, v0 1472; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1473; 
GFX6-FASTFMA-NEXT: v_mov_b32_e32 v4, s0 1474; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v0, v0, v5, v6 1475; GFX6-FASTFMA-NEXT: v_div_scale_f32 v5, s[2:3], s4, s4, v4 1476; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v6, v5 1477; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v1, v0, s5, v1 1478; GFX6-FASTFMA-NEXT: v_mov_b32_e32 v0, s4 1479; GFX6-FASTFMA-NEXT: v_div_scale_f32 v0, vcc, s0, v0, s0 1480; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1481; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v5, v6, 1.0 1482; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v6, v6 1483; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v7, v0, v6 1484; GFX6-FASTFMA-NEXT: v_fma_f32 v8, -v5, v7, v0 1485; GFX6-FASTFMA-NEXT: v_fma_f32 v7, v8, v6, v7 1486; GFX6-FASTFMA-NEXT: v_fma_f32 v0, -v5, v7, v0 1487; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1488; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v0, v0, v6, v7 1489; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v0, s4, v4 1490; GFX6-FASTFMA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 1491; GFX6-FASTFMA-NEXT: s_endpgm 1492; 1493; GFX6-SLOWFMA-LABEL: s_fdiv_v4f32: 1494; GFX6-SLOWFMA: ; %bb.0: 1495; GFX6-SLOWFMA-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9 1496; GFX6-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0) 1497; GFX6-SLOWFMA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 1498; GFX6-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0) 1499; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v0, s3 1500; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, s[10:11], s7, s7, v0 1501; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v2, s7 1502; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, s3, v2, s3 1503; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v4, s2 1504; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v7, s1 1505; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v8, s0 1506; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 1507; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1508; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v3, 1.0 1509; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, v5, v3, v3 1510; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v2, v3 1511; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v1, v5, v2 
1512; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v3, v5 1513; GFX6-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v5, v2 1514; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1515; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[10:11], s6, s6, v4 1516; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v5 1517; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v3, s6 1518; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v5, vcc, s2, v3, s2 1519; GFX6-SLOWFMA-NEXT: s_mov_b32 s11, 0xf000 1520; GFX6-SLOWFMA-NEXT: s_mov_b32 s10, -1 1521; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v6, v2 1522; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v3, v1, s7, v0 1523; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1524; GFX6-SLOWFMA-NEXT: v_fma_f32 v0, -v2, v6, 1.0 1525; GFX6-SLOWFMA-NEXT: v_fma_f32 v0, v0, v6, v6 1526; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v1, v5, v0 1527; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v1, v5 1528; GFX6-SLOWFMA-NEXT: v_fma_f32 v1, v6, v0, v1 1529; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v1, v5 1530; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1531; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v5, s[2:3], s5, s5, v7 1532; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v0, v2, v0, v1 1533; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v1, s5 1534; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, vcc, s1, v1, s1 1535; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v6, v5 1536; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v2, v0, s6, v4 1537; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1538; GFX6-SLOWFMA-NEXT: v_fma_f32 v0, -v5, v6, 1.0 1539; GFX6-SLOWFMA-NEXT: v_fma_f32 v0, v0, v6, v6 1540; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v4, v1, v0 1541; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v5, v4, v1 1542; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v6, v0, v4 1543; GFX6-SLOWFMA-NEXT: v_fma_f32 v1, -v5, v4, v1 1544; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1545; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v5, s[2:3], s4, s4, v8 1546; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v0, v1, v0, v4 1547; GFX6-SLOWFMA-NEXT: v_mov_b32_e32 v1, s4 1548; 
GFX6-SLOWFMA-NEXT: v_div_scale_f32 v4, vcc, s0, v1, s0 1549; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v6, v5 1550; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v1, v0, s5, v7 1551; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1552; GFX6-SLOWFMA-NEXT: v_fma_f32 v0, -v5, v6, 1.0 1553; GFX6-SLOWFMA-NEXT: v_fma_f32 v0, v0, v6, v6 1554; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v6, v4, v0 1555; GFX6-SLOWFMA-NEXT: v_fma_f32 v7, -v5, v6, v4 1556; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, v7, v0, v6 1557; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, -v5, v6, v4 1558; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1559; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v0, v4, v0, v6 1560; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v0, s4, v8 1561; GFX6-SLOWFMA-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 1562; GFX6-SLOWFMA-NEXT: s_endpgm 1563; 1564; GFX7-LABEL: s_fdiv_v4f32: 1565; GFX7: ; %bb.0: 1566; GFX7-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9 1567; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1568; GFX7-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 1569; GFX7-NEXT: s_mov_b32 s11, 0xf000 1570; GFX7-NEXT: s_mov_b32 s10, -1 1571; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1572; GFX7-NEXT: v_mov_b32_e32 v1, s3 1573; GFX7-NEXT: v_div_scale_f32 v2, s[12:13], s7, s7, v1 1574; GFX7-NEXT: v_rcp_f32_e32 v3, v2 1575; GFX7-NEXT: v_mov_b32_e32 v0, s7 1576; GFX7-NEXT: v_div_scale_f32 v0, vcc, s3, v0, s3 1577; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1578; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 1579; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 1580; GFX7-NEXT: v_mul_f32_e32 v4, v0, v3 1581; GFX7-NEXT: v_fma_f32 v5, -v2, v4, v0 1582; GFX7-NEXT: v_fma_f32 v4, v5, v3, v4 1583; GFX7-NEXT: v_fma_f32 v0, -v2, v4, v0 1584; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1585; GFX7-NEXT: v_div_fmas_f32 v0, v0, v3, v4 1586; GFX7-NEXT: v_div_fixup_f32 v3, v0, s7, v1 1587; GFX7-NEXT: v_mov_b32_e32 v1, s2 1588; GFX7-NEXT: v_div_scale_f32 v2, s[12:13], s6, s6, v1 1589; GFX7-NEXT: v_rcp_f32_e32 v4, v2 1590; GFX7-NEXT: 
v_mov_b32_e32 v0, s6 1591; GFX7-NEXT: v_div_scale_f32 v0, vcc, s2, v0, s2 1592; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1593; GFX7-NEXT: v_fma_f32 v5, -v2, v4, 1.0 1594; GFX7-NEXT: v_fma_f32 v4, v5, v4, v4 1595; GFX7-NEXT: v_mul_f32_e32 v5, v0, v4 1596; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v0 1597; GFX7-NEXT: v_fma_f32 v5, v6, v4, v5 1598; GFX7-NEXT: v_fma_f32 v0, -v2, v5, v0 1599; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1600; GFX7-NEXT: v_div_fmas_f32 v0, v0, v4, v5 1601; GFX7-NEXT: v_div_fixup_f32 v2, v0, s6, v1 1602; GFX7-NEXT: v_mov_b32_e32 v1, s1 1603; GFX7-NEXT: v_div_scale_f32 v4, s[2:3], s5, s5, v1 1604; GFX7-NEXT: v_rcp_f32_e32 v5, v4 1605; GFX7-NEXT: v_mov_b32_e32 v0, s5 1606; GFX7-NEXT: v_div_scale_f32 v0, vcc, s1, v0, s1 1607; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1608; GFX7-NEXT: v_fma_f32 v6, -v4, v5, 1.0 1609; GFX7-NEXT: v_fma_f32 v5, v6, v5, v5 1610; GFX7-NEXT: v_mul_f32_e32 v6, v0, v5 1611; GFX7-NEXT: v_fma_f32 v7, -v4, v6, v0 1612; GFX7-NEXT: v_fma_f32 v6, v7, v5, v6 1613; GFX7-NEXT: v_fma_f32 v0, -v4, v6, v0 1614; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1615; GFX7-NEXT: v_mov_b32_e32 v4, s0 1616; GFX7-NEXT: v_div_fmas_f32 v0, v0, v5, v6 1617; GFX7-NEXT: v_div_scale_f32 v5, s[2:3], s4, s4, v4 1618; GFX7-NEXT: v_rcp_f32_e32 v6, v5 1619; GFX7-NEXT: v_div_fixup_f32 v1, v0, s5, v1 1620; GFX7-NEXT: v_mov_b32_e32 v0, s4 1621; GFX7-NEXT: v_div_scale_f32 v0, vcc, s0, v0, s0 1622; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1623; GFX7-NEXT: v_fma_f32 v7, -v5, v6, 1.0 1624; GFX7-NEXT: v_fma_f32 v6, v7, v6, v6 1625; GFX7-NEXT: v_mul_f32_e32 v7, v0, v6 1626; GFX7-NEXT: v_fma_f32 v8, -v5, v7, v0 1627; GFX7-NEXT: v_fma_f32 v7, v8, v6, v7 1628; GFX7-NEXT: v_fma_f32 v0, -v5, v7, v0 1629; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1630; GFX7-NEXT: v_div_fmas_f32 v0, v0, v6, v7 1631; GFX7-NEXT: v_div_fixup_f32 v0, v0, s4, v4 1632; GFX7-NEXT: buffer_store_dwordx4 
v[0:3], off, s[8:11], 0 1633; GFX7-NEXT: s_endpgm 1634; 1635; GFX8-LABEL: s_fdiv_v4f32: 1636; GFX8: ; %bb.0: 1637; GFX8-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 1638; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1639; GFX8-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 1640; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1641; GFX8-NEXT: v_mov_b32_e32 v0, s3 1642; GFX8-NEXT: v_div_scale_f32 v1, s[10:11], s7, s7, v0 1643; GFX8-NEXT: v_mov_b32_e32 v2, s7 1644; GFX8-NEXT: v_div_scale_f32 v2, vcc, s3, v2, s3 1645; GFX8-NEXT: v_mov_b32_e32 v4, s2 1646; GFX8-NEXT: v_mov_b32_e32 v7, s1 1647; GFX8-NEXT: v_mov_b32_e32 v8, s0 1648; GFX8-NEXT: v_rcp_f32_e32 v3, v1 1649; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1650; GFX8-NEXT: v_fma_f32 v5, -v1, v3, 1.0 1651; GFX8-NEXT: v_fma_f32 v3, v5, v3, v3 1652; GFX8-NEXT: v_mul_f32_e32 v5, v2, v3 1653; GFX8-NEXT: v_fma_f32 v6, -v1, v5, v2 1654; GFX8-NEXT: v_fma_f32 v5, v6, v3, v5 1655; GFX8-NEXT: v_fma_f32 v1, -v1, v5, v2 1656; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1657; GFX8-NEXT: v_div_scale_f32 v2, s[10:11], s6, s6, v4 1658; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v5 1659; GFX8-NEXT: v_mov_b32_e32 v3, s6 1660; GFX8-NEXT: v_div_scale_f32 v5, vcc, s2, v3, s2 1661; GFX8-NEXT: v_rcp_f32_e32 v6, v2 1662; GFX8-NEXT: v_div_fixup_f32 v3, v1, s7, v0 1663; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1664; GFX8-NEXT: v_fma_f32 v0, -v2, v6, 1.0 1665; GFX8-NEXT: v_fma_f32 v0, v0, v6, v6 1666; GFX8-NEXT: v_mul_f32_e32 v1, v5, v0 1667; GFX8-NEXT: v_fma_f32 v6, -v2, v1, v5 1668; GFX8-NEXT: v_fma_f32 v1, v6, v0, v1 1669; GFX8-NEXT: v_fma_f32 v2, -v2, v1, v5 1670; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1671; GFX8-NEXT: v_div_scale_f32 v5, s[2:3], s5, s5, v7 1672; GFX8-NEXT: v_div_fmas_f32 v0, v2, v0, v1 1673; GFX8-NEXT: v_mov_b32_e32 v1, s5 1674; GFX8-NEXT: v_div_scale_f32 v1, vcc, s1, v1, s1 1675; GFX8-NEXT: v_rcp_f32_e32 v6, v5 1676; GFX8-NEXT: v_div_fixup_f32 v2, v0, s6, v4 1677; GFX8-NEXT: s_setreg_imm32_b32 
hwreg(HW_REG_MODE, 4, 2), 3 1678; GFX8-NEXT: v_fma_f32 v0, -v5, v6, 1.0 1679; GFX8-NEXT: v_fma_f32 v0, v0, v6, v6 1680; GFX8-NEXT: v_mul_f32_e32 v4, v1, v0 1681; GFX8-NEXT: v_fma_f32 v6, -v5, v4, v1 1682; GFX8-NEXT: v_fma_f32 v4, v6, v0, v4 1683; GFX8-NEXT: v_fma_f32 v1, -v5, v4, v1 1684; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1685; GFX8-NEXT: v_div_scale_f32 v5, s[2:3], s4, s4, v8 1686; GFX8-NEXT: v_div_fmas_f32 v0, v1, v0, v4 1687; GFX8-NEXT: v_mov_b32_e32 v1, s4 1688; GFX8-NEXT: v_div_scale_f32 v4, vcc, s0, v1, s0 1689; GFX8-NEXT: v_rcp_f32_e32 v6, v5 1690; GFX8-NEXT: v_div_fixup_f32 v1, v0, s5, v7 1691; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1692; GFX8-NEXT: v_fma_f32 v0, -v5, v6, 1.0 1693; GFX8-NEXT: v_fma_f32 v0, v0, v6, v6 1694; GFX8-NEXT: v_mul_f32_e32 v6, v4, v0 1695; GFX8-NEXT: v_fma_f32 v7, -v5, v6, v4 1696; GFX8-NEXT: v_fma_f32 v6, v7, v0, v6 1697; GFX8-NEXT: v_fma_f32 v4, -v5, v6, v4 1698; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1699; GFX8-NEXT: v_div_fmas_f32 v0, v4, v0, v6 1700; GFX8-NEXT: v_mov_b32_e32 v4, s8 1701; GFX8-NEXT: v_mov_b32_e32 v5, s9 1702; GFX8-NEXT: v_div_fixup_f32 v0, v0, s4, v8 1703; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1704; GFX8-NEXT: s_endpgm 1705; 1706; GFX10-LABEL: s_fdiv_v4f32: 1707; GFX10: ; %bb.0: 1708; GFX10-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 1709; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1710; GFX10-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 1711; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1712; GFX10-NEXT: v_div_scale_f32 v0, s10, s7, s7, s3 1713; GFX10-NEXT: v_div_scale_f32 v2, vcc_lo, s3, s7, s3 1714; GFX10-NEXT: v_rcp_f32_e32 v1, v0 1715; GFX10-NEXT: s_denorm_mode 15 1716; GFX10-NEXT: v_fma_f32 v3, -v0, v1, 1.0 1717; GFX10-NEXT: v_fmac_f32_e32 v1, v3, v1 1718; GFX10-NEXT: v_mul_f32_e32 v3, v2, v1 1719; GFX10-NEXT: v_fma_f32 v4, -v0, v3, v2 1720; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v1 1721; GFX10-NEXT: v_fma_f32 v0, -v0, v3, v2 1722; GFX10-NEXT: s_denorm_mode 12 1723; 
GFX10-NEXT: v_div_scale_f32 v2, s10, s6, s6, s2 1724; GFX10-NEXT: v_div_fmas_f32 v0, v0, v1, v3 1725; GFX10-NEXT: v_div_scale_f32 v1, vcc_lo, s2, s6, s2 1726; GFX10-NEXT: v_rcp_f32_e32 v4, v2 1727; GFX10-NEXT: v_div_fixup_f32 v3, v0, s7, s3 1728; GFX10-NEXT: s_denorm_mode 15 1729; GFX10-NEXT: v_fma_f32 v0, -v2, v4, 1.0 1730; GFX10-NEXT: v_fmac_f32_e32 v4, v0, v4 1731; GFX10-NEXT: v_mul_f32_e32 v0, v1, v4 1732; GFX10-NEXT: v_fma_f32 v5, -v2, v0, v1 1733; GFX10-NEXT: v_fmac_f32_e32 v0, v5, v4 1734; GFX10-NEXT: v_fma_f32 v1, -v2, v0, v1 1735; GFX10-NEXT: s_denorm_mode 12 1736; GFX10-NEXT: v_div_scale_f32 v5, s3, s5, s5, s1 1737; GFX10-NEXT: v_div_fmas_f32 v0, v1, v4, v0 1738; GFX10-NEXT: v_div_scale_f32 v1, vcc_lo, s1, s5, s1 1739; GFX10-NEXT: v_rcp_f32_e32 v6, v5 1740; GFX10-NEXT: v_div_fixup_f32 v2, v0, s6, s2 1741; GFX10-NEXT: s_denorm_mode 15 1742; GFX10-NEXT: v_fma_f32 v0, -v5, v6, 1.0 1743; GFX10-NEXT: v_fmac_f32_e32 v6, v0, v6 1744; GFX10-NEXT: v_mul_f32_e32 v0, v1, v6 1745; GFX10-NEXT: v_fma_f32 v4, -v5, v0, v1 1746; GFX10-NEXT: v_fmac_f32_e32 v0, v4, v6 1747; GFX10-NEXT: v_fma_f32 v1, -v5, v0, v1 1748; GFX10-NEXT: s_denorm_mode 12 1749; GFX10-NEXT: v_div_scale_f32 v4, s2, s4, s4, s0 1750; GFX10-NEXT: v_div_fmas_f32 v0, v1, v6, v0 1751; GFX10-NEXT: v_rcp_f32_e32 v5, v4 1752; GFX10-NEXT: v_div_fixup_f32 v1, v0, s5, s1 1753; GFX10-NEXT: v_div_scale_f32 v0, vcc_lo, s0, s4, s0 1754; GFX10-NEXT: s_denorm_mode 15 1755; GFX10-NEXT: v_fma_f32 v6, -v4, v5, 1.0 1756; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v5 1757; GFX10-NEXT: v_mul_f32_e32 v6, v0, v5 1758; GFX10-NEXT: v_fma_f32 v7, -v4, v6, v0 1759; GFX10-NEXT: v_fmac_f32_e32 v6, v7, v5 1760; GFX10-NEXT: v_fma_f32 v0, -v4, v6, v0 1761; GFX10-NEXT: s_denorm_mode 12 1762; GFX10-NEXT: v_mov_b32_e32 v4, 0 1763; GFX10-NEXT: v_div_fmas_f32 v0, v0, v5, v6 1764; GFX10-NEXT: v_div_fixup_f32 v0, v0, s4, s0 1765; GFX10-NEXT: global_store_dwordx4 v4, v[0:3], s[8:9] 1766; GFX10-NEXT: s_endpgm 1767; 1768; GFX11-LABEL: s_fdiv_v4f32: 1769; 
GFX11: ; %bb.0: 1770; GFX11-NEXT: s_load_b128 s[8:11], s[4:5], 0x24 1771; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1772; GFX11-NEXT: s_load_b256 s[0:7], s[10:11], 0x0 1773; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1774; GFX11-NEXT: v_div_scale_f32 v0, null, s7, s7, s3 1775; GFX11-NEXT: v_div_scale_f32 v2, vcc_lo, s3, s7, s3 1776; GFX11-NEXT: v_rcp_f32_e32 v1, v0 1777; GFX11-NEXT: s_denorm_mode 15 1778; GFX11-NEXT: s_waitcnt_depctr 0xfff 1779; GFX11-NEXT: v_fma_f32 v3, -v0, v1, 1.0 1780; GFX11-NEXT: v_fmac_f32_e32 v1, v3, v1 1781; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1 1782; GFX11-NEXT: v_fma_f32 v4, -v0, v3, v2 1783; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v1 1784; GFX11-NEXT: v_fma_f32 v0, -v0, v3, v2 1785; GFX11-NEXT: s_denorm_mode 12 1786; GFX11-NEXT: v_div_scale_f32 v2, null, s6, s6, s2 1787; GFX11-NEXT: v_div_fmas_f32 v0, v0, v1, v3 1788; GFX11-NEXT: v_div_scale_f32 v1, vcc_lo, s2, s6, s2 1789; GFX11-NEXT: v_rcp_f32_e32 v4, v2 1790; GFX11-NEXT: v_div_fixup_f32 v3, v0, s7, s3 1791; GFX11-NEXT: s_denorm_mode 15 1792; GFX11-NEXT: s_waitcnt_depctr 0xfff 1793; GFX11-NEXT: v_fma_f32 v0, -v2, v4, 1.0 1794; GFX11-NEXT: v_fmac_f32_e32 v4, v0, v4 1795; GFX11-NEXT: v_mul_f32_e32 v0, v1, v4 1796; GFX11-NEXT: v_fma_f32 v5, -v2, v0, v1 1797; GFX11-NEXT: v_fmac_f32_e32 v0, v5, v4 1798; GFX11-NEXT: v_fma_f32 v1, -v2, v0, v1 1799; GFX11-NEXT: s_denorm_mode 12 1800; GFX11-NEXT: v_div_scale_f32 v5, null, s5, s5, s1 1801; GFX11-NEXT: v_div_fmas_f32 v0, v1, v4, v0 1802; GFX11-NEXT: v_div_scale_f32 v1, vcc_lo, s1, s5, s1 1803; GFX11-NEXT: v_rcp_f32_e32 v6, v5 1804; GFX11-NEXT: v_div_fixup_f32 v2, v0, s6, s2 1805; GFX11-NEXT: s_denorm_mode 15 1806; GFX11-NEXT: s_waitcnt_depctr 0xfff 1807; GFX11-NEXT: v_fma_f32 v0, -v5, v6, 1.0 1808; GFX11-NEXT: v_fmac_f32_e32 v6, v0, v6 1809; GFX11-NEXT: v_mul_f32_e32 v0, v1, v6 1810; GFX11-NEXT: v_fma_f32 v4, -v5, v0, v1 1811; GFX11-NEXT: v_fmac_f32_e32 v0, v4, v6 1812; GFX11-NEXT: v_fma_f32 v1, -v5, v0, v1 1813; GFX11-NEXT: s_denorm_mode 12 1814; GFX11-NEXT: 
v_div_scale_f32 v4, null, s4, s4, s0 1815; GFX11-NEXT: v_div_fmas_f32 v0, v1, v6, v0 1816; GFX11-NEXT: v_rcp_f32_e32 v5, v4 1817; GFX11-NEXT: v_div_fixup_f32 v1, v0, s5, s1 1818; GFX11-NEXT: v_div_scale_f32 v0, vcc_lo, s0, s4, s0 1819; GFX11-NEXT: s_denorm_mode 15 1820; GFX11-NEXT: s_waitcnt_depctr 0xfff 1821; GFX11-NEXT: v_fma_f32 v6, -v4, v5, 1.0 1822; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v5 1823; GFX11-NEXT: v_mul_f32_e32 v6, v0, v5 1824; GFX11-NEXT: v_fma_f32 v7, -v4, v6, v0 1825; GFX11-NEXT: v_fmac_f32_e32 v6, v7, v5 1826; GFX11-NEXT: v_fma_f32 v0, -v4, v6, v0 1827; GFX11-NEXT: s_denorm_mode 12 1828; GFX11-NEXT: v_mov_b32_e32 v4, 0 1829; GFX11-NEXT: v_div_fmas_f32 v0, v0, v5, v6 1830; GFX11-NEXT: v_div_fixup_f32 v0, v0, s4, s0 1831; GFX11-NEXT: global_store_b128 v4, v[0:3], s[8:9] 1832; GFX11-NEXT: s_endpgm 1833; 1834; EG-LABEL: s_fdiv_v4f32: 1835; EG: ; %bb.0: 1836; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 1837; EG-NEXT: TEX 1 @6 1838; EG-NEXT: ALU 9, @11, KC0[CB0:0-32], KC1[] 1839; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 1840; EG-NEXT: CF_END 1841; EG-NEXT: PAD 1842; EG-NEXT: Fetch clause starting at 6: 1843; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1 1844; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 1845; EG-NEXT: ALU clause starting at 10: 1846; EG-NEXT: MOV * T0.X, KC0[2].Z, 1847; EG-NEXT: ALU clause starting at 11: 1848; EG-NEXT: RECIP_IEEE * T1.W, T1.W, 1849; EG-NEXT: MUL_IEEE T0.W, T0.W, PS, 1850; EG-NEXT: RECIP_IEEE * T1.Z, T1.Z, 1851; EG-NEXT: MUL_IEEE T0.Z, T0.Z, PS, 1852; EG-NEXT: RECIP_IEEE * T1.Y, T1.Y, 1853; EG-NEXT: MUL_IEEE T0.Y, T0.Y, PS, 1854; EG-NEXT: RECIP_IEEE * T1.X, T1.X, 1855; EG-NEXT: MUL_IEEE T0.X, T0.X, PS, 1856; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1857; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1858 %b_ptr = getelementptr <4 x float>, ptr addrspace(1) %in, i32 1 1859 %a = load <4 x float>, ptr addrspace(1) %in 1860 %b = load <4 x float>, ptr addrspace(1) %b_ptr 1861 %result = fdiv <4 x float> %a, %b 1862 store 
<4 x float> %result, ptr addrspace(1) %out 1863 ret void 1864} 1865 1866define amdgpu_kernel void @s_fdiv_v4f32_fast_math(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 1867; GFX67-LABEL: s_fdiv_v4f32_fast_math: 1868; GFX67: ; %bb.0: 1869; GFX67-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9 1870; GFX67-NEXT: s_waitcnt lgkmcnt(0) 1871; GFX67-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 1872; GFX67-NEXT: s_mov_b32 s11, 0xf000 1873; GFX67-NEXT: s_mov_b32 s10, -1 1874; GFX67-NEXT: s_waitcnt lgkmcnt(0) 1875; GFX67-NEXT: v_rcp_f32_e32 v0, s7 1876; GFX67-NEXT: v_rcp_f32_e32 v1, s6 1877; GFX67-NEXT: v_rcp_f32_e32 v4, s5 1878; GFX67-NEXT: v_rcp_f32_e32 v5, s4 1879; GFX67-NEXT: v_mul_f32_e32 v3, s3, v0 1880; GFX67-NEXT: v_mul_f32_e32 v2, s2, v1 1881; GFX67-NEXT: v_mul_f32_e32 v1, s1, v4 1882; GFX67-NEXT: v_mul_f32_e32 v0, s0, v5 1883; GFX67-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 1884; GFX67-NEXT: s_endpgm 1885; 1886; GFX8-LABEL: s_fdiv_v4f32_fast_math: 1887; GFX8: ; %bb.0: 1888; GFX8-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 1889; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1890; GFX8-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 1891; GFX8-NEXT: v_mov_b32_e32 v4, s8 1892; GFX8-NEXT: v_mov_b32_e32 v5, s9 1893; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1894; GFX8-NEXT: v_rcp_f32_e32 v0, s7 1895; GFX8-NEXT: v_rcp_f32_e32 v1, s6 1896; GFX8-NEXT: v_rcp_f32_e32 v6, s5 1897; GFX8-NEXT: v_rcp_f32_e32 v7, s4 1898; GFX8-NEXT: v_mul_f32_e32 v3, s3, v0 1899; GFX8-NEXT: v_mul_f32_e32 v2, s2, v1 1900; GFX8-NEXT: v_mul_f32_e32 v1, s1, v6 1901; GFX8-NEXT: v_mul_f32_e32 v0, s0, v7 1902; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1903; GFX8-NEXT: s_endpgm 1904; 1905; GFX10-LABEL: s_fdiv_v4f32_fast_math: 1906; GFX10: ; %bb.0: 1907; GFX10-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 1908; GFX10-NEXT: v_mov_b32_e32 v6, 0 1909; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1910; GFX10-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 1911; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1912; GFX10-NEXT: v_rcp_f32_e32 v0, s7 1913; GFX10-NEXT: 
v_rcp_f32_e32 v1, s6 1914; GFX10-NEXT: v_rcp_f32_e32 v4, s5 1915; GFX10-NEXT: v_rcp_f32_e32 v5, s4 1916; GFX10-NEXT: v_mul_f32_e32 v3, s3, v0 1917; GFX10-NEXT: v_mul_f32_e32 v2, s2, v1 1918; GFX10-NEXT: v_mul_f32_e32 v1, s1, v4 1919; GFX10-NEXT: v_mul_f32_e32 v0, s0, v5 1920; GFX10-NEXT: global_store_dwordx4 v6, v[0:3], s[8:9] 1921; GFX10-NEXT: s_endpgm 1922; 1923; GFX11-LABEL: s_fdiv_v4f32_fast_math: 1924; GFX11: ; %bb.0: 1925; GFX11-NEXT: s_load_b128 s[8:11], s[4:5], 0x24 1926; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1927; GFX11-NEXT: s_load_b256 s[0:7], s[10:11], 0x0 1928; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1929; GFX11-NEXT: v_rcp_f32_e32 v0, s7 1930; GFX11-NEXT: v_rcp_f32_e32 v1, s6 1931; GFX11-NEXT: v_rcp_f32_e32 v4, s5 1932; GFX11-NEXT: v_rcp_f32_e32 v5, s4 1933; GFX11-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mul_f32 v3, s3, v0 1934; GFX11-NEXT: s_waitcnt_depctr 0xfff 1935; GFX11-NEXT: v_dual_mul_f32 v2, s2, v1 :: v_dual_mul_f32 v1, s1, v4 1936; GFX11-NEXT: v_mul_f32_e32 v0, s0, v5 1937; GFX11-NEXT: global_store_b128 v6, v[0:3], s[8:9] 1938; GFX11-NEXT: s_endpgm 1939; 1940; EG-LABEL: s_fdiv_v4f32_fast_math: 1941; EG: ; %bb.0: 1942; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 1943; EG-NEXT: TEX 1 @6 1944; EG-NEXT: ALU 9, @11, KC0[CB0:0-32], KC1[] 1945; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 1946; EG-NEXT: CF_END 1947; EG-NEXT: PAD 1948; EG-NEXT: Fetch clause starting at 6: 1949; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1 1950; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 1951; EG-NEXT: ALU clause starting at 10: 1952; EG-NEXT: MOV * T0.X, KC0[2].Z, 1953; EG-NEXT: ALU clause starting at 11: 1954; EG-NEXT: RECIP_IEEE * T1.W, T1.W, 1955; EG-NEXT: MUL_IEEE T0.W, PS, T0.W, 1956; EG-NEXT: RECIP_IEEE * T1.Z, T1.Z, 1957; EG-NEXT: MUL_IEEE T0.Z, PS, T0.Z, 1958; EG-NEXT: RECIP_IEEE * T1.Y, T1.Y, 1959; EG-NEXT: MUL_IEEE T0.Y, PS, T0.Y, 1960; EG-NEXT: RECIP_IEEE * T1.X, T1.X, 1961; EG-NEXT: MUL_IEEE T0.X, PS, T0.X, 1962; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1963; 
EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1964 %b_ptr = getelementptr <4 x float>, ptr addrspace(1) %in, i32 1 1965 %a = load <4 x float>, ptr addrspace(1) %in 1966 %b = load <4 x float>, ptr addrspace(1) %b_ptr 1967 %result = fdiv fast <4 x float> %a, %b 1968 store <4 x float> %result, ptr addrspace(1) %out 1969 ret void 1970} 1971 1972define amdgpu_kernel void @s_fdiv_v4f32_arcp_math(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { 1973; GFX67-LABEL: s_fdiv_v4f32_arcp_math: 1974; GFX67: ; %bb.0: 1975; GFX67-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9 1976; GFX67-NEXT: s_waitcnt lgkmcnt(0) 1977; GFX67-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 1978; GFX67-NEXT: s_mov_b32 s11, 0xf000 1979; GFX67-NEXT: s_mov_b32 s10, -1 1980; GFX67-NEXT: s_waitcnt lgkmcnt(0) 1981; GFX67-NEXT: v_rcp_f32_e32 v0, s7 1982; GFX67-NEXT: v_rcp_f32_e32 v1, s6 1983; GFX67-NEXT: v_rcp_f32_e32 v4, s5 1984; GFX67-NEXT: v_rcp_f32_e32 v5, s4 1985; GFX67-NEXT: v_mul_f32_e32 v3, s3, v0 1986; GFX67-NEXT: v_mul_f32_e32 v2, s2, v1 1987; GFX67-NEXT: v_mul_f32_e32 v1, s1, v4 1988; GFX67-NEXT: v_mul_f32_e32 v0, s0, v5 1989; GFX67-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 1990; GFX67-NEXT: s_endpgm 1991; 1992; GFX8-LABEL: s_fdiv_v4f32_arcp_math: 1993; GFX8: ; %bb.0: 1994; GFX8-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 1995; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1996; GFX8-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 1997; GFX8-NEXT: v_mov_b32_e32 v4, s8 1998; GFX8-NEXT: v_mov_b32_e32 v5, s9 1999; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2000; GFX8-NEXT: v_rcp_f32_e32 v0, s7 2001; GFX8-NEXT: v_rcp_f32_e32 v1, s6 2002; GFX8-NEXT: v_rcp_f32_e32 v6, s5 2003; GFX8-NEXT: v_rcp_f32_e32 v7, s4 2004; GFX8-NEXT: v_mul_f32_e32 v3, s3, v0 2005; GFX8-NEXT: v_mul_f32_e32 v2, s2, v1 2006; GFX8-NEXT: v_mul_f32_e32 v1, s1, v6 2007; GFX8-NEXT: v_mul_f32_e32 v0, s0, v7 2008; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2009; GFX8-NEXT: s_endpgm 2010; 2011; GFX10-LABEL: s_fdiv_v4f32_arcp_math: 2012; GFX10: ; %bb.0: 2013; GFX10-NEXT: 
s_load_dwordx4 s[8:11], s[4:5], 0x24 2014; GFX10-NEXT: v_mov_b32_e32 v6, 0 2015; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2016; GFX10-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 2017; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2018; GFX10-NEXT: v_rcp_f32_e32 v0, s7 2019; GFX10-NEXT: v_rcp_f32_e32 v1, s6 2020; GFX10-NEXT: v_rcp_f32_e32 v4, s5 2021; GFX10-NEXT: v_rcp_f32_e32 v5, s4 2022; GFX10-NEXT: v_mul_f32_e32 v3, s3, v0 2023; GFX10-NEXT: v_mul_f32_e32 v2, s2, v1 2024; GFX10-NEXT: v_mul_f32_e32 v1, s1, v4 2025; GFX10-NEXT: v_mul_f32_e32 v0, s0, v5 2026; GFX10-NEXT: global_store_dwordx4 v6, v[0:3], s[8:9] 2027; GFX10-NEXT: s_endpgm 2028; 2029; GFX11-LABEL: s_fdiv_v4f32_arcp_math: 2030; GFX11: ; %bb.0: 2031; GFX11-NEXT: s_load_b128 s[8:11], s[4:5], 0x24 2032; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2033; GFX11-NEXT: s_load_b256 s[0:7], s[10:11], 0x0 2034; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2035; GFX11-NEXT: v_rcp_f32_e32 v0, s7 2036; GFX11-NEXT: v_rcp_f32_e32 v1, s6 2037; GFX11-NEXT: v_rcp_f32_e32 v4, s5 2038; GFX11-NEXT: v_rcp_f32_e32 v5, s4 2039; GFX11-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mul_f32 v3, s3, v0 2040; GFX11-NEXT: s_waitcnt_depctr 0xfff 2041; GFX11-NEXT: v_dual_mul_f32 v2, s2, v1 :: v_dual_mul_f32 v1, s1, v4 2042; GFX11-NEXT: v_mul_f32_e32 v0, s0, v5 2043; GFX11-NEXT: global_store_b128 v6, v[0:3], s[8:9] 2044; GFX11-NEXT: s_endpgm 2045; 2046; EG-LABEL: s_fdiv_v4f32_arcp_math: 2047; EG: ; %bb.0: 2048; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 2049; EG-NEXT: TEX 1 @6 2050; EG-NEXT: ALU 9, @11, KC0[CB0:0-32], KC1[] 2051; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 2052; EG-NEXT: CF_END 2053; EG-NEXT: PAD 2054; EG-NEXT: Fetch clause starting at 6: 2055; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1 2056; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 2057; EG-NEXT: ALU clause starting at 10: 2058; EG-NEXT: MOV * T0.X, KC0[2].Z, 2059; EG-NEXT: ALU clause starting at 11: 2060; EG-NEXT: RECIP_IEEE * T1.W, T1.W, 2061; EG-NEXT: MUL_IEEE T0.W, PS, T0.W, 2062; EG-NEXT: RECIP_IEEE * T1.Z, 
T1.Z, 2063; EG-NEXT: MUL_IEEE T0.Z, PS, T0.Z, 2064; EG-NEXT: RECIP_IEEE * T1.Y, T1.Y, 2065; EG-NEXT: MUL_IEEE T0.Y, PS, T0.Y, 2066; EG-NEXT: RECIP_IEEE * T1.X, T1.X, 2067; EG-NEXT: MUL_IEEE T0.X, PS, T0.X, 2068; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 2069; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2070 %b_ptr = getelementptr <4 x float>, ptr addrspace(1) %in, i32 1 2071 %a = load <4 x float>, ptr addrspace(1) %in 2072 %b = load <4 x float>, ptr addrspace(1) %b_ptr 2073 %result = fdiv arcp ninf <4 x float> %a, %b 2074 store <4 x float> %result, ptr addrspace(1) %out 2075 ret void 2076} 2077 2078define amdgpu_kernel void @s_fdiv_f32_correctly_rounded_divide_sqrt(ptr addrspace(1) %out, float %a) #0 { 2079; GFX6-FASTFMA-LABEL: s_fdiv_f32_correctly_rounded_divide_sqrt: 2080; GFX6-FASTFMA: ; %bb.0: ; %entry 2081; GFX6-FASTFMA-NEXT: s_load_dword s6, s[4:5], 0xb 2082; GFX6-FASTFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 2083; GFX6-FASTFMA-NEXT: s_mov_b32 s3, 0xf000 2084; GFX6-FASTFMA-NEXT: s_mov_b32 s2, -1 2085; GFX6-FASTFMA-NEXT: s_waitcnt lgkmcnt(0) 2086; GFX6-FASTFMA-NEXT: v_div_scale_f32 v0, s[4:5], s6, s6, 1.0 2087; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v1, v0 2088; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, vcc, 1.0, s6, 1.0 2089; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 2090; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v0, v1, 1.0 2091; GFX6-FASTFMA-NEXT: v_fma_f32 v1, v3, v1, v1 2092; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v3, v2, v1 2093; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v0, v3, v2 2094; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v1, v3 2095; GFX6-FASTFMA-NEXT: v_fma_f32 v0, -v0, v3, v2 2096; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 2097; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v0, v0, v1, v3 2098; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v0, s6, 1.0 2099; GFX6-FASTFMA-NEXT: buffer_store_dword v0, off, s[0:3], 0 2100; GFX6-FASTFMA-NEXT: s_endpgm 2101; 2102; GFX6-SLOWFMA-LABEL: s_fdiv_f32_correctly_rounded_divide_sqrt: 2103; GFX6-SLOWFMA: ; %bb.0: 
; %entry 2104; GFX6-SLOWFMA-NEXT: s_load_dword s6, s[4:5], 0xb 2105; GFX6-SLOWFMA-NEXT: s_mov_b32 s3, 0xf000 2106; GFX6-SLOWFMA-NEXT: s_mov_b32 s2, -1 2107; GFX6-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0) 2108; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v0, s[0:1], s6, s6, 1.0 2109; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, vcc, 1.0, s6, 1.0 2110; GFX6-SLOWFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 2111; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v2, v0 2112; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 2113; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, -v0, v2, 1.0 2114; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, v3, v2, v2 2115; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v3, v1, v2 2116; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, -v0, v3, v1 2117; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, v4, v2, v3 2118; GFX6-SLOWFMA-NEXT: v_fma_f32 v0, -v0, v3, v1 2119; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 2120; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v0, v0, v2, v3 2121; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v0, s6, 1.0 2122; GFX6-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0) 2123; GFX6-SLOWFMA-NEXT: buffer_store_dword v0, off, s[0:3], 0 2124; GFX6-SLOWFMA-NEXT: s_endpgm 2125; 2126; GFX7-LABEL: s_fdiv_f32_correctly_rounded_divide_sqrt: 2127; GFX7: ; %bb.0: ; %entry 2128; GFX7-NEXT: s_load_dword s6, s[4:5], 0xb 2129; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 2130; GFX7-NEXT: s_mov_b32 s3, 0xf000 2131; GFX7-NEXT: s_mov_b32 s2, -1 2132; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2133; GFX7-NEXT: v_div_scale_f32 v0, s[4:5], s6, s6, 1.0 2134; GFX7-NEXT: v_rcp_f32_e32 v1, v0 2135; GFX7-NEXT: v_div_scale_f32 v2, vcc, 1.0, s6, 1.0 2136; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 2137; GFX7-NEXT: v_fma_f32 v3, -v0, v1, 1.0 2138; GFX7-NEXT: v_fma_f32 v1, v3, v1, v1 2139; GFX7-NEXT: v_mul_f32_e32 v3, v2, v1 2140; GFX7-NEXT: v_fma_f32 v4, -v0, v3, v2 2141; GFX7-NEXT: v_fma_f32 v3, v4, v1, v3 2142; GFX7-NEXT: v_fma_f32 v0, -v0, v3, v2 2143; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 2144; GFX7-NEXT: 
v_div_fmas_f32 v0, v0, v1, v3 2145; GFX7-NEXT: v_div_fixup_f32 v0, v0, s6, 1.0 2146; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 2147; GFX7-NEXT: s_endpgm 2148; 2149; GFX8-LABEL: s_fdiv_f32_correctly_rounded_divide_sqrt: 2150; GFX8: ; %bb.0: ; %entry 2151; GFX8-NEXT: s_load_dword s2, s[4:5], 0x2c 2152; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2153; GFX8-NEXT: v_div_scale_f32 v0, s[0:1], s2, s2, 1.0 2154; GFX8-NEXT: v_div_scale_f32 v1, vcc, 1.0, s2, 1.0 2155; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2156; GFX8-NEXT: v_rcp_f32_e32 v2, v0 2157; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 2158; GFX8-NEXT: v_fma_f32 v3, -v0, v2, 1.0 2159; GFX8-NEXT: v_fma_f32 v2, v3, v2, v2 2160; GFX8-NEXT: v_mul_f32_e32 v3, v1, v2 2161; GFX8-NEXT: v_fma_f32 v4, -v0, v3, v1 2162; GFX8-NEXT: v_fma_f32 v3, v4, v2, v3 2163; GFX8-NEXT: v_fma_f32 v0, -v0, v3, v1 2164; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 2165; GFX8-NEXT: v_div_fmas_f32 v0, v0, v2, v3 2166; GFX8-NEXT: v_div_fixup_f32 v2, v0, s2, 1.0 2167; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2168; GFX8-NEXT: v_mov_b32_e32 v0, s0 2169; GFX8-NEXT: v_mov_b32_e32 v1, s1 2170; GFX8-NEXT: flat_store_dword v[0:1], v2 2171; GFX8-NEXT: s_endpgm 2172; 2173; GFX10-LABEL: s_fdiv_f32_correctly_rounded_divide_sqrt: 2174; GFX10: ; %bb.0: ; %entry 2175; GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 2176; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2177; GFX10-NEXT: v_div_scale_f32 v0, s0, s2, s2, 1.0 2178; GFX10-NEXT: v_div_scale_f32 v2, vcc_lo, 1.0, s2, 1.0 2179; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2180; GFX10-NEXT: v_rcp_f32_e32 v1, v0 2181; GFX10-NEXT: s_denorm_mode 15 2182; GFX10-NEXT: v_fma_f32 v3, -v0, v1, 1.0 2183; GFX10-NEXT: v_fmac_f32_e32 v1, v3, v1 2184; GFX10-NEXT: v_mul_f32_e32 v3, v2, v1 2185; GFX10-NEXT: v_fma_f32 v4, -v0, v3, v2 2186; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v1 2187; GFX10-NEXT: v_fma_f32 v0, -v0, v3, v2 2188; GFX10-NEXT: s_denorm_mode 12 2189; GFX10-NEXT: v_div_fmas_f32 v0, v0, v1, v3 2190; GFX10-NEXT: 
v_mov_b32_e32 v1, 0 2191; GFX10-NEXT: v_div_fixup_f32 v0, v0, s2, 1.0 2192; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2193; GFX10-NEXT: global_store_dword v1, v0, s[0:1] 2194; GFX10-NEXT: s_endpgm 2195; 2196; GFX11-LABEL: s_fdiv_f32_correctly_rounded_divide_sqrt: 2197; GFX11: ; %bb.0: ; %entry 2198; GFX11-NEXT: s_clause 0x1 2199; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2200; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2201; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2202; GFX11-NEXT: v_div_scale_f32 v0, null, s2, s2, 1.0 2203; GFX11-NEXT: v_div_scale_f32 v2, vcc_lo, 1.0, s2, 1.0 2204; GFX11-NEXT: v_rcp_f32_e32 v1, v0 2205; GFX11-NEXT: s_denorm_mode 15 2206; GFX11-NEXT: s_waitcnt_depctr 0xfff 2207; GFX11-NEXT: v_fma_f32 v3, -v0, v1, 1.0 2208; GFX11-NEXT: v_fmac_f32_e32 v1, v3, v1 2209; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1 2210; GFX11-NEXT: v_fma_f32 v4, -v0, v3, v2 2211; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v1 2212; GFX11-NEXT: v_fma_f32 v0, -v0, v3, v2 2213; GFX11-NEXT: s_denorm_mode 12 2214; GFX11-NEXT: v_div_fmas_f32 v0, v0, v1, v3 2215; GFX11-NEXT: v_mov_b32_e32 v1, 0 2216; GFX11-NEXT: v_div_fixup_f32 v0, v0, s2, 1.0 2217; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 2218; GFX11-NEXT: s_endpgm 2219; 2220; EG-LABEL: s_fdiv_f32_correctly_rounded_divide_sqrt: 2221; EG: ; %bb.0: ; %entry 2222; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] 2223; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 2224; EG-NEXT: CF_END 2225; EG-NEXT: PAD 2226; EG-NEXT: ALU clause starting at 4: 2227; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, 2228; EG-NEXT: RECIP_IEEE * T1.X, KC0[2].Z, 2229; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2230entry: 2231 %fdiv = fdiv float 1.000000e+00, %a 2232 store float %fdiv, ptr addrspace(1) %out 2233 ret void 2234} 2235 2236define amdgpu_kernel void @s_fdiv_f32_denorms_correctly_rounded_divide_sqrt(ptr addrspace(1) %out, float %a) #1 { 2237; GFX6-FASTFMA-LABEL: s_fdiv_f32_denorms_correctly_rounded_divide_sqrt: 2238; GFX6-FASTFMA: ; %bb.0: ; %entry 2239; GFX6-FASTFMA-NEXT: 
s_load_dword s6, s[4:5], 0xb 2240; GFX6-FASTFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 2241; GFX6-FASTFMA-NEXT: s_mov_b32 s3, 0xf000 2242; GFX6-FASTFMA-NEXT: s_mov_b32 s2, -1 2243; GFX6-FASTFMA-NEXT: s_waitcnt lgkmcnt(0) 2244; GFX6-FASTFMA-NEXT: v_div_scale_f32 v0, s[4:5], s6, s6, 1.0 2245; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v1, v0 2246; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, vcc, 1.0, s6, 1.0 2247; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v0, v1, 1.0 2248; GFX6-FASTFMA-NEXT: v_fma_f32 v1, v3, v1, v1 2249; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v3, v2, v1 2250; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v0, v3, v2 2251; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v1, v3 2252; GFX6-FASTFMA-NEXT: v_fma_f32 v0, -v0, v3, v2 2253; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v0, v0, v1, v3 2254; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v0, s6, 1.0 2255; GFX6-FASTFMA-NEXT: buffer_store_dword v0, off, s[0:3], 0 2256; GFX6-FASTFMA-NEXT: s_endpgm 2257; 2258; GFX6-SLOWFMA-LABEL: s_fdiv_f32_denorms_correctly_rounded_divide_sqrt: 2259; GFX6-SLOWFMA: ; %bb.0: ; %entry 2260; GFX6-SLOWFMA-NEXT: s_load_dword s6, s[4:5], 0xb 2261; GFX6-SLOWFMA-NEXT: s_mov_b32 s3, 0xf000 2262; GFX6-SLOWFMA-NEXT: s_mov_b32 s2, -1 2263; GFX6-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0) 2264; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v0, s[0:1], s6, s6, 1.0 2265; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, vcc, 1.0, s6, 1.0 2266; GFX6-SLOWFMA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 2267; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v2, v0 2268; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, -v0, v2, 1.0 2269; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, v3, v2, v2 2270; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v3, v1, v2 2271; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, -v0, v3, v1 2272; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, v4, v2, v3 2273; GFX6-SLOWFMA-NEXT: v_fma_f32 v0, -v0, v3, v1 2274; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v0, v0, v2, v3 2275; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v0, s6, 1.0 2276; GFX6-SLOWFMA-NEXT: s_waitcnt lgkmcnt(0) 2277; GFX6-SLOWFMA-NEXT: buffer_store_dword v0, off, s[0:3], 0 2278; 
GFX6-SLOWFMA-NEXT: s_endpgm 2279; 2280; GFX7-LABEL: s_fdiv_f32_denorms_correctly_rounded_divide_sqrt: 2281; GFX7: ; %bb.0: ; %entry 2282; GFX7-NEXT: s_load_dword s6, s[4:5], 0xb 2283; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 2284; GFX7-NEXT: s_mov_b32 s3, 0xf000 2285; GFX7-NEXT: s_mov_b32 s2, -1 2286; GFX7-NEXT: s_waitcnt lgkmcnt(0) 2287; GFX7-NEXT: v_div_scale_f32 v0, s[4:5], s6, s6, 1.0 2288; GFX7-NEXT: v_rcp_f32_e32 v1, v0 2289; GFX7-NEXT: v_div_scale_f32 v2, vcc, 1.0, s6, 1.0 2290; GFX7-NEXT: v_fma_f32 v3, -v0, v1, 1.0 2291; GFX7-NEXT: v_fma_f32 v1, v3, v1, v1 2292; GFX7-NEXT: v_mul_f32_e32 v3, v2, v1 2293; GFX7-NEXT: v_fma_f32 v4, -v0, v3, v2 2294; GFX7-NEXT: v_fma_f32 v3, v4, v1, v3 2295; GFX7-NEXT: v_fma_f32 v0, -v0, v3, v2 2296; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v3 2297; GFX7-NEXT: v_div_fixup_f32 v0, v0, s6, 1.0 2298; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 2299; GFX7-NEXT: s_endpgm 2300; 2301; GFX8-LABEL: s_fdiv_f32_denorms_correctly_rounded_divide_sqrt: 2302; GFX8: ; %bb.0: ; %entry 2303; GFX8-NEXT: s_load_dword s2, s[4:5], 0x2c 2304; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2305; GFX8-NEXT: v_div_scale_f32 v0, s[0:1], s2, s2, 1.0 2306; GFX8-NEXT: v_div_scale_f32 v1, vcc, 1.0, s2, 1.0 2307; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2308; GFX8-NEXT: v_rcp_f32_e32 v2, v0 2309; GFX8-NEXT: v_fma_f32 v3, -v0, v2, 1.0 2310; GFX8-NEXT: v_fma_f32 v2, v3, v2, v2 2311; GFX8-NEXT: v_mul_f32_e32 v3, v1, v2 2312; GFX8-NEXT: v_fma_f32 v4, -v0, v3, v1 2313; GFX8-NEXT: v_fma_f32 v3, v4, v2, v3 2314; GFX8-NEXT: v_fma_f32 v0, -v0, v3, v1 2315; GFX8-NEXT: v_div_fmas_f32 v0, v0, v2, v3 2316; GFX8-NEXT: v_div_fixup_f32 v2, v0, s2, 1.0 2317; GFX8-NEXT: s_waitcnt lgkmcnt(0) 2318; GFX8-NEXT: v_mov_b32_e32 v0, s0 2319; GFX8-NEXT: v_mov_b32_e32 v1, s1 2320; GFX8-NEXT: flat_store_dword v[0:1], v2 2321; GFX8-NEXT: s_endpgm 2322; 2323; GFX10-LABEL: s_fdiv_f32_denorms_correctly_rounded_divide_sqrt: 2324; GFX10: ; %bb.0: ; %entry 2325; GFX10-NEXT: s_load_dword s2, 
s[4:5], 0x2c 2326; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2327; GFX10-NEXT: v_div_scale_f32 v0, s0, s2, s2, 1.0 2328; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2329; GFX10-NEXT: v_rcp_f32_e32 v1, v0 2330; GFX10-NEXT: v_fma_f32 v2, -v0, v1, 1.0 2331; GFX10-NEXT: v_fmac_f32_e32 v1, v2, v1 2332; GFX10-NEXT: v_div_scale_f32 v2, vcc_lo, 1.0, s2, 1.0 2333; GFX10-NEXT: v_mul_f32_e32 v3, v2, v1 2334; GFX10-NEXT: v_fma_f32 v4, -v0, v3, v2 2335; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v1 2336; GFX10-NEXT: v_fma_f32 v0, -v0, v3, v2 2337; GFX10-NEXT: v_div_fmas_f32 v0, v0, v1, v3 2338; GFX10-NEXT: v_mov_b32_e32 v1, 0 2339; GFX10-NEXT: v_div_fixup_f32 v0, v0, s2, 1.0 2340; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2341; GFX10-NEXT: global_store_dword v1, v0, s[0:1] 2342; GFX10-NEXT: s_endpgm 2343; 2344; GFX11-LABEL: s_fdiv_f32_denorms_correctly_rounded_divide_sqrt: 2345; GFX11: ; %bb.0: ; %entry 2346; GFX11-NEXT: s_clause 0x1 2347; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2348; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2349; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2350; GFX11-NEXT: v_div_scale_f32 v0, null, s2, s2, 1.0 2351; GFX11-NEXT: v_rcp_f32_e32 v1, v0 2352; GFX11-NEXT: s_waitcnt_depctr 0xfff 2353; GFX11-NEXT: v_fma_f32 v2, -v0, v1, 1.0 2354; GFX11-NEXT: v_fmac_f32_e32 v1, v2, v1 2355; GFX11-NEXT: v_div_scale_f32 v2, vcc_lo, 1.0, s2, 1.0 2356; GFX11-NEXT: v_mul_f32_e32 v3, v2, v1 2357; GFX11-NEXT: v_fma_f32 v4, -v0, v3, v2 2358; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v1 2359; GFX11-NEXT: v_fma_f32 v0, -v0, v3, v2 2360; GFX11-NEXT: v_div_fmas_f32 v0, v0, v1, v3 2361; GFX11-NEXT: v_mov_b32_e32 v1, 0 2362; GFX11-NEXT: v_div_fixup_f32 v0, v0, s2, 1.0 2363; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 2364; GFX11-NEXT: s_endpgm 2365; 2366; EG-LABEL: s_fdiv_f32_denorms_correctly_rounded_divide_sqrt: 2367; EG: ; %bb.0: ; %entry 2368; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] 2369; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 2370; EG-NEXT: CF_END 2371; EG-NEXT: PAD 2372; EG-NEXT: ALU clause starting 
at 4: 2373; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, 2374; EG-NEXT: RECIP_IEEE * T1.X, KC0[2].Z, 2375; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2376entry: 2377 %fdiv = fdiv float 1.000000e+00, %a 2378 store float %fdiv, ptr addrspace(1) %out 2379 ret void 2380} 2381 2382define float @v_fdiv_f32_dynamic_denorm(float %a, float %b) #2 { 2383; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic_denorm: 2384; GFX6-FASTFMA: ; %bb.0: 2385; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2386; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 2387; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 2388; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 2389; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 2390; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 2391; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 2392; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 2393; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 2394; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 2395; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 2396; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 2397; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 2398; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 2399; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2400; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 2401; 2402; GFX6-SLOWFMA-LABEL: v_fdiv_f32_dynamic_denorm: 2403; GFX6-SLOWFMA: ; %bb.0: 2404; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2405; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 2406; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 2407; GFX6-SLOWFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 2408; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 2409; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 2410; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 2411; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 2412; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 2413; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 2414; 
GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 2415; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 2416; GFX6-SLOWFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 2417; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 2418; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2419; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 2420; 2421; GFX7-LABEL: v_fdiv_f32_dynamic_denorm: 2422; GFX7: ; %bb.0: 2423; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2424; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 2425; GFX7-NEXT: v_rcp_f32_e32 v3, v2 2426; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 2427; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 2428; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 2429; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 2430; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 2431; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 2432; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 2433; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 2434; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 2435; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 2436; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 2437; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2438; GFX7-NEXT: s_setpc_b64 s[30:31] 2439; 2440; GFX8-LABEL: v_fdiv_f32_dynamic_denorm: 2441; GFX8: ; %bb.0: 2442; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2443; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 2444; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 2445; GFX8-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 2446; GFX8-NEXT: v_rcp_f32_e32 v4, v2 2447; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 2448; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 2449; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 2450; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 2451; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 2452; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 2453; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 2454; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 2455; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 2456; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2457; GFX8-NEXT: 
s_setpc_b64 s[30:31] 2458; 2459; GFX10-LABEL: v_fdiv_f32_dynamic_denorm: 2460; GFX10: ; %bb.0: 2461; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2462; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 2463; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 2464; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 2465; GFX10-NEXT: v_rcp_f32_e32 v3, v2 2466; GFX10-NEXT: s_denorm_mode 15 2467; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 2468; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 2469; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 2470; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 2471; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 2472; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 2473; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 2474; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 2475; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2476; GFX10-NEXT: s_setpc_b64 s[30:31] 2477; 2478; GFX11-LABEL: v_fdiv_f32_dynamic_denorm: 2479; GFX11: ; %bb.0: 2480; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2481; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 2482; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 2483; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2) 2484; GFX11-NEXT: v_rcp_f32_e32 v3, v2 2485; GFX11-NEXT: s_denorm_mode 15 2486; GFX11-NEXT: s_waitcnt_depctr 0xfff 2487; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 2488; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 2489; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 2490; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 2491; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 2492; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 2493; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 2494; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 2495; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2496; GFX11-NEXT: s_setpc_b64 s[30:31] 2497; 2498; EG-LABEL: v_fdiv_f32_dynamic_denorm: 2499; EG: ; %bb.0: 2500; EG-NEXT: CF_END 2501; EG-NEXT: PAD 2502 %fdiv = fdiv float %a, %b 2503 ret float %fdiv 2504} 2505 2506define float @v_fdiv_f32_ieee(float %x, float %y) #1 { 2507; GFX6-FASTFMA-LABEL: 
v_fdiv_f32_ieee: 2508; GFX6-FASTFMA: ; %bb.0: 2509; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2510; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 2511; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 2512; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 2513; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 2514; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 2515; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 2516; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 2517; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 2518; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 2519; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 2520; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2521; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 2522; 2523; GFX6-SLOWFMA-LABEL: v_fdiv_f32_ieee: 2524; GFX6-SLOWFMA: ; %bb.0: 2525; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2526; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 2527; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 2528; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 2529; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 2530; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 2531; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 2532; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 2533; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 2534; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 2535; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 2536; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2537; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 2538; 2539; GFX7-LABEL: v_fdiv_f32_ieee: 2540; GFX7: ; %bb.0: 2541; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2542; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 2543; GFX7-NEXT: v_rcp_f32_e32 v3, v2 2544; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 2545; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 2546; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 2547; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 2548; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 2549; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 
2550; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 2551; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 2552; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2553; GFX7-NEXT: s_setpc_b64 s[30:31] 2554; 2555; GFX8-LABEL: v_fdiv_f32_ieee: 2556; GFX8: ; %bb.0: 2557; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2558; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 2559; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 2560; GFX8-NEXT: v_rcp_f32_e32 v4, v2 2561; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 2562; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 2563; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 2564; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 2565; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 2566; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 2567; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 2568; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2569; GFX8-NEXT: s_setpc_b64 s[30:31] 2570; 2571; GFX10-LABEL: v_fdiv_f32_ieee: 2572; GFX10: ; %bb.0: 2573; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2574; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 2575; GFX10-NEXT: v_rcp_f32_e32 v3, v2 2576; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 2577; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 2578; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 2579; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 2580; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 2581; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 2582; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 2583; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 2584; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2585; GFX10-NEXT: s_setpc_b64 s[30:31] 2586; 2587; GFX11-LABEL: v_fdiv_f32_ieee: 2588; GFX11: ; %bb.0: 2589; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2590; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 2591; GFX11-NEXT: v_rcp_f32_e32 v3, v2 2592; GFX11-NEXT: s_waitcnt_depctr 0xfff 2593; GFX11-NEXT: v_fma_f32 v4, -v2, v3, 1.0 2594; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 2595; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 2596; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 2597; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 2598; 
GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 2599; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 2600; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 2601; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2602; GFX11-NEXT: s_setpc_b64 s[30:31] 2603; 2604; EG-LABEL: v_fdiv_f32_ieee: 2605; EG: ; %bb.0: 2606; EG-NEXT: CF_END 2607; EG-NEXT: PAD 2608 %div = fdiv float %x, %y 2609 ret float %div 2610} 2611 2612define float @v_fdiv_f32_ieee_25ulp(float %x, float %y) #1 { 2613; GFX6-LABEL: v_fdiv_f32_ieee_25ulp: 2614; GFX6: ; %bb.0: 2615; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2616; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 2617; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v1 2618; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 2619; GFX6-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 2620; GFX6-NEXT: v_rcp_f32_e32 v2, v2 2621; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v0 2622; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 2623; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2624; GFX6-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc 2625; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2626; GFX6-NEXT: v_mul_f32_e32 v2, v3, v2 2627; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 2628; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 2629; GFX6-NEXT: s_setpc_b64 s[30:31] 2630; 2631; GFX7-LABEL: v_fdiv_f32_ieee_25ulp: 2632; GFX7: ; %bb.0: 2633; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2634; GFX7-NEXT: v_frexp_mant_f32_e32 v2, v1 2635; GFX7-NEXT: v_rcp_f32_e32 v2, v2 2636; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2637; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 2638; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 2639; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 2640; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 2641; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 2642; GFX7-NEXT: s_setpc_b64 s[30:31] 2643; 2644; GFX8-LABEL: v_fdiv_f32_ieee_25ulp: 2645; GFX8: ; %bb.0: 2646; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2647; GFX8-NEXT: v_frexp_mant_f32_e32 v2, v1 2648; GFX8-NEXT: v_rcp_f32_e32 v2, v2 2649; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2650; GFX8-NEXT: 
v_frexp_exp_i32_f32_e32 v3, v0 2651; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 2652; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 2653; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1 2654; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 2655; GFX8-NEXT: s_setpc_b64 s[30:31] 2656; 2657; GFX10-LABEL: v_fdiv_f32_ieee_25ulp: 2658; GFX10: ; %bb.0: 2659; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2660; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v1 2661; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2662; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v0 2663; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2664; GFX10-NEXT: v_rcp_f32_e32 v2, v2 2665; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 2666; GFX10-NEXT: v_mul_f32_e32 v2, v3, v2 2667; GFX10-NEXT: v_ldexp_f32 v0, v2, v0 2668; GFX10-NEXT: s_setpc_b64 s[30:31] 2669; 2670; GFX11-LABEL: v_fdiv_f32_ieee_25ulp: 2671; GFX11: ; %bb.0: 2672; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2673; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v1 2674; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2675; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v0 2676; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2677; GFX11-NEXT: v_rcp_f32_e32 v2, v2 2678; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 2679; GFX11-NEXT: s_waitcnt_depctr 0xfff 2680; GFX11-NEXT: v_mul_f32_e32 v2, v3, v2 2681; GFX11-NEXT: v_ldexp_f32 v0, v2, v0 2682; GFX11-NEXT: s_setpc_b64 s[30:31] 2683; 2684; EG-LABEL: v_fdiv_f32_ieee_25ulp: 2685; EG: ; %bb.0: 2686; EG-NEXT: CF_END 2687; EG-NEXT: PAD 2688 %div = fdiv float %x, %y, !fpmath !0 2689 ret float %div 2690} 2691 2692define float @v_fdiv_f32_dynamic(float %x, float %y) #2 { 2693; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic: 2694; GFX6-FASTFMA: ; %bb.0: 2695; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2696; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 2697; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 2698; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 2699; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 2700; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 
hwreg(HW_REG_MODE, 4, 2), 3 2701; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 2702; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 2703; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 2704; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 2705; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 2706; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 2707; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 2708; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 2709; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2710; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 2711; 2712; GFX6-SLOWFMA-LABEL: v_fdiv_f32_dynamic: 2713; GFX6-SLOWFMA: ; %bb.0: 2714; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2715; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 2716; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 2717; GFX6-SLOWFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 2718; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 2719; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 2720; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 2721; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 2722; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 2723; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 2724; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 2725; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 2726; GFX6-SLOWFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 2727; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 2728; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2729; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 2730; 2731; GFX7-LABEL: v_fdiv_f32_dynamic: 2732; GFX7: ; %bb.0: 2733; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2734; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 2735; GFX7-NEXT: v_rcp_f32_e32 v3, v2 2736; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 2737; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 2738; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 2739; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 2740; GFX7-NEXT: 
v_fma_f32 v3, v5, v3, v3 2741; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 2742; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 2743; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 2744; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 2745; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 2746; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 2747; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2748; GFX7-NEXT: s_setpc_b64 s[30:31] 2749; 2750; GFX8-LABEL: v_fdiv_f32_dynamic: 2751; GFX8: ; %bb.0: 2752; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2753; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 2754; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 2755; GFX8-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 2756; GFX8-NEXT: v_rcp_f32_e32 v4, v2 2757; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 2758; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 2759; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 2760; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 2761; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 2762; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 2763; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 2764; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 2765; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 2766; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2767; GFX8-NEXT: s_setpc_b64 s[30:31] 2768; 2769; GFX10-LABEL: v_fdiv_f32_dynamic: 2770; GFX10: ; %bb.0: 2771; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2772; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 2773; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 2774; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 2775; GFX10-NEXT: v_rcp_f32_e32 v3, v2 2776; GFX10-NEXT: s_denorm_mode 15 2777; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 2778; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 2779; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 2780; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 2781; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 2782; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 2783; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 2784; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 2785; GFX10-NEXT: 
v_div_fixup_f32 v0, v2, v1, v0 2786; GFX10-NEXT: s_setpc_b64 s[30:31] 2787; 2788; GFX11-LABEL: v_fdiv_f32_dynamic: 2789; GFX11: ; %bb.0: 2790; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2791; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 2792; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 2793; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2) 2794; GFX11-NEXT: v_rcp_f32_e32 v3, v2 2795; GFX11-NEXT: s_denorm_mode 15 2796; GFX11-NEXT: s_waitcnt_depctr 0xfff 2797; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 2798; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 2799; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 2800; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 2801; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 2802; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 2803; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 2804; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 2805; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2806; GFX11-NEXT: s_setpc_b64 s[30:31] 2807; 2808; EG-LABEL: v_fdiv_f32_dynamic: 2809; EG: ; %bb.0: 2810; EG-NEXT: CF_END 2811; EG-NEXT: PAD 2812 %div = fdiv float %x, %y 2813 ret float %div 2814} 2815 2816define float @v_fdiv_f32_dynamic_25ulp(float %x, float %y) #2 { 2817; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp: 2818; GFX6: ; %bb.0: 2819; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2820; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 2821; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v1 2822; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 2823; GFX6-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 2824; GFX6-NEXT: v_rcp_f32_e32 v2, v2 2825; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v0 2826; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 2827; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2828; GFX6-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc 2829; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2830; GFX6-NEXT: v_mul_f32_e32 v2, v3, v2 2831; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 2832; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 2833; GFX6-NEXT: s_setpc_b64 s[30:31] 2834; 2835; GFX7-LABEL: v_fdiv_f32_dynamic_25ulp: 2836; GFX7: ; 
%bb.0: 2837; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2838; GFX7-NEXT: v_frexp_mant_f32_e32 v2, v1 2839; GFX7-NEXT: v_rcp_f32_e32 v2, v2 2840; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2841; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 2842; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 2843; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 2844; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 2845; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 2846; GFX7-NEXT: s_setpc_b64 s[30:31] 2847; 2848; GFX8-LABEL: v_fdiv_f32_dynamic_25ulp: 2849; GFX8: ; %bb.0: 2850; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2851; GFX8-NEXT: v_frexp_mant_f32_e32 v2, v1 2852; GFX8-NEXT: v_rcp_f32_e32 v2, v2 2853; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2854; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 2855; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 2856; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 2857; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1 2858; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 2859; GFX8-NEXT: s_setpc_b64 s[30:31] 2860; 2861; GFX10-LABEL: v_fdiv_f32_dynamic_25ulp: 2862; GFX10: ; %bb.0: 2863; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2864; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v1 2865; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2866; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v0 2867; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2868; GFX10-NEXT: v_rcp_f32_e32 v2, v2 2869; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 2870; GFX10-NEXT: v_mul_f32_e32 v2, v3, v2 2871; GFX10-NEXT: v_ldexp_f32 v0, v2, v0 2872; GFX10-NEXT: s_setpc_b64 s[30:31] 2873; 2874; GFX11-LABEL: v_fdiv_f32_dynamic_25ulp: 2875; GFX11: ; %bb.0: 2876; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2877; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v1 2878; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 2879; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v0 2880; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 2881; GFX11-NEXT: v_rcp_f32_e32 v2, v2 2882; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 2883; GFX11-NEXT: s_waitcnt_depctr 0xfff 2884; GFX11-NEXT: v_mul_f32_e32 v2, v3, v2 
2885; GFX11-NEXT: v_ldexp_f32 v0, v2, v0 2886; GFX11-NEXT: s_setpc_b64 s[30:31] 2887; 2888; EG-LABEL: v_fdiv_f32_dynamic_25ulp: 2889; EG: ; %bb.0: 2890; EG-NEXT: CF_END 2891; EG-NEXT: PAD 2892 %div = fdiv float %x, %y, !fpmath !0 2893 ret float %div 2894} 2895 2896define float @v_fdiv_f32_daz(float %x, float %y) #0 { 2897; GFX6-FASTFMA-LABEL: v_fdiv_f32_daz: 2898; GFX6-FASTFMA: ; %bb.0: 2899; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2900; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 2901; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 2902; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 2903; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 2904; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 2905; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 2906; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 2907; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 2908; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 2909; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 2910; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 2911; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 2912; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2913; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 2914; 2915; GFX6-SLOWFMA-LABEL: v_fdiv_f32_daz: 2916; GFX6-SLOWFMA: ; %bb.0: 2917; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2918; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 2919; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 2920; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 2921; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 2922; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 2923; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 2924; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 2925; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 2926; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 2927; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 2928; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 2929; 
GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 2930; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2931; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 2932; 2933; GFX7-LABEL: v_fdiv_f32_daz: 2934; GFX7: ; %bb.0: 2935; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2936; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 2937; GFX7-NEXT: v_rcp_f32_e32 v3, v2 2938; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 2939; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 2940; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 2941; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 2942; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 2943; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 2944; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 2945; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 2946; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 2947; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 2948; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2949; GFX7-NEXT: s_setpc_b64 s[30:31] 2950; 2951; GFX8-LABEL: v_fdiv_f32_daz: 2952; GFX8: ; %bb.0: 2953; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2954; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 2955; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 2956; GFX8-NEXT: v_rcp_f32_e32 v4, v2 2957; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 2958; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 2959; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 2960; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 2961; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 2962; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 2963; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 2964; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 2965; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 2966; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2967; GFX8-NEXT: s_setpc_b64 s[30:31] 2968; 2969; GFX10-LABEL: v_fdiv_f32_daz: 2970; GFX10: ; %bb.0: 2971; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2972; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 2973; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 2974; GFX10-NEXT: v_rcp_f32_e32 v3, v2 2975; 
GFX10-NEXT: s_denorm_mode 15 2976; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 2977; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 2978; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 2979; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 2980; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 2981; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 2982; GFX10-NEXT: s_denorm_mode 12 2983; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 2984; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 2985; GFX10-NEXT: s_setpc_b64 s[30:31] 2986; 2987; GFX11-LABEL: v_fdiv_f32_daz: 2988; GFX11: ; %bb.0: 2989; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2990; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 2991; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 2992; GFX11-NEXT: v_rcp_f32_e32 v3, v2 2993; GFX11-NEXT: s_denorm_mode 15 2994; GFX11-NEXT: s_waitcnt_depctr 0xfff 2995; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 2996; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 2997; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 2998; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 2999; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 3000; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 3001; GFX11-NEXT: s_denorm_mode 12 3002; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 3003; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 3004; GFX11-NEXT: s_setpc_b64 s[30:31] 3005; 3006; EG-LABEL: v_fdiv_f32_daz: 3007; EG: ; %bb.0: 3008; EG-NEXT: CF_END 3009; EG-NEXT: PAD 3010 %div = fdiv float %x, %y 3011 ret float %div 3012} 3013 3014define float @v_fdiv_f32_daz_25ulp(float %x, float %y) #0 { 3015; GFX678-LABEL: v_fdiv_f32_daz_25ulp: 3016; GFX678: ; %bb.0: 3017; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3018; GFX678-NEXT: s_mov_b32 s4, 0x6f800000 3019; GFX678-NEXT: v_mov_b32_e32 v2, 0x2f800000 3020; GFX678-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, s4 3021; GFX678-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc 3022; GFX678-NEXT: v_mul_f32_e32 v1, v1, v2 3023; GFX678-NEXT: v_rcp_f32_e32 v1, v1 3024; GFX678-NEXT: v_mul_f32_e32 v0, v0, v1 3025; GFX678-NEXT: v_mul_f32_e32 v0, v2, v0 3026; GFX678-NEXT: s_setpc_b64 
s[30:31] 3027; 3028; GFX10-LABEL: v_fdiv_f32_daz_25ulp: 3029; GFX10: ; %bb.0: 3030; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3031; GFX10-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v1| 3032; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4 3033; GFX10-NEXT: v_mul_f32_e32 v1, v1, v2 3034; GFX10-NEXT: v_rcp_f32_e32 v1, v1 3035; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 3036; GFX10-NEXT: v_mul_f32_e32 v0, v2, v0 3037; GFX10-NEXT: s_setpc_b64 s[30:31] 3038; 3039; GFX11-LABEL: v_fdiv_f32_daz_25ulp: 3040; GFX11: ; %bb.0: 3041; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3042; GFX11-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v1| 3043; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0 3044; GFX11-NEXT: v_mul_f32_e32 v1, v1, v2 3045; GFX11-NEXT: v_rcp_f32_e32 v1, v1 3046; GFX11-NEXT: s_waitcnt_depctr 0xfff 3047; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 3048; GFX11-NEXT: v_mul_f32_e32 v0, v2, v0 3049; GFX11-NEXT: s_setpc_b64 s[30:31] 3050; 3051; EG-LABEL: v_fdiv_f32_daz_25ulp: 3052; EG: ; %bb.0: 3053; EG-NEXT: CF_END 3054; EG-NEXT: PAD 3055 %div = fdiv float %x, %y, !fpmath !0 3056 ret float %div 3057} 3058 3059; If we emit an fmul, make sure it fuses into the user. 
; fdiv contract whose only user is an fadd contract; the fdiv carries no !fpmath.
; For each amdgcn target checked below, the quotient is produced by
; v_div_fixup_f32 and the fadd stays a separate v_add_f32_e32 of v0 and v2.
; No mode-register writes (s_setreg / s_denorm_mode) are expected around the
; fma chain in this function. The r600 run expects only CF_END and PAD.
; TODO(review): attribute #1 is defined outside this part of the file; confirm
; its denormal/IEEE settings before relying on the function name.
3060define float @v_fdiv_f32_ieee_contractable_user(float %x, float %y, float %z) #1 { 3061; GFX6-FASTFMA-LABEL: v_fdiv_f32_ieee_contractable_user: 3062; GFX6-FASTFMA: ; %bb.0: 3063; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3064; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 3065; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3 3066; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v3, v4, 1.0 3067; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v4, v4 3068; GFX6-FASTFMA-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 3069; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v6, v5, v4 3070; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v3, v6, v5 3071; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v4, v6 3072; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v3, v6, v5 3073; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v3, v3, v4, v6 3074; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3075; GFX6-FASTFMA-NEXT: v_add_f32_e32 v0, v0, v2 3076; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 3077; 3078; GFX6-SLOWFMA-LABEL: v_fdiv_f32_ieee_contractable_user: 3079; GFX6-SLOWFMA: ; %bb.0: 3080; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3081; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 3082; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 3083; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v5, v3 3084; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v3, v5, 1.0 3085; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v5, v5 3086; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v6, v4, v5 3087; GFX6-SLOWFMA-NEXT: v_fma_f32 v7, -v3, v6, v4 3088; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, v7, v5, v6 3089; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, -v3, v6, v4 3090; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v3, v3, v5, v6 3091; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3092; GFX6-SLOWFMA-NEXT: v_add_f32_e32 v0, v0, v2 3093; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 3094; 3095; GFX7-LABEL: v_fdiv_f32_ieee_contractable_user: 3096; GFX7: ; %bb.0: 3097; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3098; GFX7-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 3099; GFX7-NEXT: v_rcp_f32_e32 v4, v3 
3100; GFX7-NEXT: v_fma_f32 v5, -v3, v4, 1.0 3101; GFX7-NEXT: v_fma_f32 v4, v5, v4, v4 3102; GFX7-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 3103; GFX7-NEXT: v_mul_f32_e32 v6, v5, v4 3104; GFX7-NEXT: v_fma_f32 v7, -v3, v6, v5 3105; GFX7-NEXT: v_fma_f32 v6, v7, v4, v6 3106; GFX7-NEXT: v_fma_f32 v3, -v3, v6, v5 3107; GFX7-NEXT: v_div_fmas_f32 v3, v3, v4, v6 3108; GFX7-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3109; GFX7-NEXT: v_add_f32_e32 v0, v0, v2 3110; GFX7-NEXT: s_setpc_b64 s[30:31] 3111; 3112; GFX8-LABEL: v_fdiv_f32_ieee_contractable_user: 3113; GFX8: ; %bb.0: 3114; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3115; GFX8-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 3116; GFX8-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 3117; GFX8-NEXT: v_rcp_f32_e32 v5, v3 3118; GFX8-NEXT: v_fma_f32 v6, -v3, v5, 1.0 3119; GFX8-NEXT: v_fma_f32 v5, v6, v5, v5 3120; GFX8-NEXT: v_mul_f32_e32 v6, v4, v5 3121; GFX8-NEXT: v_fma_f32 v7, -v3, v6, v4 3122; GFX8-NEXT: v_fma_f32 v6, v7, v5, v6 3123; GFX8-NEXT: v_fma_f32 v3, -v3, v6, v4 3124; GFX8-NEXT: v_div_fmas_f32 v3, v3, v5, v6 3125; GFX8-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3126; GFX8-NEXT: v_add_f32_e32 v0, v0, v2 3127; GFX8-NEXT: s_setpc_b64 s[30:31] 3128; 3129; GFX10-LABEL: v_fdiv_f32_ieee_contractable_user: 3130; GFX10: ; %bb.0: 3131; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3132; GFX10-NEXT: v_div_scale_f32 v3, s4, v1, v1, v0 3133; GFX10-NEXT: v_rcp_f32_e32 v4, v3 3134; GFX10-NEXT: v_fma_f32 v5, -v3, v4, 1.0 3135; GFX10-NEXT: v_fmac_f32_e32 v4, v5, v4 3136; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 3137; GFX10-NEXT: v_mul_f32_e32 v6, v5, v4 3138; GFX10-NEXT: v_fma_f32 v7, -v3, v6, v5 3139; GFX10-NEXT: v_fmac_f32_e32 v6, v7, v4 3140; GFX10-NEXT: v_fma_f32 v3, -v3, v6, v5 3141; GFX10-NEXT: v_div_fmas_f32 v3, v3, v4, v6 3142; GFX10-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3143; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 3144; GFX10-NEXT: s_setpc_b64 s[30:31] 3145; 3146; GFX11-LABEL: v_fdiv_f32_ieee_contractable_user: 3147; 
GFX11: ; %bb.0: 3148; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3149; GFX11-NEXT: v_div_scale_f32 v3, null, v1, v1, v0 3150; GFX11-NEXT: v_rcp_f32_e32 v4, v3 3151; GFX11-NEXT: s_waitcnt_depctr 0xfff 3152; GFX11-NEXT: v_fma_f32 v5, -v3, v4, 1.0 3153; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v4 3154; GFX11-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 3155; GFX11-NEXT: v_mul_f32_e32 v6, v5, v4 3156; GFX11-NEXT: v_fma_f32 v7, -v3, v6, v5 3157; GFX11-NEXT: v_fmac_f32_e32 v6, v7, v4 3158; GFX11-NEXT: v_fma_f32 v3, -v3, v6, v5 3159; GFX11-NEXT: v_div_fmas_f32 v3, v3, v4, v6 3160; GFX11-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3161; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 3162; GFX11-NEXT: s_setpc_b64 s[30:31] 3163; 3164; EG-LABEL: v_fdiv_f32_ieee_contractable_user: 3165; EG: ; %bb.0: 3166; EG-NEXT: CF_END 3167; EG-NEXT: PAD 3168 %div = fdiv contract float %x, %y 3169 %add = fadd contract float %div, %z 3170 ret float %add 3171} 3172 3173define float @v_fdiv_f32_ieee_25ulp_contractable_user(float %x, float %y, float %z) #1 { 3174; GFX6-LABEL: v_fdiv_f32_ieee_25ulp_contractable_user: 3175; GFX6: ; %bb.0: 3176; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3177; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 3178; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v1 3179; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 3180; GFX6-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 3181; GFX6-NEXT: v_rcp_f32_e32 v3, v3 3182; GFX6-NEXT: v_frexp_mant_f32_e32 v4, v0 3183; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 3184; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3185; GFX6-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc 3186; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 3187; GFX6-NEXT: v_mul_f32_e32 v3, v4, v3 3188; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 3189; GFX6-NEXT: v_ldexp_f32_e32 v0, v3, v0 3190; GFX6-NEXT: v_add_f32_e32 v0, v0, v2 3191; GFX6-NEXT: s_setpc_b64 s[30:31] 3192; 3193; GFX7-LABEL: v_fdiv_f32_ieee_25ulp_contractable_user: 3194; GFX7: ; %bb.0: 3195; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3196; 
GFX7-NEXT: v_frexp_mant_f32_e32 v3, v1 3197; GFX7-NEXT: v_rcp_f32_e32 v3, v3 3198; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3199; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v4, v0 3200; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 3201; GFX7-NEXT: v_mul_f32_e32 v0, v0, v3 3202; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v4, v1 3203; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 3204; GFX7-NEXT: v_add_f32_e32 v0, v0, v2 3205; GFX7-NEXT: s_setpc_b64 s[30:31] 3206; 3207; GFX8-LABEL: v_fdiv_f32_ieee_25ulp_contractable_user: 3208; GFX8: ; %bb.0: 3209; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3210; GFX8-NEXT: v_frexp_mant_f32_e32 v3, v1 3211; GFX8-NEXT: v_rcp_f32_e32 v3, v3 3212; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3213; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v4, v0 3214; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 3215; GFX8-NEXT: v_mul_f32_e32 v0, v0, v3 3216; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v4, v1 3217; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 3218; GFX8-NEXT: v_add_f32_e32 v0, v0, v2 3219; GFX8-NEXT: s_setpc_b64 s[30:31] 3220; 3221; GFX10-LABEL: v_fdiv_f32_ieee_25ulp_contractable_user: 3222; GFX10: ; %bb.0: 3223; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3224; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v1 3225; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3226; GFX10-NEXT: v_frexp_mant_f32_e32 v4, v0 3227; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 3228; GFX10-NEXT: v_rcp_f32_e32 v3, v3 3229; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 3230; GFX10-NEXT: v_mul_f32_e32 v3, v4, v3 3231; GFX10-NEXT: v_ldexp_f32 v0, v3, v0 3232; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 3233; GFX10-NEXT: s_setpc_b64 s[30:31] 3234; 3235; GFX11-LABEL: v_fdiv_f32_ieee_25ulp_contractable_user: 3236; GFX11: ; %bb.0: 3237; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3238; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v1 3239; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3240; GFX11-NEXT: v_frexp_mant_f32_e32 v4, v0 3241; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 3242; GFX11-NEXT: v_rcp_f32_e32 v3, v3 3243; GFX11-NEXT: 
v_sub_nc_u32_e32 v0, v0, v1 3244; GFX11-NEXT: s_waitcnt_depctr 0xfff 3245; GFX11-NEXT: v_mul_f32_e32 v3, v4, v3 3246; GFX11-NEXT: v_ldexp_f32 v0, v3, v0 3247; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 3248; GFX11-NEXT: s_setpc_b64 s[30:31] 3249; 3250; EG-LABEL: v_fdiv_f32_ieee_25ulp_contractable_user: 3251; EG: ; %bb.0: 3252; EG-NEXT: CF_END 3253; EG-NEXT: PAD 3254 %div = fdiv contract float %x, %y, !fpmath !0 3255 %add = fadd contract float %div, %z 3256 ret float %add 3257} 3258 3259define float @v_fdiv_f32_dynamic_contractable_user(float %x, float %y, float %z) #2 { 3260; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic_contractable_user: 3261; GFX6-FASTFMA: ; %bb.0: 3262; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3263; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 3264; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3 3265; GFX6-FASTFMA-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 3266; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 3267; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 3268; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v3, v4, 1.0 3269; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v6, v4, v4 3270; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v6, v5, v4 3271; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v3, v6, v5 3272; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v4, v6 3273; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v3, v6, v5 3274; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 3275; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v3, v3, v4, v6 3276; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3277; GFX6-FASTFMA-NEXT: v_add_f32_e32 v0, v0, v2 3278; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 3279; 3280; GFX6-SLOWFMA-LABEL: v_fdiv_f32_dynamic_contractable_user: 3281; GFX6-SLOWFMA: ; %bb.0: 3282; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3283; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 3284; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 3285; GFX6-SLOWFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 3286; GFX6-SLOWFMA-NEXT: 
v_rcp_f32_e32 v5, v3 3287; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 3288; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v3, v5, 1.0 3289; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v5, v5 3290; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v6, v4, v5 3291; GFX6-SLOWFMA-NEXT: v_fma_f32 v7, -v3, v6, v4 3292; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, v7, v5, v6 3293; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, -v3, v6, v4 3294; GFX6-SLOWFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 3295; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v3, v3, v5, v6 3296; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3297; GFX6-SLOWFMA-NEXT: v_add_f32_e32 v0, v0, v2 3298; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 3299; 3300; GFX7-LABEL: v_fdiv_f32_dynamic_contractable_user: 3301; GFX7: ; %bb.0: 3302; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3303; GFX7-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 3304; GFX7-NEXT: v_rcp_f32_e32 v4, v3 3305; GFX7-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 3306; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 3307; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 3308; GFX7-NEXT: v_fma_f32 v6, -v3, v4, 1.0 3309; GFX7-NEXT: v_fma_f32 v4, v6, v4, v4 3310; GFX7-NEXT: v_mul_f32_e32 v6, v5, v4 3311; GFX7-NEXT: v_fma_f32 v7, -v3, v6, v5 3312; GFX7-NEXT: v_fma_f32 v6, v7, v4, v6 3313; GFX7-NEXT: v_fma_f32 v3, -v3, v6, v5 3314; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 3315; GFX7-NEXT: v_div_fmas_f32 v3, v3, v4, v6 3316; GFX7-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3317; GFX7-NEXT: v_add_f32_e32 v0, v0, v2 3318; GFX7-NEXT: s_setpc_b64 s[30:31] 3319; 3320; GFX8-LABEL: v_fdiv_f32_dynamic_contractable_user: 3321; GFX8: ; %bb.0: 3322; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3323; GFX8-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 3324; GFX8-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 3325; GFX8-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 3326; GFX8-NEXT: v_rcp_f32_e32 v5, v3 3327; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 3328; GFX8-NEXT: 
v_fma_f32 v6, -v3, v5, 1.0 3329; GFX8-NEXT: v_fma_f32 v5, v6, v5, v5 3330; GFX8-NEXT: v_mul_f32_e32 v6, v4, v5 3331; GFX8-NEXT: v_fma_f32 v7, -v3, v6, v4 3332; GFX8-NEXT: v_fma_f32 v6, v7, v5, v6 3333; GFX8-NEXT: v_fma_f32 v3, -v3, v6, v4 3334; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 3335; GFX8-NEXT: v_div_fmas_f32 v3, v3, v5, v6 3336; GFX8-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3337; GFX8-NEXT: v_add_f32_e32 v0, v0, v2 3338; GFX8-NEXT: s_setpc_b64 s[30:31] 3339; 3340; GFX10-LABEL: v_fdiv_f32_dynamic_contractable_user: 3341; GFX10: ; %bb.0: 3342; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3343; GFX10-NEXT: v_div_scale_f32 v3, s4, v1, v1, v0 3344; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 3345; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 3346; GFX10-NEXT: v_rcp_f32_e32 v4, v3 3347; GFX10-NEXT: s_denorm_mode 15 3348; GFX10-NEXT: v_fma_f32 v6, -v3, v4, 1.0 3349; GFX10-NEXT: v_fmac_f32_e32 v4, v6, v4 3350; GFX10-NEXT: v_mul_f32_e32 v6, v5, v4 3351; GFX10-NEXT: v_fma_f32 v7, -v3, v6, v5 3352; GFX10-NEXT: v_fmac_f32_e32 v6, v7, v4 3353; GFX10-NEXT: v_fma_f32 v3, -v3, v6, v5 3354; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 3355; GFX10-NEXT: v_div_fmas_f32 v3, v3, v4, v6 3356; GFX10-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3357; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 3358; GFX10-NEXT: s_setpc_b64 s[30:31] 3359; 3360; GFX11-LABEL: v_fdiv_f32_dynamic_contractable_user: 3361; GFX11: ; %bb.0: 3362; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3363; GFX11-NEXT: v_div_scale_f32 v3, null, v1, v1, v0 3364; GFX11-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 3365; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2) 3366; GFX11-NEXT: v_rcp_f32_e32 v4, v3 3367; GFX11-NEXT: s_denorm_mode 15 3368; GFX11-NEXT: s_waitcnt_depctr 0xfff 3369; GFX11-NEXT: v_fma_f32 v6, -v3, v4, 1.0 3370; GFX11-NEXT: v_fmac_f32_e32 v4, v6, v4 3371; GFX11-NEXT: v_mul_f32_e32 v6, v5, v4 3372; GFX11-NEXT: v_fma_f32 v7, -v3, v6, v5 3373; GFX11-NEXT: 
v_fmac_f32_e32 v6, v7, v4 3374; GFX11-NEXT: v_fma_f32 v3, -v3, v6, v5 3375; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 3376; GFX11-NEXT: v_div_fmas_f32 v3, v3, v4, v6 3377; GFX11-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3378; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 3379; GFX11-NEXT: s_setpc_b64 s[30:31] 3380; 3381; EG-LABEL: v_fdiv_f32_dynamic_contractable_user: 3382; EG: ; %bb.0: 3383; EG-NEXT: CF_END 3384; EG-NEXT: PAD 3385 %div = fdiv contract float %x, %y 3386 %add = fadd contract float %div, %z 3387 ret float %add 3388} 3389 3390define float @v_fdiv_f32_dynamic_25ulp_contractable_user(float %x, float %y, float %z) #2 { 3391; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp_contractable_user: 3392; GFX6: ; %bb.0: 3393; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3394; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 3395; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v1 3396; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 3397; GFX6-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 3398; GFX6-NEXT: v_rcp_f32_e32 v3, v3 3399; GFX6-NEXT: v_frexp_mant_f32_e32 v4, v0 3400; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 3401; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3402; GFX6-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc 3403; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 3404; GFX6-NEXT: v_mul_f32_e32 v3, v4, v3 3405; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 3406; GFX6-NEXT: v_ldexp_f32_e32 v0, v3, v0 3407; GFX6-NEXT: v_add_f32_e32 v0, v0, v2 3408; GFX6-NEXT: s_setpc_b64 s[30:31] 3409; 3410; GFX7-LABEL: v_fdiv_f32_dynamic_25ulp_contractable_user: 3411; GFX7: ; %bb.0: 3412; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3413; GFX7-NEXT: v_frexp_mant_f32_e32 v3, v1 3414; GFX7-NEXT: v_rcp_f32_e32 v3, v3 3415; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3416; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v4, v0 3417; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 3418; GFX7-NEXT: v_mul_f32_e32 v0, v0, v3 3419; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v4, v1 3420; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 3421; GFX7-NEXT: v_add_f32_e32 v0, v0, v2 
3422; GFX7-NEXT: s_setpc_b64 s[30:31] 3423; 3424; GFX8-LABEL: v_fdiv_f32_dynamic_25ulp_contractable_user: 3425; GFX8: ; %bb.0: 3426; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3427; GFX8-NEXT: v_frexp_mant_f32_e32 v3, v1 3428; GFX8-NEXT: v_rcp_f32_e32 v3, v3 3429; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3430; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v4, v0 3431; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 3432; GFX8-NEXT: v_mul_f32_e32 v0, v0, v3 3433; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v4, v1 3434; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 3435; GFX8-NEXT: v_add_f32_e32 v0, v0, v2 3436; GFX8-NEXT: s_setpc_b64 s[30:31] 3437; 3438; GFX10-LABEL: v_fdiv_f32_dynamic_25ulp_contractable_user: 3439; GFX10: ; %bb.0: 3440; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3441; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v1 3442; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3443; GFX10-NEXT: v_frexp_mant_f32_e32 v4, v0 3444; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 3445; GFX10-NEXT: v_rcp_f32_e32 v3, v3 3446; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 3447; GFX10-NEXT: v_mul_f32_e32 v3, v4, v3 3448; GFX10-NEXT: v_ldexp_f32 v0, v3, v0 3449; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 3450; GFX10-NEXT: s_setpc_b64 s[30:31] 3451; 3452; GFX11-LABEL: v_fdiv_f32_dynamic_25ulp_contractable_user: 3453; GFX11: ; %bb.0: 3454; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3455; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v1 3456; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3457; GFX11-NEXT: v_frexp_mant_f32_e32 v4, v0 3458; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 3459; GFX11-NEXT: v_rcp_f32_e32 v3, v3 3460; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 3461; GFX11-NEXT: s_waitcnt_depctr 0xfff 3462; GFX11-NEXT: v_mul_f32_e32 v3, v4, v3 3463; GFX11-NEXT: v_ldexp_f32 v0, v3, v0 3464; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 3465; GFX11-NEXT: s_setpc_b64 s[30:31] 3466; 3467; EG-LABEL: v_fdiv_f32_dynamic_25ulp_contractable_user: 3468; EG: ; %bb.0: 3469; EG-NEXT: CF_END 3470; EG-NEXT: PAD 3471 %div = fdiv contract float %x, 
%y, !fpmath !0 3472 %add = fadd contract float %div, %z 3473 ret float %add 3474} 3475 3476define float @v_fdiv_f32_daz_contractable_user(float %x, float %y, float %z) #0 { 3477; GFX6-FASTFMA-LABEL: v_fdiv_f32_daz_contractable_user: 3478; GFX6-FASTFMA: ; %bb.0: 3479; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3480; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 3481; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3 3482; GFX6-FASTFMA-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 3483; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 3484; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v3, v4, 1.0 3485; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v6, v4, v4 3486; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v6, v5, v4 3487; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v3, v6, v5 3488; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v4, v6 3489; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v3, v6, v5 3490; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 3491; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v3, v3, v4, v6 3492; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3493; GFX6-FASTFMA-NEXT: v_add_f32_e32 v0, v0, v2 3494; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 3495; 3496; GFX6-SLOWFMA-LABEL: v_fdiv_f32_daz_contractable_user: 3497; GFX6-SLOWFMA: ; %bb.0: 3498; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3499; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 3500; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 3501; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v5, v3 3502; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 3503; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v3, v5, 1.0 3504; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v5, v5 3505; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v6, v4, v5 3506; GFX6-SLOWFMA-NEXT: v_fma_f32 v7, -v3, v6, v4 3507; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, v7, v5, v6 3508; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, -v3, v6, v4 3509; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 3510; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v3, v3, v5, v6 3511; 
GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3512; GFX6-SLOWFMA-NEXT: v_add_f32_e32 v0, v0, v2 3513; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 3514; 3515; GFX7-LABEL: v_fdiv_f32_daz_contractable_user: 3516; GFX7: ; %bb.0: 3517; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3518; GFX7-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 3519; GFX7-NEXT: v_rcp_f32_e32 v4, v3 3520; GFX7-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 3521; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 3522; GFX7-NEXT: v_fma_f32 v6, -v3, v4, 1.0 3523; GFX7-NEXT: v_fma_f32 v4, v6, v4, v4 3524; GFX7-NEXT: v_mul_f32_e32 v6, v5, v4 3525; GFX7-NEXT: v_fma_f32 v7, -v3, v6, v5 3526; GFX7-NEXT: v_fma_f32 v6, v7, v4, v6 3527; GFX7-NEXT: v_fma_f32 v3, -v3, v6, v5 3528; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 3529; GFX7-NEXT: v_div_fmas_f32 v3, v3, v4, v6 3530; GFX7-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3531; GFX7-NEXT: v_add_f32_e32 v0, v0, v2 3532; GFX7-NEXT: s_setpc_b64 s[30:31] 3533; 3534; GFX8-LABEL: v_fdiv_f32_daz_contractable_user: 3535; GFX8: ; %bb.0: 3536; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3537; GFX8-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 3538; GFX8-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 3539; GFX8-NEXT: v_rcp_f32_e32 v5, v3 3540; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 3541; GFX8-NEXT: v_fma_f32 v6, -v3, v5, 1.0 3542; GFX8-NEXT: v_fma_f32 v5, v6, v5, v5 3543; GFX8-NEXT: v_mul_f32_e32 v6, v4, v5 3544; GFX8-NEXT: v_fma_f32 v7, -v3, v6, v4 3545; GFX8-NEXT: v_fma_f32 v6, v7, v5, v6 3546; GFX8-NEXT: v_fma_f32 v3, -v3, v6, v4 3547; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 3548; GFX8-NEXT: v_div_fmas_f32 v3, v3, v5, v6 3549; GFX8-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3550; GFX8-NEXT: v_add_f32_e32 v0, v0, v2 3551; GFX8-NEXT: s_setpc_b64 s[30:31] 3552; 3553; GFX10-LABEL: v_fdiv_f32_daz_contractable_user: 3554; GFX10: ; %bb.0: 3555; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3556; GFX10-NEXT: 
v_div_scale_f32 v3, s4, v1, v1, v0 3557; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 3558; GFX10-NEXT: v_rcp_f32_e32 v4, v3 3559; GFX10-NEXT: s_denorm_mode 15 3560; GFX10-NEXT: v_fma_f32 v6, -v3, v4, 1.0 3561; GFX10-NEXT: v_fmac_f32_e32 v4, v6, v4 3562; GFX10-NEXT: v_mul_f32_e32 v6, v5, v4 3563; GFX10-NEXT: v_fma_f32 v7, -v3, v6, v5 3564; GFX10-NEXT: v_fmac_f32_e32 v6, v7, v4 3565; GFX10-NEXT: v_fma_f32 v3, -v3, v6, v5 3566; GFX10-NEXT: s_denorm_mode 12 3567; GFX10-NEXT: v_div_fmas_f32 v3, v3, v4, v6 3568; GFX10-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3569; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 3570; GFX10-NEXT: s_setpc_b64 s[30:31] 3571; 3572; GFX11-LABEL: v_fdiv_f32_daz_contractable_user: 3573; GFX11: ; %bb.0: 3574; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3575; GFX11-NEXT: v_div_scale_f32 v3, null, v1, v1, v0 3576; GFX11-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 3577; GFX11-NEXT: v_rcp_f32_e32 v4, v3 3578; GFX11-NEXT: s_denorm_mode 15 3579; GFX11-NEXT: s_waitcnt_depctr 0xfff 3580; GFX11-NEXT: v_fma_f32 v6, -v3, v4, 1.0 3581; GFX11-NEXT: v_fmac_f32_e32 v4, v6, v4 3582; GFX11-NEXT: v_mul_f32_e32 v6, v5, v4 3583; GFX11-NEXT: v_fma_f32 v7, -v3, v6, v5 3584; GFX11-NEXT: v_fmac_f32_e32 v6, v7, v4 3585; GFX11-NEXT: v_fma_f32 v3, -v3, v6, v5 3586; GFX11-NEXT: s_denorm_mode 12 3587; GFX11-NEXT: v_div_fmas_f32 v3, v3, v4, v6 3588; GFX11-NEXT: v_div_fixup_f32 v0, v3, v1, v0 3589; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 3590; GFX11-NEXT: s_setpc_b64 s[30:31] 3591; 3592; EG-LABEL: v_fdiv_f32_daz_contractable_user: 3593; EG: ; %bb.0: 3594; EG-NEXT: CF_END 3595; EG-NEXT: PAD 3596 %div = fdiv contract float %x, %y 3597 %add = fadd contract float %div, %z 3598 ret float %add 3599} 3600 3601define float @v_fdiv_f32_daz_25ulp_contractable_user(float %x, float %y, float %z) #0 { 3602; GFX678-LABEL: v_fdiv_f32_daz_25ulp_contractable_user: 3603; GFX678: ; %bb.0: 3604; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3605; GFX678-NEXT: s_mov_b32 s4, 0x6f800000 3606; 
GFX678-NEXT: v_mov_b32_e32 v3, 0x2f800000 3607; GFX678-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, s4 3608; GFX678-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc 3609; GFX678-NEXT: v_mul_f32_e32 v1, v1, v3 3610; GFX678-NEXT: v_rcp_f32_e32 v1, v1 3611; GFX678-NEXT: v_mul_f32_e32 v0, v0, v1 3612; GFX678-NEXT: v_mad_f32 v0, v3, v0, v2 3613; GFX678-NEXT: s_setpc_b64 s[30:31] 3614; 3615; GFX10-LABEL: v_fdiv_f32_daz_25ulp_contractable_user: 3616; GFX10: ; %bb.0: 3617; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3618; GFX10-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v1| 3619; GFX10-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x2f800000, s4 3620; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3 3621; GFX10-NEXT: v_rcp_f32_e32 v1, v1 3622; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 3623; GFX10-NEXT: v_mad_f32 v0, v3, v0, v2 3624; GFX10-NEXT: s_setpc_b64 s[30:31] 3625; 3626; GFX11-LABEL: v_fdiv_f32_daz_25ulp_contractable_user: 3627; GFX11: ; %bb.0: 3628; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3629; GFX11-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v1| 3630; GFX11-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x2f800000, s0 3631; GFX11-NEXT: v_mul_f32_e32 v1, v1, v3 3632; GFX11-NEXT: v_rcp_f32_e32 v1, v1 3633; GFX11-NEXT: s_waitcnt_depctr 0xfff 3634; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 3635; GFX11-NEXT: v_fma_f32 v0, v3, v0, v2 3636; GFX11-NEXT: s_setpc_b64 s[30:31] 3637; 3638; EG-LABEL: v_fdiv_f32_daz_25ulp_contractable_user: 3639; EG: ; %bb.0: 3640; EG-NEXT: CF_END 3641; EG-NEXT: PAD 3642 %div = fdiv contract float %x, %y, !fpmath !0 3643 %add = fadd contract float %div, %z 3644 ret float %add 3645} 3646 3647define float @v_fdiv_f32_ieee__nnan_ninf(float %x, float %y, float %z) #1 { 3648; GFX6-FASTFMA-LABEL: v_fdiv_f32_ieee__nnan_ninf: 3649; GFX6-FASTFMA: ; %bb.0: 3650; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3651; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 3652; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 3653; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 3654; GFX6-FASTFMA-NEXT: 
v_fma_f32 v3, v4, v3, v3 3655; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 3656; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 3657; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 3658; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 3659; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 3660; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 3661; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 3662; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 3663; 3664; GFX6-SLOWFMA-LABEL: v_fdiv_f32_ieee__nnan_ninf: 3665; GFX6-SLOWFMA: ; %bb.0: 3666; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3667; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 3668; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 3669; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 3670; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 3671; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 3672; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 3673; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 3674; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 3675; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 3676; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 3677; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 3678; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 3679; 3680; GFX7-LABEL: v_fdiv_f32_ieee__nnan_ninf: 3681; GFX7: ; %bb.0: 3682; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3683; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 3684; GFX7-NEXT: v_rcp_f32_e32 v3, v2 3685; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 3686; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 3687; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 3688; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 3689; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 3690; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 3691; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 3692; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 3693; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 3694; GFX7-NEXT: s_setpc_b64 s[30:31] 3695; 3696; GFX8-LABEL: v_fdiv_f32_ieee__nnan_ninf: 3697; GFX8: ; %bb.0: 3698; GFX8-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 3699; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 3700; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 3701; GFX8-NEXT: v_rcp_f32_e32 v4, v2 3702; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 3703; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 3704; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 3705; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 3706; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 3707; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 3708; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 3709; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 3710; GFX8-NEXT: s_setpc_b64 s[30:31] 3711; 3712; GFX10-LABEL: v_fdiv_f32_ieee__nnan_ninf: 3713; GFX10: ; %bb.0: 3714; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3715; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 3716; GFX10-NEXT: v_rcp_f32_e32 v3, v2 3717; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 3718; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 3719; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 3720; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 3721; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 3722; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 3723; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 3724; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 3725; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 3726; GFX10-NEXT: s_setpc_b64 s[30:31] 3727; 3728; GFX11-LABEL: v_fdiv_f32_ieee__nnan_ninf: 3729; GFX11: ; %bb.0: 3730; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3731; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 3732; GFX11-NEXT: v_rcp_f32_e32 v3, v2 3733; GFX11-NEXT: s_waitcnt_depctr 0xfff 3734; GFX11-NEXT: v_fma_f32 v4, -v2, v3, 1.0 3735; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 3736; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 3737; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 3738; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 3739; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 3740; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 3741; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 3742; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 3743; GFX11-NEXT: s_setpc_b64 s[30:31] 3744; 3745; EG-LABEL: 
v_fdiv_f32_ieee__nnan_ninf: 3746; EG: ; %bb.0: 3747; EG-NEXT: CF_END 3748; EG-NEXT: PAD 3749 %div = fdiv nnan ninf float %x, %y 3750 ret float %div 3751} 3752 3753define float @v_fdiv_f32_ieee_25ulp__nnan_ninf(float %x, float %y, float %z) #1 { 3754; GFX6-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf: 3755; GFX6: ; %bb.0: 3756; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3757; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 3758; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v1 3759; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 3760; GFX6-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 3761; GFX6-NEXT: v_rcp_f32_e32 v2, v2 3762; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v0 3763; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 3764; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3765; GFX6-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc 3766; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 3767; GFX6-NEXT: v_mul_f32_e32 v2, v3, v2 3768; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 3769; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 3770; GFX6-NEXT: s_setpc_b64 s[30:31] 3771; 3772; GFX7-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf: 3773; GFX7: ; %bb.0: 3774; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3775; GFX7-NEXT: v_frexp_mant_f32_e32 v2, v1 3776; GFX7-NEXT: v_rcp_f32_e32 v2, v2 3777; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3778; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 3779; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 3780; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 3781; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 3782; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 3783; GFX7-NEXT: s_setpc_b64 s[30:31] 3784; 3785; GFX8-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf: 3786; GFX8: ; %bb.0: 3787; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3788; GFX8-NEXT: v_frexp_mant_f32_e32 v2, v1 3789; GFX8-NEXT: v_rcp_f32_e32 v2, v2 3790; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3791; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 3792; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 3793; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 3794; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1 3795; 
GFX8-NEXT: v_ldexp_f32 v0, v0, v1 3796; GFX8-NEXT: s_setpc_b64 s[30:31] 3797; 3798; GFX10-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf: 3799; GFX10: ; %bb.0: 3800; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3801; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v1 3802; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3803; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v0 3804; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 3805; GFX10-NEXT: v_rcp_f32_e32 v2, v2 3806; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 3807; GFX10-NEXT: v_mul_f32_e32 v2, v3, v2 3808; GFX10-NEXT: v_ldexp_f32 v0, v2, v0 3809; GFX10-NEXT: s_setpc_b64 s[30:31] 3810; 3811; GFX11-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf: 3812; GFX11: ; %bb.0: 3813; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3814; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v1 3815; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3816; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v0 3817; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 3818; GFX11-NEXT: v_rcp_f32_e32 v2, v2 3819; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 3820; GFX11-NEXT: s_waitcnt_depctr 0xfff 3821; GFX11-NEXT: v_mul_f32_e32 v2, v3, v2 3822; GFX11-NEXT: v_ldexp_f32 v0, v2, v0 3823; GFX11-NEXT: s_setpc_b64 s[30:31] 3824; 3825; EG-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf: 3826; EG: ; %bb.0: 3827; EG-NEXT: CF_END 3828; EG-NEXT: PAD 3829 %div = fdiv nnan ninf float %x, %y, !fpmath !0 3830 ret float %div 3831} 3832 3833define float @v_fdiv_f32_dynamic__nnan_ninf(float %x, float %y, float %z) #2 { 3834; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic__nnan_ninf: 3835; GFX6-FASTFMA: ; %bb.0: 3836; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3837; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 3838; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 3839; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 3840; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 3841; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 3842; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 3843; 
GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 3844; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 3845; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 3846; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 3847; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 3848; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 3849; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 3850; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 3851; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 3852; 3853; GFX6-SLOWFMA-LABEL: v_fdiv_f32_dynamic__nnan_ninf: 3854; GFX6-SLOWFMA: ; %bb.0: 3855; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3856; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 3857; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 3858; GFX6-SLOWFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 3859; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 3860; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 3861; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 3862; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 3863; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 3864; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 3865; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 3866; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 3867; GFX6-SLOWFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 3868; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 3869; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 3870; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 3871; 3872; GFX7-LABEL: v_fdiv_f32_dynamic__nnan_ninf: 3873; GFX7: ; %bb.0: 3874; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3875; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 3876; GFX7-NEXT: v_rcp_f32_e32 v3, v2 3877; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 3878; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 3879; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 3880; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 3881; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 3882; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 
3883; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 3884; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 3885; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 3886; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 3887; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 3888; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 3889; GFX7-NEXT: s_setpc_b64 s[30:31] 3890; 3891; GFX8-LABEL: v_fdiv_f32_dynamic__nnan_ninf: 3892; GFX8: ; %bb.0: 3893; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3894; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 3895; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 3896; GFX8-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 3897; GFX8-NEXT: v_rcp_f32_e32 v4, v2 3898; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 3899; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 3900; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 3901; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 3902; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 3903; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 3904; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 3905; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 3906; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 3907; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 3908; GFX8-NEXT: s_setpc_b64 s[30:31] 3909; 3910; GFX10-LABEL: v_fdiv_f32_dynamic__nnan_ninf: 3911; GFX10: ; %bb.0: 3912; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3913; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 3914; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 3915; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 3916; GFX10-NEXT: v_rcp_f32_e32 v3, v2 3917; GFX10-NEXT: s_denorm_mode 15 3918; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 3919; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 3920; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 3921; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 3922; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 3923; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 3924; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 3925; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 3926; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 3927; GFX10-NEXT: 
s_setpc_b64 s[30:31] 3928; 3929; GFX11-LABEL: v_fdiv_f32_dynamic__nnan_ninf: 3930; GFX11: ; %bb.0: 3931; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3932; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 3933; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 3934; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2) 3935; GFX11-NEXT: v_rcp_f32_e32 v3, v2 3936; GFX11-NEXT: s_denorm_mode 15 3937; GFX11-NEXT: s_waitcnt_depctr 0xfff 3938; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 3939; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 3940; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 3941; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 3942; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 3943; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 3944; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 3945; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 3946; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 3947; GFX11-NEXT: s_setpc_b64 s[30:31] 3948; 3949; EG-LABEL: v_fdiv_f32_dynamic__nnan_ninf: 3950; EG: ; %bb.0: 3951; EG-NEXT: CF_END 3952; EG-NEXT: PAD 3953 %div = fdiv nnan ninf float %x, %y 3954 ret float %div 3955} 3956 3957define float @v_fdiv_f32_dynamic_25ulp__nnan_ninf(float %x, float %y, float %z) #2 { 3958; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf: 3959; GFX6: ; %bb.0: 3960; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3961; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 3962; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v1 3963; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 3964; GFX6-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 3965; GFX6-NEXT: v_rcp_f32_e32 v2, v2 3966; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v0 3967; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 3968; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3969; GFX6-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc 3970; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 3971; GFX6-NEXT: v_mul_f32_e32 v2, v3, v2 3972; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 3973; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 3974; GFX6-NEXT: s_setpc_b64 s[30:31] 3975; 3976; GFX7-LABEL: 
v_fdiv_f32_dynamic_25ulp__nnan_ninf: 3977; GFX7: ; %bb.0: 3978; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3979; GFX7-NEXT: v_frexp_mant_f32_e32 v2, v1 3980; GFX7-NEXT: v_rcp_f32_e32 v2, v2 3981; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3982; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 3983; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 3984; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 3985; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 3986; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 3987; GFX7-NEXT: s_setpc_b64 s[30:31] 3988; 3989; GFX8-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf: 3990; GFX8: ; %bb.0: 3991; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3992; GFX8-NEXT: v_frexp_mant_f32_e32 v2, v1 3993; GFX8-NEXT: v_rcp_f32_e32 v2, v2 3994; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 3995; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 3996; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 3997; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 3998; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1 3999; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 4000; GFX8-NEXT: s_setpc_b64 s[30:31] 4001; 4002; GFX10-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf: 4003; GFX10: ; %bb.0: 4004; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4005; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v1 4006; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 4007; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v0 4008; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 4009; GFX10-NEXT: v_rcp_f32_e32 v2, v2 4010; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 4011; GFX10-NEXT: v_mul_f32_e32 v2, v3, v2 4012; GFX10-NEXT: v_ldexp_f32 v0, v2, v0 4013; GFX10-NEXT: s_setpc_b64 s[30:31] 4014; 4015; GFX11-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf: 4016; GFX11: ; %bb.0: 4017; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4018; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v1 4019; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 4020; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v0 4021; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 4022; GFX11-NEXT: v_rcp_f32_e32 v2, v2 4023; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 
4024; GFX11-NEXT: s_waitcnt_depctr 0xfff 4025; GFX11-NEXT: v_mul_f32_e32 v2, v3, v2 4026; GFX11-NEXT: v_ldexp_f32 v0, v2, v0 4027; GFX11-NEXT: s_setpc_b64 s[30:31] 4028; 4029; EG-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf: 4030; EG: ; %bb.0: 4031; EG-NEXT: CF_END 4032; EG-NEXT: PAD 4033 %div = fdiv nnan ninf float %x, %y, !fpmath !0 4034 ret float %div 4035} 4036 4037define float @v_fdiv_f32_daz__nnan_ninf(float %x, float %y, float %z) #0 { 4038; GFX6-FASTFMA-LABEL: v_fdiv_f32_daz__nnan_ninf: 4039; GFX6-FASTFMA: ; %bb.0: 4040; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4041; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 4042; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 4043; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 4044; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 4045; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 4046; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 4047; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 4048; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 4049; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 4050; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 4051; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 4052; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 4053; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 4054; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 4055; 4056; GFX6-SLOWFMA-LABEL: v_fdiv_f32_daz__nnan_ninf: 4057; GFX6-SLOWFMA: ; %bb.0: 4058; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4059; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 4060; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 4061; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 4062; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 4063; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 4064; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 4065; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 4066; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 4067; GFX6-SLOWFMA-NEXT: 
v_fma_f32 v5, v6, v4, v5 4068; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 4069; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 4070; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 4071; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 4072; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 4073; 4074; GFX7-LABEL: v_fdiv_f32_daz__nnan_ninf: 4075; GFX7: ; %bb.0: 4076; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4077; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 4078; GFX7-NEXT: v_rcp_f32_e32 v3, v2 4079; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 4080; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 4081; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 4082; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 4083; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 4084; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 4085; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 4086; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 4087; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 4088; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 4089; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 4090; GFX7-NEXT: s_setpc_b64 s[30:31] 4091; 4092; GFX8-LABEL: v_fdiv_f32_daz__nnan_ninf: 4093; GFX8: ; %bb.0: 4094; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4095; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 4096; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 4097; GFX8-NEXT: v_rcp_f32_e32 v4, v2 4098; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 4099; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 4100; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 4101; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 4102; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 4103; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 4104; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 4105; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 4106; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 4107; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 4108; GFX8-NEXT: s_setpc_b64 s[30:31] 4109; 4110; GFX10-LABEL: v_fdiv_f32_daz__nnan_ninf: 4111; GFX10: ; %bb.0: 4112; GFX10-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 4113; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 4114; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 4115; GFX10-NEXT: v_rcp_f32_e32 v3, v2 4116; GFX10-NEXT: s_denorm_mode 15 4117; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 4118; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 4119; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 4120; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 4121; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 4122; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 4123; GFX10-NEXT: s_denorm_mode 12 4124; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 4125; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 4126; GFX10-NEXT: s_setpc_b64 s[30:31] 4127; 4128; GFX11-LABEL: v_fdiv_f32_daz__nnan_ninf: 4129; GFX11: ; %bb.0: 4130; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4131; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 4132; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 4133; GFX11-NEXT: v_rcp_f32_e32 v3, v2 4134; GFX11-NEXT: s_denorm_mode 15 4135; GFX11-NEXT: s_waitcnt_depctr 0xfff 4136; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 4137; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 4138; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 4139; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 4140; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 4141; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 4142; GFX11-NEXT: s_denorm_mode 12 4143; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 4144; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 4145; GFX11-NEXT: s_setpc_b64 s[30:31] 4146; 4147; EG-LABEL: v_fdiv_f32_daz__nnan_ninf: 4148; EG: ; %bb.0: 4149; EG-NEXT: CF_END 4150; EG-NEXT: PAD 4151 %div = fdiv nnan ninf float %x, %y 4152 ret float %div 4153} 4154 4155define float @v_fdiv_f32_daz_25ulp__nnan_ninf(float %x, float %y, float %z) #0 { 4156; GFX678-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf: 4157; GFX678: ; %bb.0: 4158; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4159; GFX678-NEXT: s_mov_b32 s4, 0x6f800000 4160; GFX678-NEXT: v_mov_b32_e32 v2, 0x2f800000 4161; GFX678-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, s4 4162; 
GFX678-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc 4163; GFX678-NEXT: v_mul_f32_e32 v1, v1, v2 4164; GFX678-NEXT: v_rcp_f32_e32 v1, v1 4165; GFX678-NEXT: v_mul_f32_e32 v0, v0, v1 4166; GFX678-NEXT: v_mul_f32_e32 v0, v2, v0 4167; GFX678-NEXT: s_setpc_b64 s[30:31] 4168; 4169; GFX10-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf: 4170; GFX10: ; %bb.0: 4171; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4172; GFX10-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v1| 4173; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4 4174; GFX10-NEXT: v_mul_f32_e32 v1, v1, v2 4175; GFX10-NEXT: v_rcp_f32_e32 v1, v1 4176; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 4177; GFX10-NEXT: v_mul_f32_e32 v0, v2, v0 4178; GFX10-NEXT: s_setpc_b64 s[30:31] 4179; 4180; GFX11-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf: 4181; GFX11: ; %bb.0: 4182; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4183; GFX11-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v1| 4184; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0 4185; GFX11-NEXT: v_mul_f32_e32 v1, v1, v2 4186; GFX11-NEXT: v_rcp_f32_e32 v1, v1 4187; GFX11-NEXT: s_waitcnt_depctr 0xfff 4188; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 4189; GFX11-NEXT: v_mul_f32_e32 v0, v2, v0 4190; GFX11-NEXT: s_setpc_b64 s[30:31] 4191; 4192; EG-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf: 4193; EG: ; %bb.0: 4194; EG-NEXT: CF_END 4195; EG-NEXT: PAD 4196 %div = fdiv nnan ninf float %x, %y, !fpmath !0 4197 ret float %div 4198} 4199 4200define float @v_fdiv_f32_ieee__nnan_ninf_contractable_user(float %x, float %y, float %z) #1 { 4201; GFX6-FASTFMA-LABEL: v_fdiv_f32_ieee__nnan_ninf_contractable_user: 4202; GFX6-FASTFMA: ; %bb.0: 4203; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4204; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 4205; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3 4206; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v3, v4, 1.0 4207; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v4, v4 4208; GFX6-FASTFMA-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 4209; GFX6-FASTFMA-NEXT: v_mul_f32_e32 
v6, v5, v4 4210; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v3, v6, v5 4211; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v4, v6 4212; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v3, v6, v5 4213; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v3, v3, v4, v6 4214; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4215; GFX6-FASTFMA-NEXT: v_add_f32_e32 v0, v0, v2 4216; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 4217; 4218; GFX6-SLOWFMA-LABEL: v_fdiv_f32_ieee__nnan_ninf_contractable_user: 4219; GFX6-SLOWFMA: ; %bb.0: 4220; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4221; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 4222; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 4223; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v5, v3 4224; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v3, v5, 1.0 4225; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v5, v5 4226; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v6, v4, v5 4227; GFX6-SLOWFMA-NEXT: v_fma_f32 v7, -v3, v6, v4 4228; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, v7, v5, v6 4229; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, -v3, v6, v4 4230; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v3, v3, v5, v6 4231; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4232; GFX6-SLOWFMA-NEXT: v_add_f32_e32 v0, v0, v2 4233; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 4234; 4235; GFX7-LABEL: v_fdiv_f32_ieee__nnan_ninf_contractable_user: 4236; GFX7: ; %bb.0: 4237; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4238; GFX7-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 4239; GFX7-NEXT: v_rcp_f32_e32 v4, v3 4240; GFX7-NEXT: v_fma_f32 v5, -v3, v4, 1.0 4241; GFX7-NEXT: v_fma_f32 v4, v5, v4, v4 4242; GFX7-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 4243; GFX7-NEXT: v_mul_f32_e32 v6, v5, v4 4244; GFX7-NEXT: v_fma_f32 v7, -v3, v6, v5 4245; GFX7-NEXT: v_fma_f32 v6, v7, v4, v6 4246; GFX7-NEXT: v_fma_f32 v3, -v3, v6, v5 4247; GFX7-NEXT: v_div_fmas_f32 v3, v3, v4, v6 4248; GFX7-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4249; GFX7-NEXT: v_add_f32_e32 v0, v0, v2 4250; GFX7-NEXT: s_setpc_b64 s[30:31] 4251; 4252; GFX8-LABEL: 
v_fdiv_f32_ieee__nnan_ninf_contractable_user: 4253; GFX8: ; %bb.0: 4254; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4255; GFX8-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 4256; GFX8-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 4257; GFX8-NEXT: v_rcp_f32_e32 v5, v3 4258; GFX8-NEXT: v_fma_f32 v6, -v3, v5, 1.0 4259; GFX8-NEXT: v_fma_f32 v5, v6, v5, v5 4260; GFX8-NEXT: v_mul_f32_e32 v6, v4, v5 4261; GFX8-NEXT: v_fma_f32 v7, -v3, v6, v4 4262; GFX8-NEXT: v_fma_f32 v6, v7, v5, v6 4263; GFX8-NEXT: v_fma_f32 v3, -v3, v6, v4 4264; GFX8-NEXT: v_div_fmas_f32 v3, v3, v5, v6 4265; GFX8-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4266; GFX8-NEXT: v_add_f32_e32 v0, v0, v2 4267; GFX8-NEXT: s_setpc_b64 s[30:31] 4268; 4269; GFX10-LABEL: v_fdiv_f32_ieee__nnan_ninf_contractable_user: 4270; GFX10: ; %bb.0: 4271; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4272; GFX10-NEXT: v_div_scale_f32 v3, s4, v1, v1, v0 4273; GFX10-NEXT: v_rcp_f32_e32 v4, v3 4274; GFX10-NEXT: v_fma_f32 v5, -v3, v4, 1.0 4275; GFX10-NEXT: v_fmac_f32_e32 v4, v5, v4 4276; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 4277; GFX10-NEXT: v_mul_f32_e32 v6, v5, v4 4278; GFX10-NEXT: v_fma_f32 v7, -v3, v6, v5 4279; GFX10-NEXT: v_fmac_f32_e32 v6, v7, v4 4280; GFX10-NEXT: v_fma_f32 v3, -v3, v6, v5 4281; GFX10-NEXT: v_div_fmas_f32 v3, v3, v4, v6 4282; GFX10-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4283; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 4284; GFX10-NEXT: s_setpc_b64 s[30:31] 4285; 4286; GFX11-LABEL: v_fdiv_f32_ieee__nnan_ninf_contractable_user: 4287; GFX11: ; %bb.0: 4288; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4289; GFX11-NEXT: v_div_scale_f32 v3, null, v1, v1, v0 4290; GFX11-NEXT: v_rcp_f32_e32 v4, v3 4291; GFX11-NEXT: s_waitcnt_depctr 0xfff 4292; GFX11-NEXT: v_fma_f32 v5, -v3, v4, 1.0 4293; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v4 4294; GFX11-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 4295; GFX11-NEXT: v_mul_f32_e32 v6, v5, v4 4296; GFX11-NEXT: v_fma_f32 v7, -v3, v6, v5 4297; GFX11-NEXT: v_fmac_f32_e32 
v6, v7, v4 4298; GFX11-NEXT: v_fma_f32 v3, -v3, v6, v5 4299; GFX11-NEXT: v_div_fmas_f32 v3, v3, v4, v6 4300; GFX11-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4301; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 4302; GFX11-NEXT: s_setpc_b64 s[30:31] 4303; 4304; EG-LABEL: v_fdiv_f32_ieee__nnan_ninf_contractable_user: 4305; EG: ; %bb.0: 4306; EG-NEXT: CF_END 4307; EG-NEXT: PAD 4308 %div = fdiv nnan ninf contract float %x, %y 4309 %add = fadd contract float %div, %z 4310 ret float %add 4311} 4312 4313define float @v_fdiv_f32_ieee_25ulp__nnan_ninf_contractable_user(float %x, float %y, float %z) #1 { 4314; GFX6-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf_contractable_user: 4315; GFX6: ; %bb.0: 4316; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4317; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 4318; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v1 4319; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 4320; GFX6-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 4321; GFX6-NEXT: v_rcp_f32_e32 v3, v3 4322; GFX6-NEXT: v_frexp_mant_f32_e32 v4, v0 4323; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 4324; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 4325; GFX6-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc 4326; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 4327; GFX6-NEXT: v_mul_f32_e32 v3, v4, v3 4328; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 4329; GFX6-NEXT: v_ldexp_f32_e32 v0, v3, v0 4330; GFX6-NEXT: v_add_f32_e32 v0, v0, v2 4331; GFX6-NEXT: s_setpc_b64 s[30:31] 4332; 4333; GFX7-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf_contractable_user: 4334; GFX7: ; %bb.0: 4335; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4336; GFX7-NEXT: v_frexp_mant_f32_e32 v3, v1 4337; GFX7-NEXT: v_rcp_f32_e32 v3, v3 4338; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 4339; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v4, v0 4340; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 4341; GFX7-NEXT: v_mul_f32_e32 v0, v0, v3 4342; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v4, v1 4343; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 4344; GFX7-NEXT: v_add_f32_e32 v0, v0, v2 4345; GFX7-NEXT: s_setpc_b64 
s[30:31] 4346; 4347; GFX8-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf_contractable_user: 4348; GFX8: ; %bb.0: 4349; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4350; GFX8-NEXT: v_frexp_mant_f32_e32 v3, v1 4351; GFX8-NEXT: v_rcp_f32_e32 v3, v3 4352; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 4353; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v4, v0 4354; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 4355; GFX8-NEXT: v_mul_f32_e32 v0, v0, v3 4356; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v4, v1 4357; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 4358; GFX8-NEXT: v_add_f32_e32 v0, v0, v2 4359; GFX8-NEXT: s_setpc_b64 s[30:31] 4360; 4361; GFX10-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf_contractable_user: 4362; GFX10: ; %bb.0: 4363; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4364; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v1 4365; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 4366; GFX10-NEXT: v_frexp_mant_f32_e32 v4, v0 4367; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 4368; GFX10-NEXT: v_rcp_f32_e32 v3, v3 4369; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 4370; GFX10-NEXT: v_mul_f32_e32 v3, v4, v3 4371; GFX10-NEXT: v_ldexp_f32 v0, v3, v0 4372; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 4373; GFX10-NEXT: s_setpc_b64 s[30:31] 4374; 4375; GFX11-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf_contractable_user: 4376; GFX11: ; %bb.0: 4377; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4378; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v1 4379; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 4380; GFX11-NEXT: v_frexp_mant_f32_e32 v4, v0 4381; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 4382; GFX11-NEXT: v_rcp_f32_e32 v3, v3 4383; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 4384; GFX11-NEXT: s_waitcnt_depctr 0xfff 4385; GFX11-NEXT: v_mul_f32_e32 v3, v4, v3 4386; GFX11-NEXT: v_ldexp_f32 v0, v3, v0 4387; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 4388; GFX11-NEXT: s_setpc_b64 s[30:31] 4389; 4390; EG-LABEL: v_fdiv_f32_ieee_25ulp__nnan_ninf_contractable_user: 4391; EG: ; %bb.0: 4392; EG-NEXT: CF_END 4393; EG-NEXT: PAD 4394 %div = fdiv nnan ninf 
contract float %x, %y, !fpmath !0 4395 %add = fadd contract float %div, %z 4396 ret float %add 4397} 4398 4399define float @v_fdiv_f32_dynamic__nnan_ninf_contractable_user(float %x, float %y, float %z) #2 { 4400; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic__nnan_ninf_contractable_user: 4401; GFX6-FASTFMA: ; %bb.0: 4402; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4403; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 4404; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3 4405; GFX6-FASTFMA-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 4406; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 4407; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 4408; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v3, v4, 1.0 4409; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v6, v4, v4 4410; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v6, v5, v4 4411; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v3, v6, v5 4412; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v4, v6 4413; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v3, v6, v5 4414; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 4415; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v3, v3, v4, v6 4416; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4417; GFX6-FASTFMA-NEXT: v_add_f32_e32 v0, v0, v2 4418; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 4419; 4420; GFX6-SLOWFMA-LABEL: v_fdiv_f32_dynamic__nnan_ninf_contractable_user: 4421; GFX6-SLOWFMA: ; %bb.0: 4422; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4423; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 4424; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 4425; GFX6-SLOWFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 4426; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v5, v3 4427; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 4428; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v3, v5, 1.0 4429; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v5, v5 4430; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v6, v4, v5 4431; GFX6-SLOWFMA-NEXT: v_fma_f32 v7, -v3, v6, v4 4432; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, v7, v5, 
v6 4433; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, -v3, v6, v4 4434; GFX6-SLOWFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 4435; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v3, v3, v5, v6 4436; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4437; GFX6-SLOWFMA-NEXT: v_add_f32_e32 v0, v0, v2 4438; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 4439; 4440; GFX7-LABEL: v_fdiv_f32_dynamic__nnan_ninf_contractable_user: 4441; GFX7: ; %bb.0: 4442; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4443; GFX7-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 4444; GFX7-NEXT: v_rcp_f32_e32 v4, v3 4445; GFX7-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 4446; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 4447; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 4448; GFX7-NEXT: v_fma_f32 v6, -v3, v4, 1.0 4449; GFX7-NEXT: v_fma_f32 v4, v6, v4, v4 4450; GFX7-NEXT: v_mul_f32_e32 v6, v5, v4 4451; GFX7-NEXT: v_fma_f32 v7, -v3, v6, v5 4452; GFX7-NEXT: v_fma_f32 v6, v7, v4, v6 4453; GFX7-NEXT: v_fma_f32 v3, -v3, v6, v5 4454; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 4455; GFX7-NEXT: v_div_fmas_f32 v3, v3, v4, v6 4456; GFX7-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4457; GFX7-NEXT: v_add_f32_e32 v0, v0, v2 4458; GFX7-NEXT: s_setpc_b64 s[30:31] 4459; 4460; GFX8-LABEL: v_fdiv_f32_dynamic__nnan_ninf_contractable_user: 4461; GFX8: ; %bb.0: 4462; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4463; GFX8-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 4464; GFX8-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 4465; GFX8-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 4466; GFX8-NEXT: v_rcp_f32_e32 v5, v3 4467; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 4468; GFX8-NEXT: v_fma_f32 v6, -v3, v5, 1.0 4469; GFX8-NEXT: v_fma_f32 v5, v6, v5, v5 4470; GFX8-NEXT: v_mul_f32_e32 v6, v4, v5 4471; GFX8-NEXT: v_fma_f32 v7, -v3, v6, v4 4472; GFX8-NEXT: v_fma_f32 v6, v7, v5, v6 4473; GFX8-NEXT: v_fma_f32 v3, -v3, v6, v4 4474; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 4475; GFX8-NEXT: 
v_div_fmas_f32 v3, v3, v5, v6 4476; GFX8-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4477; GFX8-NEXT: v_add_f32_e32 v0, v0, v2 4478; GFX8-NEXT: s_setpc_b64 s[30:31] 4479; 4480; GFX10-LABEL: v_fdiv_f32_dynamic__nnan_ninf_contractable_user: 4481; GFX10: ; %bb.0: 4482; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4483; GFX10-NEXT: v_div_scale_f32 v3, s4, v1, v1, v0 4484; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 4485; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 4486; GFX10-NEXT: v_rcp_f32_e32 v4, v3 4487; GFX10-NEXT: s_denorm_mode 15 4488; GFX10-NEXT: v_fma_f32 v6, -v3, v4, 1.0 4489; GFX10-NEXT: v_fmac_f32_e32 v4, v6, v4 4490; GFX10-NEXT: v_mul_f32_e32 v6, v5, v4 4491; GFX10-NEXT: v_fma_f32 v7, -v3, v6, v5 4492; GFX10-NEXT: v_fmac_f32_e32 v6, v7, v4 4493; GFX10-NEXT: v_fma_f32 v3, -v3, v6, v5 4494; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 4495; GFX10-NEXT: v_div_fmas_f32 v3, v3, v4, v6 4496; GFX10-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4497; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 4498; GFX10-NEXT: s_setpc_b64 s[30:31] 4499; 4500; GFX11-LABEL: v_fdiv_f32_dynamic__nnan_ninf_contractable_user: 4501; GFX11: ; %bb.0: 4502; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4503; GFX11-NEXT: v_div_scale_f32 v3, null, v1, v1, v0 4504; GFX11-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 4505; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2) 4506; GFX11-NEXT: v_rcp_f32_e32 v4, v3 4507; GFX11-NEXT: s_denorm_mode 15 4508; GFX11-NEXT: s_waitcnt_depctr 0xfff 4509; GFX11-NEXT: v_fma_f32 v6, -v3, v4, 1.0 4510; GFX11-NEXT: v_fmac_f32_e32 v4, v6, v4 4511; GFX11-NEXT: v_mul_f32_e32 v6, v5, v4 4512; GFX11-NEXT: v_fma_f32 v7, -v3, v6, v5 4513; GFX11-NEXT: v_fmac_f32_e32 v6, v7, v4 4514; GFX11-NEXT: v_fma_f32 v3, -v3, v6, v5 4515; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 4516; GFX11-NEXT: v_div_fmas_f32 v3, v3, v4, v6 4517; GFX11-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4518; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 4519; GFX11-NEXT: s_setpc_b64 
s[30:31] 4520; 4521; EG-LABEL: v_fdiv_f32_dynamic__nnan_ninf_contractable_user: 4522; EG: ; %bb.0: 4523; EG-NEXT: CF_END 4524; EG-NEXT: PAD 4525 %div = fdiv nnan ninf contract float %x, %y 4526 %add = fadd contract float %div, %z 4527 ret float %add 4528} 4529 4530define float @v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user(float %x, float %y, float %z) #2 { 4531; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user: 4532; GFX6: ; %bb.0: 4533; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4534; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 4535; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v1 4536; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 4537; GFX6-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 4538; GFX6-NEXT: v_rcp_f32_e32 v3, v3 4539; GFX6-NEXT: v_frexp_mant_f32_e32 v4, v0 4540; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 4541; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 4542; GFX6-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc 4543; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 4544; GFX6-NEXT: v_mul_f32_e32 v3, v4, v3 4545; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 4546; GFX6-NEXT: v_ldexp_f32_e32 v0, v3, v0 4547; GFX6-NEXT: v_add_f32_e32 v0, v0, v2 4548; GFX6-NEXT: s_setpc_b64 s[30:31] 4549; 4550; GFX7-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user: 4551; GFX7: ; %bb.0: 4552; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4553; GFX7-NEXT: v_frexp_mant_f32_e32 v3, v1 4554; GFX7-NEXT: v_rcp_f32_e32 v3, v3 4555; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 4556; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v4, v0 4557; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 4558; GFX7-NEXT: v_mul_f32_e32 v0, v0, v3 4559; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v4, v1 4560; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 4561; GFX7-NEXT: v_add_f32_e32 v0, v0, v2 4562; GFX7-NEXT: s_setpc_b64 s[30:31] 4563; 4564; GFX8-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user: 4565; GFX8: ; %bb.0: 4566; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4567; GFX8-NEXT: v_frexp_mant_f32_e32 v3, v1 4568; 
GFX8-NEXT: v_rcp_f32_e32 v3, v3 4569; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 4570; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v4, v0 4571; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 4572; GFX8-NEXT: v_mul_f32_e32 v0, v0, v3 4573; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v4, v1 4574; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 4575; GFX8-NEXT: v_add_f32_e32 v0, v0, v2 4576; GFX8-NEXT: s_setpc_b64 s[30:31] 4577; 4578; GFX10-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user: 4579; GFX10: ; %bb.0: 4580; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4581; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v1 4582; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 4583; GFX10-NEXT: v_frexp_mant_f32_e32 v4, v0 4584; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 4585; GFX10-NEXT: v_rcp_f32_e32 v3, v3 4586; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 4587; GFX10-NEXT: v_mul_f32_e32 v3, v4, v3 4588; GFX10-NEXT: v_ldexp_f32 v0, v3, v0 4589; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 4590; GFX10-NEXT: s_setpc_b64 s[30:31] 4591; 4592; GFX11-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user: 4593; GFX11: ; %bb.0: 4594; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4595; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v1 4596; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 4597; GFX11-NEXT: v_frexp_mant_f32_e32 v4, v0 4598; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 4599; GFX11-NEXT: v_rcp_f32_e32 v3, v3 4600; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 4601; GFX11-NEXT: s_waitcnt_depctr 0xfff 4602; GFX11-NEXT: v_mul_f32_e32 v3, v4, v3 4603; GFX11-NEXT: v_ldexp_f32 v0, v3, v0 4604; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 4605; GFX11-NEXT: s_setpc_b64 s[30:31] 4606; 4607; EG-LABEL: v_fdiv_f32_dynamic_25ulp__nnan_ninf_contractable_user: 4608; EG: ; %bb.0: 4609; EG-NEXT: CF_END 4610; EG-NEXT: PAD 4611 %div = fdiv nnan ninf contract float %x, %y, !fpmath !0 4612 %add = fadd contract float %div, %z 4613 ret float %add 4614} 4615 4616define float @v_fdiv_f32_daz__nnan_ninf_contractable_user(float %x, float %y, float %z) #0 { 4617; 
GFX6-FASTFMA-LABEL: v_fdiv_f32_daz__nnan_ninf_contractable_user: 4618; GFX6-FASTFMA: ; %bb.0: 4619; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4620; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 4621; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v4, v3 4622; GFX6-FASTFMA-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 4623; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 4624; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v3, v4, 1.0 4625; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v6, v4, v4 4626; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v6, v5, v4 4627; GFX6-FASTFMA-NEXT: v_fma_f32 v7, -v3, v6, v5 4628; GFX6-FASTFMA-NEXT: v_fma_f32 v6, v7, v4, v6 4629; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v3, v6, v5 4630; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 4631; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v3, v3, v4, v6 4632; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4633; GFX6-FASTFMA-NEXT: v_add_f32_e32 v0, v0, v2 4634; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 4635; 4636; GFX6-SLOWFMA-LABEL: v_fdiv_f32_daz__nnan_ninf_contractable_user: 4637; GFX6-SLOWFMA: ; %bb.0: 4638; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4639; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 4640; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 4641; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v5, v3 4642; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 4643; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v3, v5, 1.0 4644; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v5, v5 4645; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v6, v4, v5 4646; GFX6-SLOWFMA-NEXT: v_fma_f32 v7, -v3, v6, v4 4647; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, v7, v5, v6 4648; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, -v3, v6, v4 4649; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 4650; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v3, v3, v5, v6 4651; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4652; GFX6-SLOWFMA-NEXT: v_add_f32_e32 v0, v0, v2 4653; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 4654; 4655; 
GFX7-LABEL: v_fdiv_f32_daz__nnan_ninf_contractable_user: 4656; GFX7: ; %bb.0: 4657; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4658; GFX7-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 4659; GFX7-NEXT: v_rcp_f32_e32 v4, v3 4660; GFX7-NEXT: v_div_scale_f32 v5, vcc, v0, v1, v0 4661; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 4662; GFX7-NEXT: v_fma_f32 v6, -v3, v4, 1.0 4663; GFX7-NEXT: v_fma_f32 v4, v6, v4, v4 4664; GFX7-NEXT: v_mul_f32_e32 v6, v5, v4 4665; GFX7-NEXT: v_fma_f32 v7, -v3, v6, v5 4666; GFX7-NEXT: v_fma_f32 v6, v7, v4, v6 4667; GFX7-NEXT: v_fma_f32 v3, -v3, v6, v5 4668; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 4669; GFX7-NEXT: v_div_fmas_f32 v3, v3, v4, v6 4670; GFX7-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4671; GFX7-NEXT: v_add_f32_e32 v0, v0, v2 4672; GFX7-NEXT: s_setpc_b64 s[30:31] 4673; 4674; GFX8-LABEL: v_fdiv_f32_daz__nnan_ninf_contractable_user: 4675; GFX8: ; %bb.0: 4676; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4677; GFX8-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v0 4678; GFX8-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 4679; GFX8-NEXT: v_rcp_f32_e32 v5, v3 4680; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 4681; GFX8-NEXT: v_fma_f32 v6, -v3, v5, 1.0 4682; GFX8-NEXT: v_fma_f32 v5, v6, v5, v5 4683; GFX8-NEXT: v_mul_f32_e32 v6, v4, v5 4684; GFX8-NEXT: v_fma_f32 v7, -v3, v6, v4 4685; GFX8-NEXT: v_fma_f32 v6, v7, v5, v6 4686; GFX8-NEXT: v_fma_f32 v3, -v3, v6, v4 4687; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 4688; GFX8-NEXT: v_div_fmas_f32 v3, v3, v5, v6 4689; GFX8-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4690; GFX8-NEXT: v_add_f32_e32 v0, v0, v2 4691; GFX8-NEXT: s_setpc_b64 s[30:31] 4692; 4693; GFX10-LABEL: v_fdiv_f32_daz__nnan_ninf_contractable_user: 4694; GFX10: ; %bb.0: 4695; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4696; GFX10-NEXT: v_div_scale_f32 v3, s4, v1, v1, v0 4697; GFX10-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 4698; GFX10-NEXT: v_rcp_f32_e32 v4, v3 4699; 
GFX10-NEXT: s_denorm_mode 15 4700; GFX10-NEXT: v_fma_f32 v6, -v3, v4, 1.0 4701; GFX10-NEXT: v_fmac_f32_e32 v4, v6, v4 4702; GFX10-NEXT: v_mul_f32_e32 v6, v5, v4 4703; GFX10-NEXT: v_fma_f32 v7, -v3, v6, v5 4704; GFX10-NEXT: v_fmac_f32_e32 v6, v7, v4 4705; GFX10-NEXT: v_fma_f32 v3, -v3, v6, v5 4706; GFX10-NEXT: s_denorm_mode 12 4707; GFX10-NEXT: v_div_fmas_f32 v3, v3, v4, v6 4708; GFX10-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4709; GFX10-NEXT: v_add_f32_e32 v0, v0, v2 4710; GFX10-NEXT: s_setpc_b64 s[30:31] 4711; 4712; GFX11-LABEL: v_fdiv_f32_daz__nnan_ninf_contractable_user: 4713; GFX11: ; %bb.0: 4714; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4715; GFX11-NEXT: v_div_scale_f32 v3, null, v1, v1, v0 4716; GFX11-NEXT: v_div_scale_f32 v5, vcc_lo, v0, v1, v0 4717; GFX11-NEXT: v_rcp_f32_e32 v4, v3 4718; GFX11-NEXT: s_denorm_mode 15 4719; GFX11-NEXT: s_waitcnt_depctr 0xfff 4720; GFX11-NEXT: v_fma_f32 v6, -v3, v4, 1.0 4721; GFX11-NEXT: v_fmac_f32_e32 v4, v6, v4 4722; GFX11-NEXT: v_mul_f32_e32 v6, v5, v4 4723; GFX11-NEXT: v_fma_f32 v7, -v3, v6, v5 4724; GFX11-NEXT: v_fmac_f32_e32 v6, v7, v4 4725; GFX11-NEXT: v_fma_f32 v3, -v3, v6, v5 4726; GFX11-NEXT: s_denorm_mode 12 4727; GFX11-NEXT: v_div_fmas_f32 v3, v3, v4, v6 4728; GFX11-NEXT: v_div_fixup_f32 v0, v3, v1, v0 4729; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 4730; GFX11-NEXT: s_setpc_b64 s[30:31] 4731; 4732; EG-LABEL: v_fdiv_f32_daz__nnan_ninf_contractable_user: 4733; EG: ; %bb.0: 4734; EG-NEXT: CF_END 4735; EG-NEXT: PAD 4736 %div = fdiv nnan ninf contract float %x, %y 4737 %add = fadd contract float %div, %z 4738 ret float %add 4739} 4740 4741define float @v_fdiv_f32_daz_25ulp__nnan_ninf_contractable_user(float %x, float %y, float %z) #0 { 4742; GFX678-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf_contractable_user: 4743; GFX678: ; %bb.0: 4744; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4745; GFX678-NEXT: s_mov_b32 s4, 0x6f800000 4746; GFX678-NEXT: v_mov_b32_e32 v3, 0x2f800000 4747; GFX678-NEXT: v_cmp_gt_f32_e64 vcc, 
|v1|, s4 4748; GFX678-NEXT: v_cndmask_b32_e32 v3, 1.0, v3, vcc 4749; GFX678-NEXT: v_mul_f32_e32 v1, v1, v3 4750; GFX678-NEXT: v_rcp_f32_e32 v1, v1 4751; GFX678-NEXT: v_mul_f32_e32 v0, v0, v1 4752; GFX678-NEXT: v_mad_f32 v0, v3, v0, v2 4753; GFX678-NEXT: s_setpc_b64 s[30:31] 4754; 4755; GFX10-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf_contractable_user: 4756; GFX10: ; %bb.0: 4757; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4758; GFX10-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v1| 4759; GFX10-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x2f800000, s4 4760; GFX10-NEXT: v_mul_f32_e32 v1, v1, v3 4761; GFX10-NEXT: v_rcp_f32_e32 v1, v1 4762; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 4763; GFX10-NEXT: v_mad_f32 v0, v3, v0, v2 4764; GFX10-NEXT: s_setpc_b64 s[30:31] 4765; 4766; GFX11-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf_contractable_user: 4767; GFX11: ; %bb.0: 4768; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4769; GFX11-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v1| 4770; GFX11-NEXT: v_cndmask_b32_e64 v3, 1.0, 0x2f800000, s0 4771; GFX11-NEXT: v_mul_f32_e32 v1, v1, v3 4772; GFX11-NEXT: v_rcp_f32_e32 v1, v1 4773; GFX11-NEXT: s_waitcnt_depctr 0xfff 4774; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 4775; GFX11-NEXT: v_fma_f32 v0, v3, v0, v2 4776; GFX11-NEXT: s_setpc_b64 s[30:31] 4777; 4778; EG-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf_contractable_user: 4779; EG: ; %bb.0: 4780; EG-NEXT: CF_END 4781; EG-NEXT: PAD 4782 %div = fdiv nnan ninf contract float %x, %y, !fpmath !0 4783 %add = fadd contract float %div, %z 4784 ret float %add 4785} 4786 4787define float @v_fdiv_neglhs_f32_ieee(float %x, float %y) #1 { 4788; GFX6-FASTFMA-LABEL: v_fdiv_neglhs_f32_ieee: 4789; GFX6-FASTFMA: ; %bb.0: 4790; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4791; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 4792; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 4793; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 4794; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 4795; GFX6-FASTFMA-NEXT: 
v_div_scale_f32 v4, vcc, -v0, v1, -v0 4796; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 4797; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 4798; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 4799; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 4800; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 4801; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 4802; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 4803; 4804; GFX6-SLOWFMA-LABEL: v_fdiv_neglhs_f32_ieee: 4805; GFX6-SLOWFMA: ; %bb.0: 4806; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4807; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 4808; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, -v0, v1, -v0 4809; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 4810; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 4811; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 4812; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 4813; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 4814; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 4815; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 4816; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 4817; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 4818; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 4819; 4820; GFX7-LABEL: v_fdiv_neglhs_f32_ieee: 4821; GFX7: ; %bb.0: 4822; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4823; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 4824; GFX7-NEXT: v_rcp_f32_e32 v3, v2 4825; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 4826; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 4827; GFX7-NEXT: v_div_scale_f32 v4, vcc, -v0, v1, -v0 4828; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 4829; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 4830; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 4831; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 4832; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 4833; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 4834; GFX7-NEXT: s_setpc_b64 s[30:31] 4835; 4836; GFX8-LABEL: v_fdiv_neglhs_f32_ieee: 4837; GFX8: ; %bb.0: 4838; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4839; GFX8-NEXT: 
v_div_scale_f32 v2, s[4:5], v1, v1, -v0 4840; GFX8-NEXT: v_div_scale_f32 v3, vcc, -v0, v1, -v0 4841; GFX8-NEXT: v_rcp_f32_e32 v4, v2 4842; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 4843; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 4844; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 4845; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 4846; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 4847; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 4848; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 4849; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 4850; GFX8-NEXT: s_setpc_b64 s[30:31] 4851; 4852; GFX10-LABEL: v_fdiv_neglhs_f32_ieee: 4853; GFX10: ; %bb.0: 4854; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4855; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, -v0 4856; GFX10-NEXT: v_rcp_f32_e32 v3, v2 4857; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 4858; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 4859; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0 4860; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 4861; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 4862; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 4863; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 4864; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 4865; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 4866; GFX10-NEXT: s_setpc_b64 s[30:31] 4867; 4868; GFX11-LABEL: v_fdiv_neglhs_f32_ieee: 4869; GFX11: ; %bb.0: 4870; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4871; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, -v0 4872; GFX11-NEXT: v_rcp_f32_e32 v3, v2 4873; GFX11-NEXT: s_waitcnt_depctr 0xfff 4874; GFX11-NEXT: v_fma_f32 v4, -v2, v3, 1.0 4875; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 4876; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0 4877; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 4878; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 4879; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 4880; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 4881; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 4882; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 4883; GFX11-NEXT: s_setpc_b64 s[30:31] 4884; 4885; EG-LABEL: v_fdiv_neglhs_f32_ieee: 4886; EG: ; %bb.0: 
4887; EG-NEXT: CF_END 4888; EG-NEXT: PAD 4889 %neg.x = fneg float %x 4890 %div = fdiv float %neg.x, %y 4891 ret float %div 4892} 4893 4894define float @v_fdiv_neglhs_f32_ieee_25ulp(float %x, float %y) #1 { 4895; GFX6-LABEL: v_fdiv_neglhs_f32_ieee_25ulp: 4896; GFX6: ; %bb.0: 4897; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4898; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 4899; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v1 4900; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 4901; GFX6-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 4902; GFX6-NEXT: v_rcp_f32_e32 v2, v2 4903; GFX6-NEXT: v_frexp_mant_f32_e64 v3, -v0 4904; GFX6-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 4905; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 4906; GFX6-NEXT: v_cndmask_b32_e64 v3, -v0, v3, s[4:5] 4907; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 4908; GFX6-NEXT: v_mul_f32_e32 v2, v3, v2 4909; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 4910; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 4911; GFX6-NEXT: s_setpc_b64 s[30:31] 4912; 4913; GFX7-LABEL: v_fdiv_neglhs_f32_ieee_25ulp: 4914; GFX7: ; %bb.0: 4915; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4916; GFX7-NEXT: v_frexp_mant_f32_e32 v2, v1 4917; GFX7-NEXT: v_rcp_f32_e32 v2, v2 4918; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 4919; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 4920; GFX7-NEXT: v_frexp_mant_f32_e64 v0, -v0 4921; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 4922; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 4923; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 4924; GFX7-NEXT: s_setpc_b64 s[30:31] 4925; 4926; GFX8-LABEL: v_fdiv_neglhs_f32_ieee_25ulp: 4927; GFX8: ; %bb.0: 4928; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4929; GFX8-NEXT: v_frexp_mant_f32_e32 v2, v1 4930; GFX8-NEXT: v_rcp_f32_e32 v2, v2 4931; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 4932; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 4933; GFX8-NEXT: v_frexp_mant_f32_e64 v0, -v0 4934; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 4935; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1 4936; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 4937; 
GFX8-NEXT: s_setpc_b64 s[30:31] 4938; 4939; GFX10-LABEL: v_fdiv_neglhs_f32_ieee_25ulp: 4940; GFX10: ; %bb.0: 4941; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4942; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v1 4943; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 4944; GFX10-NEXT: v_frexp_mant_f32_e64 v3, -v0 4945; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 4946; GFX10-NEXT: v_rcp_f32_e32 v2, v2 4947; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 4948; GFX10-NEXT: v_mul_f32_e32 v2, v3, v2 4949; GFX10-NEXT: v_ldexp_f32 v0, v2, v0 4950; GFX10-NEXT: s_setpc_b64 s[30:31] 4951; 4952; GFX11-LABEL: v_fdiv_neglhs_f32_ieee_25ulp: 4953; GFX11: ; %bb.0: 4954; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4955; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v1 4956; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 4957; GFX11-NEXT: v_frexp_mant_f32_e64 v3, -v0 4958; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 4959; GFX11-NEXT: v_rcp_f32_e32 v2, v2 4960; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 4961; GFX11-NEXT: s_waitcnt_depctr 0xfff 4962; GFX11-NEXT: v_mul_f32_e32 v2, v3, v2 4963; GFX11-NEXT: v_ldexp_f32 v0, v2, v0 4964; GFX11-NEXT: s_setpc_b64 s[30:31] 4965; 4966; EG-LABEL: v_fdiv_neglhs_f32_ieee_25ulp: 4967; EG: ; %bb.0: 4968; EG-NEXT: CF_END 4969; EG-NEXT: PAD 4970 %neg.x = fneg float %x 4971 %div = fdiv float %neg.x, %y, !fpmath !0 4972 ret float %div 4973} 4974 4975define float @v_fdiv_neglhs_f32_dynamic(float %x, float %y) #2 { 4976; GFX6-FASTFMA-LABEL: v_fdiv_neglhs_f32_dynamic: 4977; GFX6-FASTFMA: ; %bb.0: 4978; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4979; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 4980; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 4981; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, -v0, v1, -v0 4982; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 4983; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 4984; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 4985; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 4986; 
GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 4987; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 4988; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 4989; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 4990; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 4991; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 4992; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 4993; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 4994; 4995; GFX6-SLOWFMA-LABEL: v_fdiv_neglhs_f32_dynamic: 4996; GFX6-SLOWFMA: ; %bb.0: 4997; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4998; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 4999; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, -v0, v1, -v0 5000; GFX6-SLOWFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 5001; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 5002; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 5003; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 5004; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 5005; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 5006; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 5007; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 5008; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 5009; GFX6-SLOWFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 5010; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 5011; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 5012; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 5013; 5014; GFX7-LABEL: v_fdiv_neglhs_f32_dynamic: 5015; GFX7: ; %bb.0: 5016; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5017; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 5018; GFX7-NEXT: v_rcp_f32_e32 v3, v2 5019; GFX7-NEXT: v_div_scale_f32 v4, vcc, -v0, v1, -v0 5020; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 5021; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 5022; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 5023; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 5024; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 5025; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 5026; 
GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 5027; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 5028; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 5029; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5030; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 5031; GFX7-NEXT: s_setpc_b64 s[30:31] 5032; 5033; GFX8-LABEL: v_fdiv_neglhs_f32_dynamic: 5034; GFX8: ; %bb.0: 5035; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5036; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 5037; GFX8-NEXT: v_div_scale_f32 v3, vcc, -v0, v1, -v0 5038; GFX8-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 5039; GFX8-NEXT: v_rcp_f32_e32 v4, v2 5040; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 5041; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 5042; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 5043; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 5044; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 5045; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 5046; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 5047; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 5048; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 5049; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 5050; GFX8-NEXT: s_setpc_b64 s[30:31] 5051; 5052; GFX10-LABEL: v_fdiv_neglhs_f32_dynamic: 5053; GFX10: ; %bb.0: 5054; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5055; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, -v0 5056; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0 5057; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 5058; GFX10-NEXT: v_rcp_f32_e32 v3, v2 5059; GFX10-NEXT: s_denorm_mode 15 5060; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 5061; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 5062; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 5063; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 5064; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 5065; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 5066; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 5067; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5068; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 5069; GFX10-NEXT: s_setpc_b64 s[30:31] 5070; 5071; GFX11-LABEL: 
v_fdiv_neglhs_f32_dynamic: 5072; GFX11: ; %bb.0: 5073; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5074; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, -v0 5075; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0 5076; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2) 5077; GFX11-NEXT: v_rcp_f32_e32 v3, v2 5078; GFX11-NEXT: s_denorm_mode 15 5079; GFX11-NEXT: s_waitcnt_depctr 0xfff 5080; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 5081; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 5082; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 5083; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 5084; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 5085; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 5086; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 5087; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5088; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 5089; GFX11-NEXT: s_setpc_b64 s[30:31] 5090; 5091; EG-LABEL: v_fdiv_neglhs_f32_dynamic: 5092; EG: ; %bb.0: 5093; EG-NEXT: CF_END 5094; EG-NEXT: PAD 5095 %neg.x = fneg float %x 5096 %div = fdiv float %neg.x, %y 5097 ret float %div 5098} 5099 5100define float @v_fdiv_neglhs_f32_dynamic_25ulp(float %x, float %y) #2 { 5101; GFX6-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp: 5102; GFX6: ; %bb.0: 5103; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5104; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 5105; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v1 5106; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 5107; GFX6-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 5108; GFX6-NEXT: v_rcp_f32_e32 v2, v2 5109; GFX6-NEXT: v_frexp_mant_f32_e64 v3, -v0 5110; GFX6-NEXT: v_cmp_lt_f32_e64 s[4:5], |v0|, s4 5111; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 5112; GFX6-NEXT: v_cndmask_b32_e64 v3, -v0, v3, s[4:5] 5113; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 5114; GFX6-NEXT: v_mul_f32_e32 v2, v3, v2 5115; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 5116; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 5117; GFX6-NEXT: s_setpc_b64 s[30:31] 5118; 5119; GFX7-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp: 5120; GFX7: ; %bb.0: 5121; 
GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5122; GFX7-NEXT: v_frexp_mant_f32_e32 v2, v1 5123; GFX7-NEXT: v_rcp_f32_e32 v2, v2 5124; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 5125; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 5126; GFX7-NEXT: v_frexp_mant_f32_e64 v0, -v0 5127; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 5128; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 5129; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 5130; GFX7-NEXT: s_setpc_b64 s[30:31] 5131; 5132; GFX8-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp: 5133; GFX8: ; %bb.0: 5134; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5135; GFX8-NEXT: v_frexp_mant_f32_e32 v2, v1 5136; GFX8-NEXT: v_rcp_f32_e32 v2, v2 5137; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 5138; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 5139; GFX8-NEXT: v_frexp_mant_f32_e64 v0, -v0 5140; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 5141; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1 5142; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 5143; GFX8-NEXT: s_setpc_b64 s[30:31] 5144; 5145; GFX10-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp: 5146; GFX10: ; %bb.0: 5147; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5148; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v1 5149; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 5150; GFX10-NEXT: v_frexp_mant_f32_e64 v3, -v0 5151; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 5152; GFX10-NEXT: v_rcp_f32_e32 v2, v2 5153; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 5154; GFX10-NEXT: v_mul_f32_e32 v2, v3, v2 5155; GFX10-NEXT: v_ldexp_f32 v0, v2, v0 5156; GFX10-NEXT: s_setpc_b64 s[30:31] 5157; 5158; GFX11-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp: 5159; GFX11: ; %bb.0: 5160; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5161; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v1 5162; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 5163; GFX11-NEXT: v_frexp_mant_f32_e64 v3, -v0 5164; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 5165; GFX11-NEXT: v_rcp_f32_e32 v2, v2 5166; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 5167; GFX11-NEXT: s_waitcnt_depctr 0xfff 5168; GFX11-NEXT: v_mul_f32_e32 
v2, v3, v2 5169; GFX11-NEXT: v_ldexp_f32 v0, v2, v0 5170; GFX11-NEXT: s_setpc_b64 s[30:31] 5171; 5172; EG-LABEL: v_fdiv_neglhs_f32_dynamic_25ulp: 5173; EG: ; %bb.0: 5174; EG-NEXT: CF_END 5175; EG-NEXT: PAD 5176 %neg.x = fneg float %x 5177 %div = fdiv float %neg.x, %y, !fpmath !0 5178 ret float %div 5179} 5180 5181define float @v_fdiv_neglhs_f32_daz(float %x, float %y) #0 { 5182; GFX6-FASTFMA-LABEL: v_fdiv_neglhs_f32_daz: 5183; GFX6-FASTFMA: ; %bb.0: 5184; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5185; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 5186; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 5187; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, -v0, v1, -v0 5188; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 5189; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 5190; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 5191; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 5192; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 5193; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 5194; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 5195; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 5196; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5197; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 5198; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 5199; 5200; GFX6-SLOWFMA-LABEL: v_fdiv_neglhs_f32_daz: 5201; GFX6-SLOWFMA: ; %bb.0: 5202; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5203; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 5204; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, -v0, v1, -v0 5205; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 5206; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 5207; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 5208; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 5209; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 5210; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 5211; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 5212; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 
5213; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 5214; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 5215; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 5216; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 5217; 5218; GFX7-LABEL: v_fdiv_neglhs_f32_daz: 5219; GFX7: ; %bb.0: 5220; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5221; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 5222; GFX7-NEXT: v_rcp_f32_e32 v3, v2 5223; GFX7-NEXT: v_div_scale_f32 v4, vcc, -v0, v1, -v0 5224; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 5225; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 5226; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 5227; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 5228; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 5229; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 5230; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 5231; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 5232; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5233; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 5234; GFX7-NEXT: s_setpc_b64 s[30:31] 5235; 5236; GFX8-LABEL: v_fdiv_neglhs_f32_daz: 5237; GFX8: ; %bb.0: 5238; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5239; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, -v0 5240; GFX8-NEXT: v_div_scale_f32 v3, vcc, -v0, v1, -v0 5241; GFX8-NEXT: v_rcp_f32_e32 v4, v2 5242; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 5243; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 5244; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 5245; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 5246; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 5247; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 5248; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 5249; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 5250; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 5251; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 5252; GFX8-NEXT: s_setpc_b64 s[30:31] 5253; 5254; GFX10-LABEL: v_fdiv_neglhs_f32_daz: 5255; GFX10: ; %bb.0: 5256; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5257; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, -v0 
5258; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0 5259; GFX10-NEXT: v_rcp_f32_e32 v3, v2 5260; GFX10-NEXT: s_denorm_mode 15 5261; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 5262; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 5263; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 5264; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 5265; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 5266; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 5267; GFX10-NEXT: s_denorm_mode 12 5268; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5269; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 5270; GFX10-NEXT: s_setpc_b64 s[30:31] 5271; 5272; GFX11-LABEL: v_fdiv_neglhs_f32_daz: 5273; GFX11: ; %bb.0: 5274; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5275; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, -v0 5276; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, -v0, v1, -v0 5277; GFX11-NEXT: v_rcp_f32_e32 v3, v2 5278; GFX11-NEXT: s_denorm_mode 15 5279; GFX11-NEXT: s_waitcnt_depctr 0xfff 5280; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 5281; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 5282; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 5283; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 5284; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 5285; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 5286; GFX11-NEXT: s_denorm_mode 12 5287; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5288; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, -v0 5289; GFX11-NEXT: s_setpc_b64 s[30:31] 5290; 5291; EG-LABEL: v_fdiv_neglhs_f32_daz: 5292; EG: ; %bb.0: 5293; EG-NEXT: CF_END 5294; EG-NEXT: PAD 5295 %neg.x = fneg float %x 5296 %div = fdiv float %neg.x, %y 5297 ret float %div 5298} 5299 5300define float @v_fdiv_neglhs_f32_daz_25ulp(float %x, float %y) #0 { 5301; GFX678-LABEL: v_fdiv_neglhs_f32_daz_25ulp: 5302; GFX678: ; %bb.0: 5303; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5304; GFX678-NEXT: s_mov_b32 s4, 0x6f800000 5305; GFX678-NEXT: v_mov_b32_e32 v2, 0x2f800000 5306; GFX678-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, s4 5307; GFX678-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc 5308; GFX678-NEXT: v_mul_f32_e32 v1, v1, 
v2 5309; GFX678-NEXT: v_rcp_f32_e32 v1, v1 5310; GFX678-NEXT: v_mul_f32_e64 v0, -v0, v1 5311; GFX678-NEXT: v_mul_f32_e32 v0, v2, v0 5312; GFX678-NEXT: s_setpc_b64 s[30:31] 5313; 5314; GFX10-LABEL: v_fdiv_neglhs_f32_daz_25ulp: 5315; GFX10: ; %bb.0: 5316; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5317; GFX10-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v1| 5318; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4 5319; GFX10-NEXT: v_mul_f32_e32 v1, v1, v2 5320; GFX10-NEXT: v_rcp_f32_e32 v1, v1 5321; GFX10-NEXT: v_mul_f32_e64 v0, -v0, v1 5322; GFX10-NEXT: v_mul_f32_e32 v0, v2, v0 5323; GFX10-NEXT: s_setpc_b64 s[30:31] 5324; 5325; GFX11-LABEL: v_fdiv_neglhs_f32_daz_25ulp: 5326; GFX11: ; %bb.0: 5327; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5328; GFX11-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v1| 5329; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0 5330; GFX11-NEXT: v_mul_f32_e32 v1, v1, v2 5331; GFX11-NEXT: v_rcp_f32_e32 v1, v1 5332; GFX11-NEXT: s_waitcnt_depctr 0xfff 5333; GFX11-NEXT: v_mul_f32_e64 v0, -v0, v1 5334; GFX11-NEXT: v_mul_f32_e32 v0, v2, v0 5335; GFX11-NEXT: s_setpc_b64 s[30:31] 5336; 5337; EG-LABEL: v_fdiv_neglhs_f32_daz_25ulp: 5338; EG: ; %bb.0: 5339; EG-NEXT: CF_END 5340; EG-NEXT: PAD 5341 %neg.x = fneg float %x 5342 %div = fdiv float %neg.x, %y, !fpmath !0 5343 ret float %div 5344} 5345 5346define float @v_fdiv_negrhs_f32_ieee(float %x, float %y) #1 { 5347; GFX6-FASTFMA-LABEL: v_fdiv_negrhs_f32_ieee: 5348; GFX6-FASTFMA: ; %bb.0: 5349; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5350; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 5351; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 5352; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 5353; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 5354; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0 5355; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 5356; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 5357; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 5358; 
GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 5359; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5360; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5361; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 5362; 5363; GFX6-SLOWFMA-LABEL: v_fdiv_negrhs_f32_ieee: 5364; GFX6-SLOWFMA: ; %bb.0: 5365; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5366; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 5367; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, -v1, v0 5368; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 5369; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 5370; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 5371; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 5372; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 5373; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 5374; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 5375; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 5376; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5377; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 5378; 5379; GFX7-LABEL: v_fdiv_negrhs_f32_ieee: 5380; GFX7: ; %bb.0: 5381; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5382; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 5383; GFX7-NEXT: v_rcp_f32_e32 v3, v2 5384; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 5385; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 5386; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0 5387; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 5388; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 5389; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 5390; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 5391; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5392; GFX7-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5393; GFX7-NEXT: s_setpc_b64 s[30:31] 5394; 5395; GFX8-LABEL: v_fdiv_negrhs_f32_ieee: 5396; GFX8: ; %bb.0: 5397; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5398; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 5399; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, -v1, v0 5400; GFX8-NEXT: v_rcp_f32_e32 v4, v2 5401; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 5402; GFX8-NEXT: 
v_fma_f32 v4, v5, v4, v4 5403; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 5404; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 5405; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 5406; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 5407; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 5408; GFX8-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5409; GFX8-NEXT: s_setpc_b64 s[30:31] 5410; 5411; GFX10-LABEL: v_fdiv_negrhs_f32_ieee: 5412; GFX10: ; %bb.0: 5413; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5414; GFX10-NEXT: v_div_scale_f32 v2, s4, -v1, -v1, v0 5415; GFX10-NEXT: v_rcp_f32_e32 v3, v2 5416; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 5417; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 5418; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, -v1, v0 5419; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 5420; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 5421; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 5422; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 5423; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5424; GFX10-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5425; GFX10-NEXT: s_setpc_b64 s[30:31] 5426; 5427; GFX11-LABEL: v_fdiv_negrhs_f32_ieee: 5428; GFX11: ; %bb.0: 5429; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5430; GFX11-NEXT: v_div_scale_f32 v2, null, -v1, -v1, v0 5431; GFX11-NEXT: v_rcp_f32_e32 v3, v2 5432; GFX11-NEXT: s_waitcnt_depctr 0xfff 5433; GFX11-NEXT: v_fma_f32 v4, -v2, v3, 1.0 5434; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 5435; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, -v1, v0 5436; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 5437; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 5438; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 5439; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 5440; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5441; GFX11-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5442; GFX11-NEXT: s_setpc_b64 s[30:31] 5443; 5444; EG-LABEL: v_fdiv_negrhs_f32_ieee: 5445; EG: ; %bb.0: 5446; EG-NEXT: CF_END 5447; EG-NEXT: PAD 5448 %neg.y = fneg float %y 5449 %div = fdiv float %x, %neg.y 5450 ret float %div 5451} 5452 5453define float @v_fdiv_negrhs_f32_ieee_25ulp(float %x, float 
%y) #1 { 5454; GFX6-LABEL: v_fdiv_negrhs_f32_ieee_25ulp: 5455; GFX6: ; %bb.0: 5456; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5457; GFX6-NEXT: s_mov_b32 s6, 0x7f800000 5458; GFX6-NEXT: v_frexp_mant_f32_e64 v2, -v1 5459; GFX6-NEXT: v_cmp_lt_f32_e64 s[4:5], |v1|, s6 5460; GFX6-NEXT: v_cndmask_b32_e64 v2, -v1, v2, s[4:5] 5461; GFX6-NEXT: v_rcp_f32_e32 v2, v2 5462; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v0 5463; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 5464; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 5465; GFX6-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc 5466; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 5467; GFX6-NEXT: v_mul_f32_e32 v2, v3, v2 5468; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 5469; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 5470; GFX6-NEXT: s_setpc_b64 s[30:31] 5471; 5472; GFX7-LABEL: v_fdiv_negrhs_f32_ieee_25ulp: 5473; GFX7: ; %bb.0: 5474; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5475; GFX7-NEXT: v_frexp_mant_f32_e64 v2, -v1 5476; GFX7-NEXT: v_rcp_f32_e32 v2, v2 5477; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 5478; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 5479; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 5480; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 5481; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 5482; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 5483; GFX7-NEXT: s_setpc_b64 s[30:31] 5484; 5485; GFX8-LABEL: v_fdiv_negrhs_f32_ieee_25ulp: 5486; GFX8: ; %bb.0: 5487; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5488; GFX8-NEXT: v_frexp_mant_f32_e64 v2, -v1 5489; GFX8-NEXT: v_rcp_f32_e32 v2, v2 5490; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 5491; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 5492; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 5493; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 5494; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1 5495; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 5496; GFX8-NEXT: s_setpc_b64 s[30:31] 5497; 5498; GFX10-LABEL: v_fdiv_negrhs_f32_ieee_25ulp: 5499; GFX10: ; %bb.0: 5500; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5501; GFX10-NEXT: 
v_frexp_mant_f32_e64 v2, -v1 5502; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 5503; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v0 5504; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 5505; GFX10-NEXT: v_rcp_f32_e32 v2, v2 5506; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 5507; GFX10-NEXT: v_mul_f32_e32 v2, v3, v2 5508; GFX10-NEXT: v_ldexp_f32 v0, v2, v0 5509; GFX10-NEXT: s_setpc_b64 s[30:31] 5510; 5511; GFX11-LABEL: v_fdiv_negrhs_f32_ieee_25ulp: 5512; GFX11: ; %bb.0: 5513; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5514; GFX11-NEXT: v_frexp_mant_f32_e64 v2, -v1 5515; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 5516; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v0 5517; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 5518; GFX11-NEXT: v_rcp_f32_e32 v2, v2 5519; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 5520; GFX11-NEXT: s_waitcnt_depctr 0xfff 5521; GFX11-NEXT: v_mul_f32_e32 v2, v3, v2 5522; GFX11-NEXT: v_ldexp_f32 v0, v2, v0 5523; GFX11-NEXT: s_setpc_b64 s[30:31] 5524; 5525; EG-LABEL: v_fdiv_negrhs_f32_ieee_25ulp: 5526; EG: ; %bb.0: 5527; EG-NEXT: CF_END 5528; EG-NEXT: PAD 5529 %neg.y = fneg float %y 5530 %div = fdiv float %x, %neg.y, !fpmath !0 5531 ret float %div 5532} 5533 5534define float @v_fdiv_negrhs_f32_dynamic(float %x, float %y) #2 { 5535; GFX6-FASTFMA-LABEL: v_fdiv_negrhs_f32_dynamic: 5536; GFX6-FASTFMA: ; %bb.0: 5537; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5538; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 5539; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 5540; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0 5541; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 5542; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 5543; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 5544; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 5545; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 5546; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 5547; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 5548; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, 
v5, v4 5549; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 5550; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5551; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5552; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 5553; 5554; GFX6-SLOWFMA-LABEL: v_fdiv_negrhs_f32_dynamic: 5555; GFX6-SLOWFMA: ; %bb.0: 5556; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5557; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 5558; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, -v1, v0 5559; GFX6-SLOWFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 5560; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 5561; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 5562; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 5563; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 5564; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 5565; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 5566; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 5567; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 5568; GFX6-SLOWFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 5569; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 5570; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5571; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 5572; 5573; GFX7-LABEL: v_fdiv_negrhs_f32_dynamic: 5574; GFX7: ; %bb.0: 5575; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5576; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 5577; GFX7-NEXT: v_rcp_f32_e32 v3, v2 5578; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0 5579; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 5580; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 5581; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 5582; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 5583; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 5584; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 5585; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 5586; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 5587; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 5588; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5589; 
GFX7-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5590; GFX7-NEXT: s_setpc_b64 s[30:31] 5591; 5592; GFX8-LABEL: v_fdiv_negrhs_f32_dynamic: 5593; GFX8: ; %bb.0: 5594; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5595; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 5596; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, -v1, v0 5597; GFX8-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 5598; GFX8-NEXT: v_rcp_f32_e32 v4, v2 5599; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 5600; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 5601; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 5602; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 5603; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 5604; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 5605; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 5606; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 5607; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 5608; GFX8-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5609; GFX8-NEXT: s_setpc_b64 s[30:31] 5610; 5611; GFX10-LABEL: v_fdiv_negrhs_f32_dynamic: 5612; GFX10: ; %bb.0: 5613; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5614; GFX10-NEXT: v_div_scale_f32 v2, s4, -v1, -v1, v0 5615; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, -v1, v0 5616; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 5617; GFX10-NEXT: v_rcp_f32_e32 v3, v2 5618; GFX10-NEXT: s_denorm_mode 15 5619; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 5620; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 5621; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 5622; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 5623; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 5624; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 5625; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 5626; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5627; GFX10-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5628; GFX10-NEXT: s_setpc_b64 s[30:31] 5629; 5630; GFX11-LABEL: v_fdiv_negrhs_f32_dynamic: 5631; GFX11: ; %bb.0: 5632; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5633; GFX11-NEXT: v_div_scale_f32 v2, null, -v1, -v1, v0 5634; GFX11-NEXT: v_div_scale_f32 v4, 
vcc_lo, v0, -v1, v0 5635; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2) 5636; GFX11-NEXT: v_rcp_f32_e32 v3, v2 5637; GFX11-NEXT: s_denorm_mode 15 5638; GFX11-NEXT: s_waitcnt_depctr 0xfff 5639; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 5640; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 5641; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 5642; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 5643; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 5644; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 5645; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 5646; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5647; GFX11-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5648; GFX11-NEXT: s_setpc_b64 s[30:31] 5649; 5650; EG-LABEL: v_fdiv_negrhs_f32_dynamic: 5651; EG: ; %bb.0: 5652; EG-NEXT: CF_END 5653; EG-NEXT: PAD 5654 %neg.y = fneg float %y 5655 %div = fdiv float %x, %neg.y 5656 ret float %div 5657} 5658 5659define float @v_fdiv_negrhs_f32_dynamic_25ulp(float %x, float %y) #2 { 5660; GFX6-LABEL: v_fdiv_negrhs_f32_dynamic_25ulp: 5661; GFX6: ; %bb.0: 5662; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5663; GFX6-NEXT: s_mov_b32 s6, 0x7f800000 5664; GFX6-NEXT: v_frexp_mant_f32_e64 v2, -v1 5665; GFX6-NEXT: v_cmp_lt_f32_e64 s[4:5], |v1|, s6 5666; GFX6-NEXT: v_cndmask_b32_e64 v2, -v1, v2, s[4:5] 5667; GFX6-NEXT: v_rcp_f32_e32 v2, v2 5668; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v0 5669; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s6 5670; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 5671; GFX6-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc 5672; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 5673; GFX6-NEXT: v_mul_f32_e32 v2, v3, v2 5674; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 5675; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 5676; GFX6-NEXT: s_setpc_b64 s[30:31] 5677; 5678; GFX7-LABEL: v_fdiv_negrhs_f32_dynamic_25ulp: 5679; GFX7: ; %bb.0: 5680; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5681; GFX7-NEXT: v_frexp_mant_f32_e64 v2, -v1 5682; GFX7-NEXT: v_rcp_f32_e32 v2, v2 5683; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 5684; GFX7-NEXT: 
v_frexp_exp_i32_f32_e32 v3, v0 5685; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 5686; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 5687; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 5688; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 5689; GFX7-NEXT: s_setpc_b64 s[30:31] 5690; 5691; GFX8-LABEL: v_fdiv_negrhs_f32_dynamic_25ulp: 5692; GFX8: ; %bb.0: 5693; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5694; GFX8-NEXT: v_frexp_mant_f32_e64 v2, -v1 5695; GFX8-NEXT: v_rcp_f32_e32 v2, v2 5696; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 5697; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 5698; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 5699; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 5700; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1 5701; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 5702; GFX8-NEXT: s_setpc_b64 s[30:31] 5703; 5704; GFX10-LABEL: v_fdiv_negrhs_f32_dynamic_25ulp: 5705; GFX10: ; %bb.0: 5706; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5707; GFX10-NEXT: v_frexp_mant_f32_e64 v2, -v1 5708; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 5709; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v0 5710; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 5711; GFX10-NEXT: v_rcp_f32_e32 v2, v2 5712; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 5713; GFX10-NEXT: v_mul_f32_e32 v2, v3, v2 5714; GFX10-NEXT: v_ldexp_f32 v0, v2, v0 5715; GFX10-NEXT: s_setpc_b64 s[30:31] 5716; 5717; GFX11-LABEL: v_fdiv_negrhs_f32_dynamic_25ulp: 5718; GFX11: ; %bb.0: 5719; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5720; GFX11-NEXT: v_frexp_mant_f32_e64 v2, -v1 5721; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 5722; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v0 5723; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 5724; GFX11-NEXT: v_rcp_f32_e32 v2, v2 5725; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 5726; GFX11-NEXT: s_waitcnt_depctr 0xfff 5727; GFX11-NEXT: v_mul_f32_e32 v2, v3, v2 5728; GFX11-NEXT: v_ldexp_f32 v0, v2, v0 5729; GFX11-NEXT: s_setpc_b64 s[30:31] 5730; 5731; EG-LABEL: v_fdiv_negrhs_f32_dynamic_25ulp: 5732; EG: ; %bb.0: 5733; EG-NEXT: CF_END 5734; EG-NEXT: 
PAD 5735 %neg.y = fneg float %y 5736 %div = fdiv float %x, %neg.y, !fpmath !0 5737 ret float %div 5738} 5739 5740define float @v_fdiv_negrhs_f32_daz(float %x, float %y) #0 { 5741; GFX6-FASTFMA-LABEL: v_fdiv_negrhs_f32_daz: 5742; GFX6-FASTFMA: ; %bb.0: 5743; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5744; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 5745; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 5746; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0 5747; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 5748; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 5749; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 5750; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 5751; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 5752; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 5753; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 5754; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 5755; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5756; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5757; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 5758; 5759; GFX6-SLOWFMA-LABEL: v_fdiv_negrhs_f32_daz: 5760; GFX6-SLOWFMA: ; %bb.0: 5761; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5762; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 5763; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, -v1, v0 5764; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 5765; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 5766; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 5767; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 5768; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 5769; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 5770; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 5771; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 5772; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 5773; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 5774; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5775; 
GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 5776; 5777; GFX7-LABEL: v_fdiv_negrhs_f32_daz: 5778; GFX7: ; %bb.0: 5779; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5780; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 5781; GFX7-NEXT: v_rcp_f32_e32 v3, v2 5782; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, -v1, v0 5783; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 5784; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 5785; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 5786; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 5787; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 5788; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 5789; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 5790; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 5791; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5792; GFX7-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5793; GFX7-NEXT: s_setpc_b64 s[30:31] 5794; 5795; GFX8-LABEL: v_fdiv_negrhs_f32_daz: 5796; GFX8: ; %bb.0: 5797; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5798; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], -v1, -v1, v0 5799; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, -v1, v0 5800; GFX8-NEXT: v_rcp_f32_e32 v4, v2 5801; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 5802; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 5803; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 5804; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 5805; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 5806; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 5807; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 5808; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 5809; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 5810; GFX8-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5811; GFX8-NEXT: s_setpc_b64 s[30:31] 5812; 5813; GFX10-LABEL: v_fdiv_negrhs_f32_daz: 5814; GFX10: ; %bb.0: 5815; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5816; GFX10-NEXT: v_div_scale_f32 v2, s4, -v1, -v1, v0 5817; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, -v1, v0 5818; GFX10-NEXT: v_rcp_f32_e32 v3, v2 5819; GFX10-NEXT: s_denorm_mode 15 5820; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 5821; 
GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 5822; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 5823; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 5824; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 5825; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 5826; GFX10-NEXT: s_denorm_mode 12 5827; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5828; GFX10-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5829; GFX10-NEXT: s_setpc_b64 s[30:31] 5830; 5831; GFX11-LABEL: v_fdiv_negrhs_f32_daz: 5832; GFX11: ; %bb.0: 5833; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5834; GFX11-NEXT: v_div_scale_f32 v2, null, -v1, -v1, v0 5835; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, -v1, v0 5836; GFX11-NEXT: v_rcp_f32_e32 v3, v2 5837; GFX11-NEXT: s_denorm_mode 15 5838; GFX11-NEXT: s_waitcnt_depctr 0xfff 5839; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 5840; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 5841; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 5842; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 5843; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 5844; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 5845; GFX11-NEXT: s_denorm_mode 12 5846; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 5847; GFX11-NEXT: v_div_fixup_f32 v0, v2, -v1, v0 5848; GFX11-NEXT: s_setpc_b64 s[30:31] 5849; 5850; EG-LABEL: v_fdiv_negrhs_f32_daz: 5851; EG: ; %bb.0: 5852; EG-NEXT: CF_END 5853; EG-NEXT: PAD 5854 %neg.y = fneg float %y 5855 %div = fdiv float %x, %neg.y 5856 ret float %div 5857} 5858 5859define float @v_fdiv_negrhs_f32_daz_25ulp(float %x, float %y) #0 { 5860; GFX678-LABEL: v_fdiv_negrhs_f32_daz_25ulp: 5861; GFX678: ; %bb.0: 5862; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5863; GFX678-NEXT: s_mov_b32 s4, 0x6f800000 5864; GFX678-NEXT: v_mov_b32_e32 v2, 0x2f800000 5865; GFX678-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, s4 5866; GFX678-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc 5867; GFX678-NEXT: v_mul_f32_e64 v1, -v1, v2 5868; GFX678-NEXT: v_rcp_f32_e32 v1, v1 5869; GFX678-NEXT: v_mul_f32_e32 v0, v0, v1 5870; GFX678-NEXT: v_mul_f32_e32 v0, v2, v0 5871; GFX678-NEXT: s_setpc_b64 s[30:31] 5872; 5873; 
GFX10-LABEL: v_fdiv_negrhs_f32_daz_25ulp: 5874; GFX10: ; %bb.0: 5875; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5876; GFX10-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v1| 5877; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4 5878; GFX10-NEXT: v_mul_f32_e64 v1, -v1, v2 5879; GFX10-NEXT: v_rcp_f32_e32 v1, v1 5880; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 5881; GFX10-NEXT: v_mul_f32_e32 v0, v2, v0 5882; GFX10-NEXT: s_setpc_b64 s[30:31] 5883; 5884; GFX11-LABEL: v_fdiv_negrhs_f32_daz_25ulp: 5885; GFX11: ; %bb.0: 5886; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5887; GFX11-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v1| 5888; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0 5889; GFX11-NEXT: v_mul_f32_e64 v1, -v1, v2 5890; GFX11-NEXT: v_rcp_f32_e32 v1, v1 5891; GFX11-NEXT: s_waitcnt_depctr 0xfff 5892; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 5893; GFX11-NEXT: v_mul_f32_e32 v0, v2, v0 5894; GFX11-NEXT: s_setpc_b64 s[30:31] 5895; 5896; EG-LABEL: v_fdiv_negrhs_f32_daz_25ulp: 5897; EG: ; %bb.0: 5898; EG-NEXT: CF_END 5899; EG-NEXT: PAD 5900 %neg.y = fneg float %y 5901 %div = fdiv float %x, %neg.y, !fpmath !0 5902 ret float %div 5903} 5904 5905define float @v_fdiv_f32_constrhs0_ieee(float %x) #1 { 5906; GFX6-FASTFMA-LABEL: v_fdiv_f32_constrhs0_ieee: 5907; GFX6-FASTFMA: ; %bb.0: 5908; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5909; GFX6-FASTFMA-NEXT: s_mov_b32 s6, 0x4640e400 5910; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 5911; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1 5912; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v1, v2, 1.0 5913; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v3, v2, v2 5914; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0 5915; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2 5916; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3 5917; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4 5918; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3 5919; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v1, v1, v2, v4 5920; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v1, s6, 
v0 5921; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 5922; 5923; GFX6-SLOWFMA-LABEL: v_fdiv_f32_constrhs0_ieee: 5924; GFX6-SLOWFMA: ; %bb.0: 5925; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5926; GFX6-SLOWFMA-NEXT: s_mov_b32 s6, 0x4640e400 5927; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 5928; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, v0, s6, v0 5929; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 5930; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, -v1, v3, 1.0 5931; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, v4, v3, v3 5932; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v4, v2, v3 5933; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v4, v2 5934; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v3, v4 5935; GFX6-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v4, v2 5936; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v4 5937; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v1, s6, v0 5938; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 5939; 5940; GFX7-LABEL: v_fdiv_f32_constrhs0_ieee: 5941; GFX7: ; %bb.0: 5942; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5943; GFX7-NEXT: s_mov_b32 s6, 0x4640e400 5944; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 5945; GFX7-NEXT: v_rcp_f32_e32 v2, v1 5946; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 5947; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 5948; GFX7-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0 5949; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 5950; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 5951; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 5952; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3 5953; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4 5954; GFX7-NEXT: v_div_fixup_f32 v0, v1, s6, v0 5955; GFX7-NEXT: s_setpc_b64 s[30:31] 5956; 5957; GFX8-LABEL: v_fdiv_f32_constrhs0_ieee: 5958; GFX8: ; %bb.0: 5959; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5960; GFX8-NEXT: s_mov_b32 s6, 0x4640e400 5961; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 5962; GFX8-NEXT: v_div_scale_f32 v2, vcc, v0, s6, v0 5963; GFX8-NEXT: v_rcp_f32_e32 v3, v1 5964; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0 5965; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3 
5966; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3 5967; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2 5968; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4 5969; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2 5970; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 5971; GFX8-NEXT: v_div_fixup_f32 v0, v1, s6, v0 5972; GFX8-NEXT: s_setpc_b64 s[30:31] 5973; 5974; GFX10-LABEL: v_fdiv_f32_constrhs0_ieee: 5975; GFX10: ; %bb.0: 5976; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5977; GFX10-NEXT: v_div_scale_f32 v1, s4, 0x4640e400, 0x4640e400, v0 5978; GFX10-NEXT: v_rcp_f32_e32 v2, v1 5979; GFX10-NEXT: v_fma_f32 v3, -v1, v2, 1.0 5980; GFX10-NEXT: v_fmac_f32_e32 v2, v3, v2 5981; GFX10-NEXT: v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0 5982; GFX10-NEXT: v_mul_f32_e32 v4, v3, v2 5983; GFX10-NEXT: v_fma_f32 v5, -v1, v4, v3 5984; GFX10-NEXT: v_fmac_f32_e32 v4, v5, v2 5985; GFX10-NEXT: v_fma_f32 v1, -v1, v4, v3 5986; GFX10-NEXT: v_div_fmas_f32 v1, v1, v2, v4 5987; GFX10-NEXT: v_div_fixup_f32 v0, v1, 0x4640e400, v0 5988; GFX10-NEXT: s_setpc_b64 s[30:31] 5989; 5990; GFX11-LABEL: v_fdiv_f32_constrhs0_ieee: 5991; GFX11: ; %bb.0: 5992; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5993; GFX11-NEXT: v_div_scale_f32 v1, null, 0x4640e400, 0x4640e400, v0 5994; GFX11-NEXT: v_rcp_f32_e32 v2, v1 5995; GFX11-NEXT: s_waitcnt_depctr 0xfff 5996; GFX11-NEXT: v_fma_f32 v3, -v1, v2, 1.0 5997; GFX11-NEXT: v_fmac_f32_e32 v2, v3, v2 5998; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0 5999; GFX11-NEXT: v_mul_f32_e32 v4, v3, v2 6000; GFX11-NEXT: v_fma_f32 v5, -v1, v4, v3 6001; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v2 6002; GFX11-NEXT: v_fma_f32 v1, -v1, v4, v3 6003; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6004; GFX11-NEXT: v_div_fixup_f32 v0, v1, 0x4640e400, v0 6005; GFX11-NEXT: s_setpc_b64 s[30:31] 6006; 6007; EG-LABEL: v_fdiv_f32_constrhs0_ieee: 6008; EG: ; %bb.0: 6009; EG-NEXT: CF_END 6010; EG-NEXT: PAD 6011 %div = fdiv float %x, 12345.0 6012 ret float %div 6013} 6014 6015define float 
@v_fdiv_f32_constrhs0_ieee_25ulp(float %x) #1 { 6016; GFX6-LABEL: v_fdiv_f32_constrhs0_ieee_25ulp: 6017; GFX6: ; %bb.0: 6018; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6019; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 6020; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v0 6021; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 6022; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, 0x4640e400 6023; GFX6-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 6024; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 6025; GFX6-NEXT: v_mul_f32_e32 v2, 0x3fa9e0f0, v2 6026; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 6027; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 6028; GFX6-NEXT: s_setpc_b64 s[30:31] 6029; 6030; GFX7-LABEL: v_fdiv_f32_constrhs0_ieee_25ulp: 6031; GFX7: ; %bb.0: 6032; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6033; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v0 6034; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 6035; GFX7-NEXT: v_mul_f32_e32 v0, 0x3fa9e0f0, v0 6036; GFX7-NEXT: v_add_i32_e32 v1, vcc, -14, v1 6037; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 6038; GFX7-NEXT: s_setpc_b64 s[30:31] 6039; 6040; GFX8-LABEL: v_fdiv_f32_constrhs0_ieee_25ulp: 6041; GFX8: ; %bb.0: 6042; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6043; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v0 6044; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 6045; GFX8-NEXT: v_mul_f32_e32 v0, 0x3fa9e0f0, v0 6046; GFX8-NEXT: v_add_u32_e32 v1, vcc, -14, v1 6047; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 6048; GFX8-NEXT: s_setpc_b64 s[30:31] 6049; 6050; GFX10-LABEL: v_fdiv_f32_constrhs0_ieee_25ulp: 6051; GFX10: ; %bb.0: 6052; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6053; GFX10-NEXT: v_frexp_mant_f32_e32 v1, v0 6054; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 6055; GFX10-NEXT: v_mul_f32_e32 v1, 0x3fa9e0f0, v1 6056; GFX10-NEXT: v_add_nc_u32_e32 v0, -14, v0 6057; GFX10-NEXT: v_ldexp_f32 v0, v1, v0 6058; GFX10-NEXT: s_setpc_b64 s[30:31] 6059; 6060; GFX11-LABEL: v_fdiv_f32_constrhs0_ieee_25ulp: 6061; GFX11: ; %bb.0: 6062; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 6063; GFX11-NEXT: v_frexp_mant_f32_e32 v1, v0 6064; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 6065; GFX11-NEXT: v_dual_mul_f32 v1, 0x3fa9e0f0, v1 :: v_dual_add_nc_u32 v0, -14, v0 6066; GFX11-NEXT: v_ldexp_f32 v0, v1, v0 6067; GFX11-NEXT: s_setpc_b64 s[30:31] 6068; 6069; EG-LABEL: v_fdiv_f32_constrhs0_ieee_25ulp: 6070; EG: ; %bb.0: 6071; EG-NEXT: CF_END 6072; EG-NEXT: PAD 6073 %div = fdiv float %x, 12345.0, !fpmath !0 6074 ret float %div 6075} 6076 6077define float @v_fdiv_f32_constrhs0_dynamic(float %x) #2 { 6078; GFX6-FASTFMA-LABEL: v_fdiv_f32_constrhs0_dynamic: 6079; GFX6-FASTFMA: ; %bb.0: 6080; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6081; GFX6-FASTFMA-NEXT: s_mov_b32 s6, 0x4640e400 6082; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 6083; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1 6084; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0 6085; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 6086; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 6087; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0 6088; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2 6089; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2 6090; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3 6091; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4 6092; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3 6093; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 6094; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6095; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v1, s6, v0 6096; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 6097; 6098; GFX6-SLOWFMA-LABEL: v_fdiv_f32_constrhs0_dynamic: 6099; GFX6-SLOWFMA: ; %bb.0: 6100; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6101; GFX6-SLOWFMA-NEXT: s_mov_b32 s6, 0x4640e400 6102; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 6103; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, v0, s6, v0 6104; GFX6-SLOWFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 6105; GFX6-SLOWFMA-NEXT: 
v_rcp_f32_e32 v3, v1 6106; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 6107; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, -v1, v3, 1.0 6108; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, v4, v3, v3 6109; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v4, v2, v3 6110; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v4, v2 6111; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v3, v4 6112; GFX6-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v4, v2 6113; GFX6-SLOWFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 6114; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v4 6115; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v1, s6, v0 6116; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 6117; 6118; GFX7-LABEL: v_fdiv_f32_constrhs0_dynamic: 6119; GFX7: ; %bb.0: 6120; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6121; GFX7-NEXT: s_mov_b32 s6, 0x4640e400 6122; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 6123; GFX7-NEXT: v_rcp_f32_e32 v2, v1 6124; GFX7-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0 6125; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 6126; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 6127; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 6128; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 6129; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 6130; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 6131; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 6132; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3 6133; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 6134; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6135; GFX7-NEXT: v_div_fixup_f32 v0, v1, s6, v0 6136; GFX7-NEXT: s_setpc_b64 s[30:31] 6137; 6138; GFX8-LABEL: v_fdiv_f32_constrhs0_dynamic: 6139; GFX8: ; %bb.0: 6140; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6141; GFX8-NEXT: s_mov_b32 s6, 0x4640e400 6142; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 6143; GFX8-NEXT: v_div_scale_f32 v2, vcc, v0, s6, v0 6144; GFX8-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 6145; GFX8-NEXT: v_rcp_f32_e32 v3, v1 6146; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 6147; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 
1.0 6148; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3 6149; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3 6150; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2 6151; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4 6152; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2 6153; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 6154; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 6155; GFX8-NEXT: v_div_fixup_f32 v0, v1, s6, v0 6156; GFX8-NEXT: s_setpc_b64 s[30:31] 6157; 6158; GFX10-LABEL: v_fdiv_f32_constrhs0_dynamic: 6159; GFX10: ; %bb.0: 6160; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6161; GFX10-NEXT: v_div_scale_f32 v1, s4, 0x4640e400, 0x4640e400, v0 6162; GFX10-NEXT: v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0 6163; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 6164; GFX10-NEXT: v_rcp_f32_e32 v2, v1 6165; GFX10-NEXT: s_denorm_mode 15 6166; GFX10-NEXT: v_fma_f32 v4, -v1, v2, 1.0 6167; GFX10-NEXT: v_fmac_f32_e32 v2, v4, v2 6168; GFX10-NEXT: v_mul_f32_e32 v4, v3, v2 6169; GFX10-NEXT: v_fma_f32 v5, -v1, v4, v3 6170; GFX10-NEXT: v_fmac_f32_e32 v4, v5, v2 6171; GFX10-NEXT: v_fma_f32 v1, -v1, v4, v3 6172; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 6173; GFX10-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6174; GFX10-NEXT: v_div_fixup_f32 v0, v1, 0x4640e400, v0 6175; GFX10-NEXT: s_setpc_b64 s[30:31] 6176; 6177; GFX11-LABEL: v_fdiv_f32_constrhs0_dynamic: 6178; GFX11: ; %bb.0: 6179; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6180; GFX11-NEXT: v_div_scale_f32 v1, null, 0x4640e400, 0x4640e400, v0 6181; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0 6182; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2) 6183; GFX11-NEXT: v_rcp_f32_e32 v2, v1 6184; GFX11-NEXT: s_denorm_mode 15 6185; GFX11-NEXT: s_waitcnt_depctr 0xfff 6186; GFX11-NEXT: v_fma_f32 v4, -v1, v2, 1.0 6187; GFX11-NEXT: v_fmac_f32_e32 v2, v4, v2 6188; GFX11-NEXT: v_mul_f32_e32 v4, v3, v2 6189; GFX11-NEXT: v_fma_f32 v5, -v1, v4, v3 6190; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v2 6191; GFX11-NEXT: v_fma_f32 v1, -v1, v4, v3 6192; 
GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 6193; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6194; GFX11-NEXT: v_div_fixup_f32 v0, v1, 0x4640e400, v0 6195; GFX11-NEXT: s_setpc_b64 s[30:31] 6196; 6197; EG-LABEL: v_fdiv_f32_constrhs0_dynamic: 6198; EG: ; %bb.0: 6199; EG-NEXT: CF_END 6200; EG-NEXT: PAD 6201 %div = fdiv float %x, 12345.0 6202 ret float %div 6203} 6204 6205define float @v_fdiv_f32_constrhs0_dynamic_25ulp(float %x) #2 { 6206; GFX6-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp: 6207; GFX6: ; %bb.0: 6208; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6209; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 6210; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v0 6211; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 6212; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, 0x4640e400 6213; GFX6-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 6214; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 6215; GFX6-NEXT: v_mul_f32_e32 v2, 0x3fa9e0f0, v2 6216; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 6217; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 6218; GFX6-NEXT: s_setpc_b64 s[30:31] 6219; 6220; GFX7-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp: 6221; GFX7: ; %bb.0: 6222; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6223; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v0 6224; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 6225; GFX7-NEXT: v_mul_f32_e32 v0, 0x3fa9e0f0, v0 6226; GFX7-NEXT: v_add_i32_e32 v1, vcc, -14, v1 6227; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 6228; GFX7-NEXT: s_setpc_b64 s[30:31] 6229; 6230; GFX8-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp: 6231; GFX8: ; %bb.0: 6232; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6233; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v0 6234; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 6235; GFX8-NEXT: v_mul_f32_e32 v0, 0x3fa9e0f0, v0 6236; GFX8-NEXT: v_add_u32_e32 v1, vcc, -14, v1 6237; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 6238; GFX8-NEXT: s_setpc_b64 s[30:31] 6239; 6240; GFX10-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp: 6241; GFX10: ; %bb.0: 6242; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 6243; GFX10-NEXT: v_frexp_mant_f32_e32 v1, v0 6244; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 6245; GFX10-NEXT: v_mul_f32_e32 v1, 0x3fa9e0f0, v1 6246; GFX10-NEXT: v_add_nc_u32_e32 v0, -14, v0 6247; GFX10-NEXT: v_ldexp_f32 v0, v1, v0 6248; GFX10-NEXT: s_setpc_b64 s[30:31] 6249; 6250; GFX11-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp: 6251; GFX11: ; %bb.0: 6252; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6253; GFX11-NEXT: v_frexp_mant_f32_e32 v1, v0 6254; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 6255; GFX11-NEXT: v_dual_mul_f32 v1, 0x3fa9e0f0, v1 :: v_dual_add_nc_u32 v0, -14, v0 6256; GFX11-NEXT: v_ldexp_f32 v0, v1, v0 6257; GFX11-NEXT: s_setpc_b64 s[30:31] 6258; 6259; EG-LABEL: v_fdiv_f32_constrhs0_dynamic_25ulp: 6260; EG: ; %bb.0: 6261; EG-NEXT: CF_END 6262; EG-NEXT: PAD 6263 %div = fdiv float %x, 12345.0, !fpmath !0 6264 ret float %div 6265} 6266 6267define float @v_fdiv_f32_constrhs0_daz(float %x) #0 { 6268; GFX6-FASTFMA-LABEL: v_fdiv_f32_constrhs0_daz: 6269; GFX6-FASTFMA: ; %bb.0: 6270; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6271; GFX6-FASTFMA-NEXT: s_mov_b32 s6, 0x4640e400 6272; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 6273; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1 6274; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0 6275; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 6276; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0 6277; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2 6278; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2 6279; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3 6280; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4 6281; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3 6282; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 6283; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6284; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v1, s6, v0 6285; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 6286; 6287; GFX6-SLOWFMA-LABEL: v_fdiv_f32_constrhs0_daz: 6288; GFX6-SLOWFMA: ; %bb.0: 
6289; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6290; GFX6-SLOWFMA-NEXT: s_mov_b32 s6, 0x4640e400 6291; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 6292; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, v0, s6, v0 6293; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 6294; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 6295; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, -v1, v3, 1.0 6296; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, v4, v3, v3 6297; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v4, v2, v3 6298; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v4, v2 6299; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v3, v4 6300; GFX6-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v4, v2 6301; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 6302; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v4 6303; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v1, s6, v0 6304; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 6305; 6306; GFX7-LABEL: v_fdiv_f32_constrhs0_daz: 6307; GFX7: ; %bb.0: 6308; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6309; GFX7-NEXT: s_mov_b32 s6, 0x4640e400 6310; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 6311; GFX7-NEXT: v_rcp_f32_e32 v2, v1 6312; GFX7-NEXT: v_div_scale_f32 v3, vcc, v0, s6, v0 6313; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 6314; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 6315; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 6316; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 6317; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 6318; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 6319; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3 6320; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 6321; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6322; GFX7-NEXT: v_div_fixup_f32 v0, v1, s6, v0 6323; GFX7-NEXT: s_setpc_b64 s[30:31] 6324; 6325; GFX8-LABEL: v_fdiv_f32_constrhs0_daz: 6326; GFX8: ; %bb.0: 6327; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6328; GFX8-NEXT: s_mov_b32 s6, 0x4640e400 6329; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], s6, s6, v0 6330; GFX8-NEXT: v_div_scale_f32 v2, vcc, v0, 
s6, v0 6331; GFX8-NEXT: v_rcp_f32_e32 v3, v1 6332; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 6333; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0 6334; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3 6335; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3 6336; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2 6337; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4 6338; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2 6339; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 6340; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 6341; GFX8-NEXT: v_div_fixup_f32 v0, v1, s6, v0 6342; GFX8-NEXT: s_setpc_b64 s[30:31] 6343; 6344; GFX10-LABEL: v_fdiv_f32_constrhs0_daz: 6345; GFX10: ; %bb.0: 6346; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6347; GFX10-NEXT: v_div_scale_f32 v1, s4, 0x4640e400, 0x4640e400, v0 6348; GFX10-NEXT: v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0 6349; GFX10-NEXT: v_rcp_f32_e32 v2, v1 6350; GFX10-NEXT: s_denorm_mode 15 6351; GFX10-NEXT: v_fma_f32 v4, -v1, v2, 1.0 6352; GFX10-NEXT: v_fmac_f32_e32 v2, v4, v2 6353; GFX10-NEXT: v_mul_f32_e32 v4, v3, v2 6354; GFX10-NEXT: v_fma_f32 v5, -v1, v4, v3 6355; GFX10-NEXT: v_fmac_f32_e32 v4, v5, v2 6356; GFX10-NEXT: v_fma_f32 v1, -v1, v4, v3 6357; GFX10-NEXT: s_denorm_mode 12 6358; GFX10-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6359; GFX10-NEXT: v_div_fixup_f32 v0, v1, 0x4640e400, v0 6360; GFX10-NEXT: s_setpc_b64 s[30:31] 6361; 6362; GFX11-LABEL: v_fdiv_f32_constrhs0_daz: 6363; GFX11: ; %bb.0: 6364; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6365; GFX11-NEXT: v_div_scale_f32 v1, null, 0x4640e400, 0x4640e400, v0 6366; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, v0, 0x4640e400, v0 6367; GFX11-NEXT: v_rcp_f32_e32 v2, v1 6368; GFX11-NEXT: s_denorm_mode 15 6369; GFX11-NEXT: s_waitcnt_depctr 0xfff 6370; GFX11-NEXT: v_fma_f32 v4, -v1, v2, 1.0 6371; GFX11-NEXT: v_fmac_f32_e32 v2, v4, v2 6372; GFX11-NEXT: v_mul_f32_e32 v4, v3, v2 6373; GFX11-NEXT: v_fma_f32 v5, -v1, v4, v3 6374; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v2 6375; GFX11-NEXT: v_fma_f32 v1, -v1, v4, v3 6376; 
GFX11-NEXT: s_denorm_mode 12 6377; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6378; GFX11-NEXT: v_div_fixup_f32 v0, v1, 0x4640e400, v0 6379; GFX11-NEXT: s_setpc_b64 s[30:31] 6380; 6381; EG-LABEL: v_fdiv_f32_constrhs0_daz: 6382; EG: ; %bb.0: 6383; EG-NEXT: CF_END 6384; EG-NEXT: PAD 6385 %div = fdiv float %x, 12345.0 6386 ret float %div 6387} 6388; Divides %x by the constant 12345.0 with !fpmath !0 attached, under attribute group #0 (both defined outside this chunk - TODO confirm their meaning). The expected code for every amdgcn target is a single multiply by 0x38a9e0f0, approximately 1/12345. 6389define float @v_fdiv_f32_constrhs0_daz_25ulp(float %x) #0 { 6390; GCN-LABEL: v_fdiv_f32_constrhs0_daz_25ulp: 6391; GCN: ; %bb.0: 6392; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6393; GCN-NEXT: v_mul_f32_e32 v0, 0x38a9e0f0, v0 6394; GCN-NEXT: s_setpc_b64 s[30:31] 6395; 6396; EG-LABEL: v_fdiv_f32_constrhs0_daz_25ulp: 6397; EG: ; %bb.0: 6398; EG-NEXT: CF_END 6399; EG-NEXT: PAD 6400 %div = fdiv float %x, 12345.0, !fpmath !0 6401 ret float %div 6402} 6403; Divides the constant 12345.0 by %x with no !fpmath, under attribute group #1 (defined outside this chunk; the name suggests IEEE denormal handling - TODO confirm). The expected code is the full div_scale/rcp/fma/div_fmas/div_fixup sequence with no mode register writes. 6404define float @v_fdiv_f32_constlhs0_ieee(float %x) #1 { 6405; GFX6-FASTFMA-LABEL: v_fdiv_f32_constlhs0_ieee: 6406; GFX6-FASTFMA: ; %bb.0: 6407; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6408; GFX6-FASTFMA-NEXT: s_mov_b32 s6, 0x4640e400 6409; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 6410; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1 6411; GFX6-FASTFMA-NEXT: v_fma_f32 v3, -v1, v2, 1.0 6412; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v3, v2, v2 6413; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6 6414; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2 6415; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3 6416; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4 6417; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3 6418; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6419; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v1, v0, s6 6420; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 6421; 6422; GFX6-SLOWFMA-LABEL: v_fdiv_f32_constlhs0_ieee: 6423; GFX6-SLOWFMA: ; %bb.0: 6424; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6425; GFX6-SLOWFMA-NEXT: s_mov_b32 s6, 0x4640e400 6426; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 6427; GFX6-SLOWFMA-NEXT:
v_div_scale_f32 v2, vcc, s6, v0, s6 6428; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 6429; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, -v1, v3, 1.0 6430; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, v4, v3, v3 6431; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v4, v2, v3 6432; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v4, v2 6433; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v3, v4 6434; GFX6-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v4, v2 6435; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v4 6436; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v1, v0, s6 6437; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 6438; 6439; GFX7-LABEL: v_fdiv_f32_constlhs0_ieee: 6440; GFX7: ; %bb.0: 6441; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6442; GFX7-NEXT: s_mov_b32 s6, 0x4640e400 6443; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 6444; GFX7-NEXT: v_rcp_f32_e32 v2, v1 6445; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 6446; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 6447; GFX7-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6 6448; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 6449; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 6450; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 6451; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3 6452; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6453; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, s6 6454; GFX7-NEXT: s_setpc_b64 s[30:31] 6455; 6456; GFX8-LABEL: v_fdiv_f32_constlhs0_ieee: 6457; GFX8: ; %bb.0: 6458; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6459; GFX8-NEXT: s_mov_b32 s6, 0x4640e400 6460; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 6461; GFX8-NEXT: v_div_scale_f32 v2, vcc, s6, v0, s6 6462; GFX8-NEXT: v_rcp_f32_e32 v3, v1 6463; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0 6464; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3 6465; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3 6466; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2 6467; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4 6468; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2 6469; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 6470; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, s6 6471; GFX8-NEXT: s_setpc_b64 s[30:31] 6472; 6473; GFX10-LABEL: 
v_fdiv_f32_constlhs0_ieee: 6474; GFX10: ; %bb.0: 6475; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6476; GFX10-NEXT: v_div_scale_f32 v1, s4, v0, v0, 0x4640e400 6477; GFX10-NEXT: v_rcp_f32_e32 v2, v1 6478; GFX10-NEXT: v_fma_f32 v3, -v1, v2, 1.0 6479; GFX10-NEXT: v_fmac_f32_e32 v2, v3, v2 6480; GFX10-NEXT: v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400 6481; GFX10-NEXT: v_mul_f32_e32 v4, v3, v2 6482; GFX10-NEXT: v_fma_f32 v5, -v1, v4, v3 6483; GFX10-NEXT: v_fmac_f32_e32 v4, v5, v2 6484; GFX10-NEXT: v_fma_f32 v1, -v1, v4, v3 6485; GFX10-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6486; GFX10-NEXT: v_div_fixup_f32 v0, v1, v0, 0x4640e400 6487; GFX10-NEXT: s_setpc_b64 s[30:31] 6488; 6489; GFX11-LABEL: v_fdiv_f32_constlhs0_ieee: 6490; GFX11: ; %bb.0: 6491; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6492; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 0x4640e400 6493; GFX11-NEXT: v_rcp_f32_e32 v2, v1 6494; GFX11-NEXT: s_waitcnt_depctr 0xfff 6495; GFX11-NEXT: v_fma_f32 v3, -v1, v2, 1.0 6496; GFX11-NEXT: v_fmac_f32_e32 v2, v3, v2 6497; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400 6498; GFX11-NEXT: v_mul_f32_e32 v4, v3, v2 6499; GFX11-NEXT: v_fma_f32 v5, -v1, v4, v3 6500; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v2 6501; GFX11-NEXT: v_fma_f32 v1, -v1, v4, v3 6502; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6503; GFX11-NEXT: v_div_fixup_f32 v0, v1, v0, 0x4640e400 6504; GFX11-NEXT: s_setpc_b64 s[30:31] 6505; 6506; EG-LABEL: v_fdiv_f32_constlhs0_ieee: 6507; EG: ; %bb.0: 6508; EG-NEXT: CF_END 6509; EG-NEXT: PAD 6510 %div = fdiv float 12345.0, %x 6511 ret float %div 6512} 6513; Divides the constant 12345.0 by %x with !fpmath !0 attached, under attribute group #1 (both defined outside this chunk - TODO confirm). The expected code takes rcp of the frexp mantissa of %x, multiplies by 0x3f40e400 and rescales with ldexp. 6514define float @v_fdiv_f32_constlhs0_ieee_25ulp(float %x) #1 { 6515; GFX6-LABEL: v_fdiv_f32_constlhs0_ieee_25ulp: 6516; GFX6: ; %bb.0: 6517; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6518; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 6519; GFX6-NEXT: v_frexp_mant_f32_e32 v1, v0 6520; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 6521; GFX6-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc
6522; GFX6-NEXT: v_rcp_f32_e32 v1, v1 6523; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 6524; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v2, 0x4640e400 6525; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v2, v0 6526; GFX6-NEXT: v_mul_f32_e32 v1, 0x3f40e400, v1 6527; GFX6-NEXT: v_ldexp_f32_e32 v0, v1, v0 6528; GFX6-NEXT: s_setpc_b64 s[30:31] 6529; 6530; GFX7-LABEL: v_fdiv_f32_constlhs0_ieee_25ulp: 6531; GFX7: ; %bb.0: 6532; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6533; GFX7-NEXT: v_frexp_mant_f32_e32 v1, v0 6534; GFX7-NEXT: v_rcp_f32_e32 v1, v1 6535; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 6536; GFX7-NEXT: v_sub_i32_e32 v0, vcc, 14, v0 6537; GFX7-NEXT: v_mul_f32_e32 v1, 0x3f40e400, v1 6538; GFX7-NEXT: v_ldexp_f32_e32 v0, v1, v0 6539; GFX7-NEXT: s_setpc_b64 s[30:31] 6540; 6541; GFX8-LABEL: v_fdiv_f32_constlhs0_ieee_25ulp: 6542; GFX8: ; %bb.0: 6543; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6544; GFX8-NEXT: v_frexp_mant_f32_e32 v1, v0 6545; GFX8-NEXT: v_rcp_f32_e32 v1, v1 6546; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 6547; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 14, v0 6548; GFX8-NEXT: v_mul_f32_e32 v1, 0x3f40e400, v1 6549; GFX8-NEXT: v_ldexp_f32 v0, v1, v0 6550; GFX8-NEXT: s_setpc_b64 s[30:31] 6551; 6552; GFX10-LABEL: v_fdiv_f32_constlhs0_ieee_25ulp: 6553; GFX10: ; %bb.0: 6554; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6555; GFX10-NEXT: v_frexp_mant_f32_e32 v1, v0 6556; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 6557; GFX10-NEXT: v_rcp_f32_e32 v1, v1 6558; GFX10-NEXT: v_sub_nc_u32_e32 v0, 14, v0 6559; GFX10-NEXT: v_mul_f32_e32 v1, 0x3f40e400, v1 6560; GFX10-NEXT: v_ldexp_f32 v0, v1, v0 6561; GFX10-NEXT: s_setpc_b64 s[30:31] 6562; 6563; GFX11-LABEL: v_fdiv_f32_constlhs0_ieee_25ulp: 6564; GFX11: ; %bb.0: 6565; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6566; GFX11-NEXT: v_frexp_mant_f32_e32 v1, v0 6567; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 6568; GFX11-NEXT: v_rcp_f32_e32 v1, v1 6569; GFX11-NEXT: v_sub_nc_u32_e32 v0, 14, v0 6570; GFX11-NEXT: 
s_waitcnt_depctr 0xfff 6571; GFX11-NEXT: v_mul_f32_e32 v1, 0x3f40e400, v1 6572; GFX11-NEXT: v_ldexp_f32 v0, v1, v0 6573; GFX11-NEXT: s_setpc_b64 s[30:31] 6574; 6575; EG-LABEL: v_fdiv_f32_constlhs0_ieee_25ulp: 6576; EG: ; %bb.0: 6577; EG-NEXT: CF_END 6578; EG-NEXT: PAD 6579 %div = fdiv float 12345.0, %x, !fpmath !0 6580 ret float %div 6581} 6582; Divides the constant 12345.0 by %x with no !fpmath, under attribute group #2 (defined outside this chunk; the name suggests a dynamic denormal mode - TODO confirm). The expected code saves the mode bits with s_getreg_b32 before the fma chain and restores them with s_setreg_b32 afterwards. 6583define float @v_fdiv_f32_constlhs0_dynamic(float %x) #2 { 6584; GFX6-FASTFMA-LABEL: v_fdiv_f32_constlhs0_dynamic: 6585; GFX6-FASTFMA: ; %bb.0: 6586; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6587; GFX6-FASTFMA-NEXT: s_mov_b32 s6, 0x4640e400 6588; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 6589; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1 6590; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6 6591; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 6592; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 6593; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0 6594; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2 6595; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2 6596; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3 6597; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4 6598; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3 6599; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 6600; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6601; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v1, v0, s6 6602; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 6603; 6604; GFX6-SLOWFMA-LABEL: v_fdiv_f32_constlhs0_dynamic: 6605; GFX6-SLOWFMA: ; %bb.0: 6606; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6607; GFX6-SLOWFMA-NEXT: s_mov_b32 s6, 0x4640e400 6608; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 6609; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, s6, v0, s6 6610; GFX6-SLOWFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 6611; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 6612; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 6613; GFX6-SLOWFMA-NEXT: v_fma_f32
v4, -v1, v3, 1.0 6614; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, v4, v3, v3 6615; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v4, v2, v3 6616; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v4, v2 6617; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v3, v4 6618; GFX6-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v4, v2 6619; GFX6-SLOWFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 6620; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v4 6621; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v1, v0, s6 6622; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 6623; 6624; GFX7-LABEL: v_fdiv_f32_constlhs0_dynamic: 6625; GFX7: ; %bb.0: 6626; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6627; GFX7-NEXT: s_mov_b32 s6, 0x4640e400 6628; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 6629; GFX7-NEXT: v_rcp_f32_e32 v2, v1 6630; GFX7-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6 6631; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 6632; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 6633; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 6634; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 6635; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 6636; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 6637; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 6638; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3 6639; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 6640; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6641; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, s6 6642; GFX7-NEXT: s_setpc_b64 s[30:31] 6643; 6644; GFX8-LABEL: v_fdiv_f32_constlhs0_dynamic: 6645; GFX8: ; %bb.0: 6646; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6647; GFX8-NEXT: s_mov_b32 s6, 0x4640e400 6648; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 6649; GFX8-NEXT: v_div_scale_f32 v2, vcc, s6, v0, s6 6650; GFX8-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 6651; GFX8-NEXT: v_rcp_f32_e32 v3, v1 6652; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 6653; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0 6654; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3 6655; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3 6656; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2 
6657; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4 6658; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2 6659; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 6660; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 6661; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, s6 6662; GFX8-NEXT: s_setpc_b64 s[30:31] 6663; 6664; GFX10-LABEL: v_fdiv_f32_constlhs0_dynamic: 6665; GFX10: ; %bb.0: 6666; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6667; GFX10-NEXT: v_div_scale_f32 v1, s4, v0, v0, 0x4640e400 6668; GFX10-NEXT: v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400 6669; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 6670; GFX10-NEXT: v_rcp_f32_e32 v2, v1 6671; GFX10-NEXT: s_denorm_mode 15 6672; GFX10-NEXT: v_fma_f32 v4, -v1, v2, 1.0 6673; GFX10-NEXT: v_fmac_f32_e32 v2, v4, v2 6674; GFX10-NEXT: v_mul_f32_e32 v4, v3, v2 6675; GFX10-NEXT: v_fma_f32 v5, -v1, v4, v3 6676; GFX10-NEXT: v_fmac_f32_e32 v4, v5, v2 6677; GFX10-NEXT: v_fma_f32 v1, -v1, v4, v3 6678; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 6679; GFX10-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6680; GFX10-NEXT: v_div_fixup_f32 v0, v1, v0, 0x4640e400 6681; GFX10-NEXT: s_setpc_b64 s[30:31] 6682; 6683; GFX11-LABEL: v_fdiv_f32_constlhs0_dynamic: 6684; GFX11: ; %bb.0: 6685; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6686; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 0x4640e400 6687; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400 6688; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2) 6689; GFX11-NEXT: v_rcp_f32_e32 v2, v1 6690; GFX11-NEXT: s_denorm_mode 15 6691; GFX11-NEXT: s_waitcnt_depctr 0xfff 6692; GFX11-NEXT: v_fma_f32 v4, -v1, v2, 1.0 6693; GFX11-NEXT: v_fmac_f32_e32 v2, v4, v2 6694; GFX11-NEXT: v_mul_f32_e32 v4, v3, v2 6695; GFX11-NEXT: v_fma_f32 v5, -v1, v4, v3 6696; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v2 6697; GFX11-NEXT: v_fma_f32 v1, -v1, v4, v3 6698; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 6699; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6700; GFX11-NEXT: v_div_fixup_f32 
v0, v1, v0, 0x4640e400 6701; GFX11-NEXT: s_setpc_b64 s[30:31] 6702; 6703; EG-LABEL: v_fdiv_f32_constlhs0_dynamic: 6704; EG: ; %bb.0: 6705; EG-NEXT: CF_END 6706; EG-NEXT: PAD 6707 %div = fdiv float 12345.0, %x 6708 ret float %div 6709} 6710; Divides the constant 12345.0 by %x with !fpmath !0 attached, under attribute group #2 (both defined outside this chunk - TODO confirm). The expected code takes rcp of the frexp mantissa of %x, multiplies by 0x3f40e400 and rescales with ldexp, with no mode register access. 6711define float @v_fdiv_f32_constlhs0_dynamic_25ulp(float %x) #2 { 6712; GFX6-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp: 6713; GFX6: ; %bb.0: 6714; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6715; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 6716; GFX6-NEXT: v_frexp_mant_f32_e32 v1, v0 6717; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 6718; GFX6-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc 6719; GFX6-NEXT: v_rcp_f32_e32 v1, v1 6720; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 6721; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v2, 0x4640e400 6722; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v2, v0 6723; GFX6-NEXT: v_mul_f32_e32 v1, 0x3f40e400, v1 6724; GFX6-NEXT: v_ldexp_f32_e32 v0, v1, v0 6725; GFX6-NEXT: s_setpc_b64 s[30:31] 6726; 6727; GFX7-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp: 6728; GFX7: ; %bb.0: 6729; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6730; GFX7-NEXT: v_frexp_mant_f32_e32 v1, v0 6731; GFX7-NEXT: v_rcp_f32_e32 v1, v1 6732; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 6733; GFX7-NEXT: v_sub_i32_e32 v0, vcc, 14, v0 6734; GFX7-NEXT: v_mul_f32_e32 v1, 0x3f40e400, v1 6735; GFX7-NEXT: v_ldexp_f32_e32 v0, v1, v0 6736; GFX7-NEXT: s_setpc_b64 s[30:31] 6737; 6738; GFX8-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp: 6739; GFX8: ; %bb.0: 6740; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6741; GFX8-NEXT: v_frexp_mant_f32_e32 v1, v0 6742; GFX8-NEXT: v_rcp_f32_e32 v1, v1 6743; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 6744; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 14, v0 6745; GFX8-NEXT: v_mul_f32_e32 v1, 0x3f40e400, v1 6746; GFX8-NEXT: v_ldexp_f32 v0, v1, v0 6747; GFX8-NEXT: s_setpc_b64 s[30:31] 6748; 6749; GFX10-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp: 6750; GFX10: ; %bb.0: 6751; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6752; GFX10-NEXT:
v_frexp_mant_f32_e32 v1, v0 6753; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 6754; GFX10-NEXT: v_rcp_f32_e32 v1, v1 6755; GFX10-NEXT: v_sub_nc_u32_e32 v0, 14, v0 6756; GFX10-NEXT: v_mul_f32_e32 v1, 0x3f40e400, v1 6757; GFX10-NEXT: v_ldexp_f32 v0, v1, v0 6758; GFX10-NEXT: s_setpc_b64 s[30:31] 6759; 6760; GFX11-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp: 6761; GFX11: ; %bb.0: 6762; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6763; GFX11-NEXT: v_frexp_mant_f32_e32 v1, v0 6764; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 6765; GFX11-NEXT: v_rcp_f32_e32 v1, v1 6766; GFX11-NEXT: v_sub_nc_u32_e32 v0, 14, v0 6767; GFX11-NEXT: s_waitcnt_depctr 0xfff 6768; GFX11-NEXT: v_mul_f32_e32 v1, 0x3f40e400, v1 6769; GFX11-NEXT: v_ldexp_f32 v0, v1, v0 6770; GFX11-NEXT: s_setpc_b64 s[30:31] 6771; 6772; EG-LABEL: v_fdiv_f32_constlhs0_dynamic_25ulp: 6773; EG: ; %bb.0: 6774; EG-NEXT: CF_END 6775; EG-NEXT: PAD 6776 %div = fdiv float 12345.0, %x, !fpmath !0 6777 ret float %div 6778} 6779; Divides the constant 12345.0 by %x with no !fpmath, under attribute group #0 (defined outside this chunk; the name suggests flushed denormals - TODO confirm). The expected code is the full div_scale/rcp/fma/div_fmas/div_fixup sequence with the mode register changed around the fma chain (s_setreg_imm32_b32 or s_denorm_mode) and then reset. 6780define float @v_fdiv_f32_constlhs0_daz(float %x) #0 { 6781; GFX6-FASTFMA-LABEL: v_fdiv_f32_constlhs0_daz: 6782; GFX6-FASTFMA: ; %bb.0: 6783; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6784; GFX6-FASTFMA-NEXT: s_mov_b32 s6, 0x4640e400 6785; GFX6-FASTFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 6786; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v2, v1 6787; GFX6-FASTFMA-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6 6788; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 6789; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v1, v2, 1.0 6790; GFX6-FASTFMA-NEXT: v_fma_f32 v2, v4, v2, v2 6791; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v4, v3, v2 6792; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v1, v4, v3 6793; GFX6-FASTFMA-NEXT: v_fma_f32 v4, v5, v2, v4 6794; GFX6-FASTFMA-NEXT: v_fma_f32 v1, -v1, v4, v3 6795; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 6796; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6797; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v1, v0, s6 6798; GFX6-FASTFMA-NEXT:
s_setpc_b64 s[30:31] 6799; 6800; GFX6-SLOWFMA-LABEL: v_fdiv_f32_constlhs0_daz: 6801; GFX6-SLOWFMA: ; %bb.0: 6802; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6803; GFX6-SLOWFMA-NEXT: s_mov_b32 s6, 0x4640e400 6804; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 6805; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, vcc, s6, v0, s6 6806; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v3, v1 6807; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 6808; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, -v1, v3, 1.0 6809; GFX6-SLOWFMA-NEXT: v_fma_f32 v3, v4, v3, v3 6810; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v4, v2, v3 6811; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v1, v4, v2 6812; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v3, v4 6813; GFX6-SLOWFMA-NEXT: v_fma_f32 v1, -v1, v4, v2 6814; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 6815; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v1, v1, v3, v4 6816; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v1, v0, s6 6817; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 6818; 6819; GFX7-LABEL: v_fdiv_f32_constlhs0_daz: 6820; GFX7: ; %bb.0: 6821; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6822; GFX7-NEXT: s_mov_b32 s6, 0x4640e400 6823; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 6824; GFX7-NEXT: v_rcp_f32_e32 v2, v1 6825; GFX7-NEXT: v_div_scale_f32 v3, vcc, s6, v0, s6 6826; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 6827; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 6828; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 6829; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 6830; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 6831; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 6832; GFX7-NEXT: v_fma_f32 v1, -v1, v4, v3 6833; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 6834; GFX7-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6835; GFX7-NEXT: v_div_fixup_f32 v0, v1, v0, s6 6836; GFX7-NEXT: s_setpc_b64 s[30:31] 6837; 6838; GFX8-LABEL: v_fdiv_f32_constlhs0_daz: 6839; GFX8: ; %bb.0: 6840; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6841; GFX8-NEXT: s_mov_b32 s6, 
0x4640e400 6842; GFX8-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, s6 6843; GFX8-NEXT: v_div_scale_f32 v2, vcc, s6, v0, s6 6844; GFX8-NEXT: v_rcp_f32_e32 v3, v1 6845; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 6846; GFX8-NEXT: v_fma_f32 v4, -v1, v3, 1.0 6847; GFX8-NEXT: v_fma_f32 v3, v4, v3, v3 6848; GFX8-NEXT: v_mul_f32_e32 v4, v2, v3 6849; GFX8-NEXT: v_fma_f32 v5, -v1, v4, v2 6850; GFX8-NEXT: v_fma_f32 v4, v5, v3, v4 6851; GFX8-NEXT: v_fma_f32 v1, -v1, v4, v2 6852; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 6853; GFX8-NEXT: v_div_fmas_f32 v1, v1, v3, v4 6854; GFX8-NEXT: v_div_fixup_f32 v0, v1, v0, s6 6855; GFX8-NEXT: s_setpc_b64 s[30:31] 6856; 6857; GFX10-LABEL: v_fdiv_f32_constlhs0_daz: 6858; GFX10: ; %bb.0: 6859; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6860; GFX10-NEXT: v_div_scale_f32 v1, s4, v0, v0, 0x4640e400 6861; GFX10-NEXT: v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400 6862; GFX10-NEXT: v_rcp_f32_e32 v2, v1 6863; GFX10-NEXT: s_denorm_mode 15 6864; GFX10-NEXT: v_fma_f32 v4, -v1, v2, 1.0 6865; GFX10-NEXT: v_fmac_f32_e32 v2, v4, v2 6866; GFX10-NEXT: v_mul_f32_e32 v4, v3, v2 6867; GFX10-NEXT: v_fma_f32 v5, -v1, v4, v3 6868; GFX10-NEXT: v_fmac_f32_e32 v4, v5, v2 6869; GFX10-NEXT: v_fma_f32 v1, -v1, v4, v3 6870; GFX10-NEXT: s_denorm_mode 12 6871; GFX10-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6872; GFX10-NEXT: v_div_fixup_f32 v0, v1, v0, 0x4640e400 6873; GFX10-NEXT: s_setpc_b64 s[30:31] 6874; 6875; GFX11-LABEL: v_fdiv_f32_constlhs0_daz: 6876; GFX11: ; %bb.0: 6877; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6878; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, 0x4640e400 6879; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, 0x4640e400, v0, 0x4640e400 6880; GFX11-NEXT: v_rcp_f32_e32 v2, v1 6881; GFX11-NEXT: s_denorm_mode 15 6882; GFX11-NEXT: s_waitcnt_depctr 0xfff 6883; GFX11-NEXT: v_fma_f32 v4, -v1, v2, 1.0 6884; GFX11-NEXT: v_fmac_f32_e32 v2, v4, v2 6885; GFX11-NEXT: v_mul_f32_e32 v4, v3, v2 6886; GFX11-NEXT: v_fma_f32 
v5, -v1, v4, v3 6887; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v2 6888; GFX11-NEXT: v_fma_f32 v1, -v1, v4, v3 6889; GFX11-NEXT: s_denorm_mode 12 6890; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v4 6891; GFX11-NEXT: v_div_fixup_f32 v0, v1, v0, 0x4640e400 6892; GFX11-NEXT: s_setpc_b64 s[30:31] 6893; 6894; EG-LABEL: v_fdiv_f32_constlhs0_daz: 6895; EG: ; %bb.0: 6896; EG-NEXT: CF_END 6897; EG-NEXT: PAD 6898 %div = fdiv float 12345.0, %x 6899 ret float %div 6900} 6901; Divides the constant 12345.0 by %x with !fpmath !0 attached, under attribute group #0 (both defined outside this chunk - TODO confirm). The expected code pre-scales %x by 0x2f800000 when |%x| exceeds 0x6f800000 (by 1.0 otherwise), takes rcp, multiplies by 0x4640e400 and applies the same scale factor again. 6902define float @v_fdiv_f32_constlhs0_daz_25ulp(float %x) #0 { 6903; GFX678-LABEL: v_fdiv_f32_constlhs0_daz_25ulp: 6904; GFX678: ; %bb.0: 6905; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6906; GFX678-NEXT: s_mov_b32 s4, 0x6f800000 6907; GFX678-NEXT: v_mov_b32_e32 v1, 0x2f800000 6908; GFX678-NEXT: v_cmp_gt_f32_e64 vcc, |v0|, s4 6909; GFX678-NEXT: v_cndmask_b32_e32 v1, 1.0, v1, vcc 6910; GFX678-NEXT: v_mul_f32_e32 v0, v0, v1 6911; GFX678-NEXT: v_rcp_f32_e32 v0, v0 6912; GFX678-NEXT: v_mul_f32_e32 v0, 0x4640e400, v0 6913; GFX678-NEXT: v_mul_f32_e32 v0, v1, v0 6914; GFX678-NEXT: s_setpc_b64 s[30:31] 6915; 6916; GFX10-LABEL: v_fdiv_f32_constlhs0_daz_25ulp: 6917; GFX10: ; %bb.0: 6918; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6919; GFX10-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v0| 6920; GFX10-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x2f800000, s4 6921; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 6922; GFX10-NEXT: v_rcp_f32_e32 v0, v0 6923; GFX10-NEXT: v_mul_f32_e32 v0, 0x4640e400, v0 6924; GFX10-NEXT: v_mul_f32_e32 v0, v1, v0 6925; GFX10-NEXT: s_setpc_b64 s[30:31] 6926; 6927; GFX11-LABEL: v_fdiv_f32_constlhs0_daz_25ulp: 6928; GFX11: ; %bb.0: 6929; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6930; GFX11-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v0| 6931; GFX11-NEXT: v_cndmask_b32_e64 v1, 1.0, 0x2f800000, s0 6932; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 6933; GFX11-NEXT: v_rcp_f32_e32 v0, v0 6934; GFX11-NEXT: s_waitcnt_depctr 0xfff 6935; GFX11-NEXT: v_mul_f32_e32 v0, 0x4640e400, v0 6936; GFX11-NEXT: v_mul_f32_e32
v0, v1, v0 6937; GFX11-NEXT: s_setpc_b64 s[30:31] 6938; 6939; EG-LABEL: v_fdiv_f32_constlhs0_daz_25ulp: 6940; EG: ; %bb.0: 6941; EG-NEXT: CF_END 6942; EG-NEXT: PAD 6943 %div = fdiv float 12345.0, %x, !fpmath !0 6944 ret float %div 6945} 6946; Divides %x by %y with no !fpmath, where %x is marked nofpclass(sub), under attribute group #1 (defined outside this chunk - TODO confirm). The expected code is still the full div_scale/rcp/fma/div_fmas/div_fixup sequence with no mode register writes. 6947define float @v_fdiv_f32_ieee_nodenorm_x(float nofpclass(sub) %x, float %y) #1 { 6948; GFX6-FASTFMA-LABEL: v_fdiv_f32_ieee_nodenorm_x: 6949; GFX6-FASTFMA: ; %bb.0: 6950; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6951; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 6952; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 6953; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 6954; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 6955; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 6956; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 6957; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 6958; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 6959; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 6960; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 6961; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 6962; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 6963; 6964; GFX6-SLOWFMA-LABEL: v_fdiv_f32_ieee_nodenorm_x: 6965; GFX6-SLOWFMA: ; %bb.0: 6966; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6967; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 6968; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 6969; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 6970; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 6971; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 6972; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 6973; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 6974; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 6975; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 6976; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 6977; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 6978; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 6979; 6980; GFX7-LABEL: v_fdiv_f32_ieee_nodenorm_x: 6981; GFX7: ; %bb.0: 6982; GFX7-NEXT: s_waitcnt
vmcnt(0) expcnt(0) lgkmcnt(0) 6983; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 6984; GFX7-NEXT: v_rcp_f32_e32 v3, v2 6985; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 6986; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 6987; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 6988; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 6989; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 6990; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 6991; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 6992; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 6993; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 6994; GFX7-NEXT: s_setpc_b64 s[30:31] 6995; 6996; GFX8-LABEL: v_fdiv_f32_ieee_nodenorm_x: 6997; GFX8: ; %bb.0: 6998; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6999; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 7000; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 7001; GFX8-NEXT: v_rcp_f32_e32 v4, v2 7002; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 7003; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 7004; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 7005; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 7006; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 7007; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 7008; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 7009; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7010; GFX8-NEXT: s_setpc_b64 s[30:31] 7011; 7012; GFX10-LABEL: v_fdiv_f32_ieee_nodenorm_x: 7013; GFX10: ; %bb.0: 7014; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7015; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 7016; GFX10-NEXT: v_rcp_f32_e32 v3, v2 7017; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 7018; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 7019; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 7020; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 7021; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 7022; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 7023; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 7024; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 7025; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7026; GFX10-NEXT: s_setpc_b64 s[30:31] 7027; 7028; GFX11-LABEL: v_fdiv_f32_ieee_nodenorm_x: 7029; GFX11: ; %bb.0: 7030; 
GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7031; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 7032; GFX11-NEXT: v_rcp_f32_e32 v3, v2 7033; GFX11-NEXT: s_waitcnt_depctr 0xfff 7034; GFX11-NEXT: v_fma_f32 v4, -v2, v3, 1.0 7035; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 7036; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 7037; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 7038; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 7039; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 7040; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 7041; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 7042; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7043; GFX11-NEXT: s_setpc_b64 s[30:31] 7044; 7045; EG-LABEL: v_fdiv_f32_ieee_nodenorm_x: 7046; EG: ; %bb.0: 7047; EG-NEXT: CF_END 7048; EG-NEXT: PAD 7049 %div = fdiv float %x, %y 7050 ret float %div 7051} 7052 7053define float @v_fdiv_f32_ieee_25ulp_nodenorm_x(float nofpclass(sub) %x, float %y) #1 { 7054; GFX6-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_x: 7055; GFX6: ; %bb.0: 7056; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7057; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 7058; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v1 7059; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 7060; GFX6-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 7061; GFX6-NEXT: v_rcp_f32_e32 v2, v2 7062; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v0 7063; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 7064; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 7065; GFX6-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc 7066; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 7067; GFX6-NEXT: v_mul_f32_e32 v2, v3, v2 7068; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 7069; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 7070; GFX6-NEXT: s_setpc_b64 s[30:31] 7071; 7072; GFX7-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_x: 7073; GFX7: ; %bb.0: 7074; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7075; GFX7-NEXT: v_frexp_mant_f32_e32 v2, v1 7076; GFX7-NEXT: v_rcp_f32_e32 v2, v2 7077; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 7078; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 7079; 
GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 7080; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 7081; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 7082; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 7083; GFX7-NEXT: s_setpc_b64 s[30:31] 7084; 7085; GFX8-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_x: 7086; GFX8: ; %bb.0: 7087; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7088; GFX8-NEXT: v_frexp_mant_f32_e32 v2, v1 7089; GFX8-NEXT: v_rcp_f32_e32 v2, v2 7090; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 7091; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 7092; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 7093; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 7094; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1 7095; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 7096; GFX8-NEXT: s_setpc_b64 s[30:31] 7097; 7098; GFX10-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_x: 7099; GFX10: ; %bb.0: 7100; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7101; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v1 7102; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 7103; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v0 7104; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 7105; GFX10-NEXT: v_rcp_f32_e32 v2, v2 7106; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 7107; GFX10-NEXT: v_mul_f32_e32 v2, v3, v2 7108; GFX10-NEXT: v_ldexp_f32 v0, v2, v0 7109; GFX10-NEXT: s_setpc_b64 s[30:31] 7110; 7111; GFX11-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_x: 7112; GFX11: ; %bb.0: 7113; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7114; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v1 7115; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 7116; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v0 7117; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 7118; GFX11-NEXT: v_rcp_f32_e32 v2, v2 7119; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 7120; GFX11-NEXT: s_waitcnt_depctr 0xfff 7121; GFX11-NEXT: v_mul_f32_e32 v2, v3, v2 7122; GFX11-NEXT: v_ldexp_f32 v0, v2, v0 7123; GFX11-NEXT: s_setpc_b64 s[30:31] 7124; 7125; EG-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_x: 7126; EG: ; %bb.0: 7127; EG-NEXT: CF_END 7128; EG-NEXT: PAD 7129 %div = fdiv float %x, %y, 
!fpmath !0 7130 ret float %div 7131} 7132 7133define float @v_fdiv_f32_dynamic_nodenorm_x(float nofpclass(sub) %x, float %y) #2 { 7134; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_x: 7135; GFX6-FASTFMA: ; %bb.0: 7136; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7137; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 7138; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 7139; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 7140; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 7141; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 7142; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 7143; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 7144; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 7145; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 7146; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 7147; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 7148; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 7149; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 7150; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7151; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 7152; 7153; GFX6-SLOWFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_x: 7154; GFX6-SLOWFMA: ; %bb.0: 7155; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7156; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 7157; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 7158; GFX6-SLOWFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 7159; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 7160; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 7161; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 7162; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 7163; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 7164; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 7165; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 7166; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 7167; GFX6-SLOWFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 7168; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 
v2, v2, v4, v5 7169; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7170; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 7171; 7172; GFX7-LABEL: v_fdiv_f32_dynamic_nodenorm_x: 7173; GFX7: ; %bb.0: 7174; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7175; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 7176; GFX7-NEXT: v_rcp_f32_e32 v3, v2 7177; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 7178; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 7179; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 7180; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 7181; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 7182; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 7183; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 7184; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 7185; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 7186; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 7187; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 7188; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7189; GFX7-NEXT: s_setpc_b64 s[30:31] 7190; 7191; GFX8-LABEL: v_fdiv_f32_dynamic_nodenorm_x: 7192; GFX8: ; %bb.0: 7193; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7194; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 7195; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 7196; GFX8-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 7197; GFX8-NEXT: v_rcp_f32_e32 v4, v2 7198; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 7199; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 7200; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 7201; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 7202; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 7203; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 7204; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 7205; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 7206; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 7207; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7208; GFX8-NEXT: s_setpc_b64 s[30:31] 7209; 7210; GFX10-LABEL: v_fdiv_f32_dynamic_nodenorm_x: 7211; GFX10: ; %bb.0: 7212; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7213; GFX10-NEXT: v_div_scale_f32 v2, s4, 
v1, v1, v0 7214; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 7215; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 7216; GFX10-NEXT: v_rcp_f32_e32 v3, v2 7217; GFX10-NEXT: s_denorm_mode 15 7218; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 7219; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 7220; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 7221; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 7222; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 7223; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 7224; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 7225; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 7226; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7227; GFX10-NEXT: s_setpc_b64 s[30:31] 7228; 7229; GFX11-LABEL: v_fdiv_f32_dynamic_nodenorm_x: 7230; GFX11: ; %bb.0: 7231; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7232; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 7233; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 7234; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2) 7235; GFX11-NEXT: v_rcp_f32_e32 v3, v2 7236; GFX11-NEXT: s_denorm_mode 15 7237; GFX11-NEXT: s_waitcnt_depctr 0xfff 7238; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 7239; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 7240; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 7241; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 7242; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 7243; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 7244; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0 7245; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 7246; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7247; GFX11-NEXT: s_setpc_b64 s[30:31] 7248; 7249; EG-LABEL: v_fdiv_f32_dynamic_nodenorm_x: 7250; EG: ; %bb.0: 7251; EG-NEXT: CF_END 7252; EG-NEXT: PAD 7253 %div = fdiv float %x, %y 7254 ret float %div 7255} 7256 7257define float @v_fdiv_f32_dynamic_25ulp_nodenorm_x(float nofpclass(sub) %x, float %y) #2 { 7258; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x: 7259; GFX6: ; %bb.0: 7260; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7261; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 7262; 
GFX6-NEXT: v_frexp_mant_f32_e32 v2, v1 7263; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 7264; GFX6-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 7265; GFX6-NEXT: v_rcp_f32_e32 v2, v2 7266; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v0 7267; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 7268; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 7269; GFX6-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc 7270; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 7271; GFX6-NEXT: v_mul_f32_e32 v2, v3, v2 7272; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 7273; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 7274; GFX6-NEXT: s_setpc_b64 s[30:31] 7275; 7276; GFX7-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x: 7277; GFX7: ; %bb.0: 7278; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7279; GFX7-NEXT: v_frexp_mant_f32_e32 v2, v1 7280; GFX7-NEXT: v_rcp_f32_e32 v2, v2 7281; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 7282; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 7283; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 7284; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 7285; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 7286; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 7287; GFX7-NEXT: s_setpc_b64 s[30:31] 7288; 7289; GFX8-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x: 7290; GFX8: ; %bb.0: 7291; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7292; GFX8-NEXT: v_frexp_mant_f32_e32 v2, v1 7293; GFX8-NEXT: v_rcp_f32_e32 v2, v2 7294; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 7295; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 7296; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 7297; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 7298; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1 7299; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 7300; GFX8-NEXT: s_setpc_b64 s[30:31] 7301; 7302; GFX10-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x: 7303; GFX10: ; %bb.0: 7304; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7305; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v1 7306; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 7307; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v0 7308; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 7309; 
GFX10-NEXT: v_rcp_f32_e32 v2, v2 7310; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 7311; GFX10-NEXT: v_mul_f32_e32 v2, v3, v2 7312; GFX10-NEXT: v_ldexp_f32 v0, v2, v0 7313; GFX10-NEXT: s_setpc_b64 s[30:31] 7314; 7315; GFX11-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x: 7316; GFX11: ; %bb.0: 7317; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7318; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v1 7319; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 7320; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v0 7321; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 7322; GFX11-NEXT: v_rcp_f32_e32 v2, v2 7323; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 7324; GFX11-NEXT: s_waitcnt_depctr 0xfff 7325; GFX11-NEXT: v_mul_f32_e32 v2, v3, v2 7326; GFX11-NEXT: v_ldexp_f32 v0, v2, v0 7327; GFX11-NEXT: s_setpc_b64 s[30:31] 7328; 7329; EG-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_x: 7330; EG: ; %bb.0: 7331; EG-NEXT: CF_END 7332; EG-NEXT: PAD 7333 %div = fdiv float %x, %y, !fpmath !0 7334 ret float %div 7335} 7336 7337define float @v_fdiv_f32_daz_nodenorm_x(float nofpclass(sub) %x, float %y) #0 { 7338; GFX6-FASTFMA-LABEL: v_fdiv_f32_daz_nodenorm_x: 7339; GFX6-FASTFMA: ; %bb.0: 7340; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7341; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 7342; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 7343; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 7344; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 7345; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 7346; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 7347; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 7348; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 7349; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 7350; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 7351; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 7352; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 7353; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7354; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 7355; 7356; 
GFX6-SLOWFMA-LABEL: v_fdiv_f32_daz_nodenorm_x: 7357; GFX6-SLOWFMA: ; %bb.0: 7358; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7359; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 7360; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 7361; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 7362; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 7363; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 7364; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 7365; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 7366; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 7367; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 7368; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 7369; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 7370; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 7371; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7372; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 7373; 7374; GFX7-LABEL: v_fdiv_f32_daz_nodenorm_x: 7375; GFX7: ; %bb.0: 7376; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7377; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 7378; GFX7-NEXT: v_rcp_f32_e32 v3, v2 7379; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 7380; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 7381; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 7382; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 7383; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 7384; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 7385; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 7386; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 7387; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 7388; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 7389; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7390; GFX7-NEXT: s_setpc_b64 s[30:31] 7391; 7392; GFX8-LABEL: v_fdiv_f32_daz_nodenorm_x: 7393; GFX8: ; %bb.0: 7394; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7395; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 7396; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 7397; GFX8-NEXT: v_rcp_f32_e32 v4, v2 7398; 
GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 7399; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 7400; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 7401; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 7402; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 7403; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 7404; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 7405; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 7406; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 7407; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7408; GFX8-NEXT: s_setpc_b64 s[30:31] 7409; 7410; GFX10-LABEL: v_fdiv_f32_daz_nodenorm_x: 7411; GFX10: ; %bb.0: 7412; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7413; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 7414; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 7415; GFX10-NEXT: v_rcp_f32_e32 v3, v2 7416; GFX10-NEXT: s_denorm_mode 15 7417; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0 7418; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3 7419; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 7420; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 7421; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 7422; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 7423; GFX10-NEXT: s_denorm_mode 12 7424; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 7425; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7426; GFX10-NEXT: s_setpc_b64 s[30:31] 7427; 7428; GFX11-LABEL: v_fdiv_f32_daz_nodenorm_x: 7429; GFX11: ; %bb.0: 7430; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7431; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 7432; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 7433; GFX11-NEXT: v_rcp_f32_e32 v3, v2 7434; GFX11-NEXT: s_denorm_mode 15 7435; GFX11-NEXT: s_waitcnt_depctr 0xfff 7436; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0 7437; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3 7438; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 7439; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 7440; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 7441; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 7442; GFX11-NEXT: s_denorm_mode 12 7443; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 7444; GFX11-NEXT: 
v_div_fixup_f32 v0, v2, v1, v0 7445; GFX11-NEXT: s_setpc_b64 s[30:31] 7446; 7447; EG-LABEL: v_fdiv_f32_daz_nodenorm_x: 7448; EG: ; %bb.0: 7449; EG-NEXT: CF_END 7450; EG-NEXT: PAD 7451 %div = fdiv float %x, %y 7452 ret float %div 7453} 7454 7455define float @v_fdiv_f32_daz_25ulp_nodenorm_x(float nofpclass(sub) %x, float %y) #0 { 7456; GFX678-LABEL: v_fdiv_f32_daz_25ulp_nodenorm_x: 7457; GFX678: ; %bb.0: 7458; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7459; GFX678-NEXT: s_mov_b32 s4, 0x6f800000 7460; GFX678-NEXT: v_mov_b32_e32 v2, 0x2f800000 7461; GFX678-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, s4 7462; GFX678-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc 7463; GFX678-NEXT: v_mul_f32_e32 v1, v1, v2 7464; GFX678-NEXT: v_rcp_f32_e32 v1, v1 7465; GFX678-NEXT: v_mul_f32_e32 v0, v0, v1 7466; GFX678-NEXT: v_mul_f32_e32 v0, v2, v0 7467; GFX678-NEXT: s_setpc_b64 s[30:31] 7468; 7469; GFX10-LABEL: v_fdiv_f32_daz_25ulp_nodenorm_x: 7470; GFX10: ; %bb.0: 7471; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7472; GFX10-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v1| 7473; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4 7474; GFX10-NEXT: v_mul_f32_e32 v1, v1, v2 7475; GFX10-NEXT: v_rcp_f32_e32 v1, v1 7476; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 7477; GFX10-NEXT: v_mul_f32_e32 v0, v2, v0 7478; GFX10-NEXT: s_setpc_b64 s[30:31] 7479; 7480; GFX11-LABEL: v_fdiv_f32_daz_25ulp_nodenorm_x: 7481; GFX11: ; %bb.0: 7482; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7483; GFX11-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v1| 7484; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0 7485; GFX11-NEXT: v_mul_f32_e32 v1, v1, v2 7486; GFX11-NEXT: v_rcp_f32_e32 v1, v1 7487; GFX11-NEXT: s_waitcnt_depctr 0xfff 7488; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 7489; GFX11-NEXT: v_mul_f32_e32 v0, v2, v0 7490; GFX11-NEXT: s_setpc_b64 s[30:31] 7491; 7492; EG-LABEL: v_fdiv_f32_daz_25ulp_nodenorm_x: 7493; EG: ; %bb.0: 7494; EG-NEXT: CF_END 7495; EG-NEXT: PAD 7496 %div = fdiv float %x, %y, !fpmath !0 7497 
ret float %div 7498} 7499 7500define float @v_fdiv_f32_ieee_nodenorm_y(float %x, float nofpclass(sub) %y) #1 { 7501; GFX6-FASTFMA-LABEL: v_fdiv_f32_ieee_nodenorm_y: 7502; GFX6-FASTFMA: ; %bb.0: 7503; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7504; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 7505; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 7506; GFX6-FASTFMA-NEXT: v_fma_f32 v4, -v2, v3, 1.0 7507; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v4, v3, v3 7508; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 7509; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 7510; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 7511; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 7512; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 7513; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 7514; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7515; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 7516; 7517; GFX6-SLOWFMA-LABEL: v_fdiv_f32_ieee_nodenorm_y: 7518; GFX6-SLOWFMA: ; %bb.0: 7519; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7520; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 7521; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 7522; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 7523; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 7524; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 7525; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 7526; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 7527; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 7528; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 7529; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 7530; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7531; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 7532; 7533; GFX7-LABEL: v_fdiv_f32_ieee_nodenorm_y: 7534; GFX7: ; %bb.0: 7535; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7536; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 7537; GFX7-NEXT: v_rcp_f32_e32 v3, v2 7538; GFX7-NEXT: v_fma_f32 v4, -v2, v3, 1.0 7539; GFX7-NEXT: v_fma_f32 v3, v4, v3, v3 7540; 
GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 7541; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 7542; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 7543; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 7544; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 7545; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 7546; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7547; GFX7-NEXT: s_setpc_b64 s[30:31] 7548; 7549; GFX8-LABEL: v_fdiv_f32_ieee_nodenorm_y: 7550; GFX8: ; %bb.0: 7551; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7552; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 7553; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 7554; GFX8-NEXT: v_rcp_f32_e32 v4, v2 7555; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 7556; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 7557; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 7558; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 7559; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 7560; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 7561; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 7562; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7563; GFX8-NEXT: s_setpc_b64 s[30:31] 7564; 7565; GFX10-LABEL: v_fdiv_f32_ieee_nodenorm_y: 7566; GFX10: ; %bb.0: 7567; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7568; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 7569; GFX10-NEXT: v_rcp_f32_e32 v3, v2 7570; GFX10-NEXT: v_fma_f32 v4, -v2, v3, 1.0 7571; GFX10-NEXT: v_fmac_f32_e32 v3, v4, v3 7572; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 7573; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3 7574; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4 7575; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3 7576; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4 7577; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5 7578; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7579; GFX10-NEXT: s_setpc_b64 s[30:31] 7580; 7581; GFX11-LABEL: v_fdiv_f32_ieee_nodenorm_y: 7582; GFX11: ; %bb.0: 7583; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7584; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0 7585; GFX11-NEXT: v_rcp_f32_e32 v3, v2 7586; GFX11-NEXT: s_waitcnt_depctr 0xfff 7587; GFX11-NEXT: v_fma_f32 v4, 
-v2, v3, 1.0 7588; GFX11-NEXT: v_fmac_f32_e32 v3, v4, v3 7589; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 7590; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3 7591; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4 7592; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3 7593; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4 7594; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5 7595; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7596; GFX11-NEXT: s_setpc_b64 s[30:31] 7597; 7598; EG-LABEL: v_fdiv_f32_ieee_nodenorm_y: 7599; EG: ; %bb.0: 7600; EG-NEXT: CF_END 7601; EG-NEXT: PAD 7602 %div = fdiv float %x, %y 7603 ret float %div 7604} 7605 7606define float @v_fdiv_f32_ieee_25ulp_nodenorm_y(float %x, float nofpclass(sub) %y) #1 { 7607; GFX6-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_y: 7608; GFX6: ; %bb.0: 7609; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7610; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 7611; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v1 7612; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 7613; GFX6-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc 7614; GFX6-NEXT: v_rcp_f32_e32 v2, v2 7615; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v0 7616; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 7617; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 7618; GFX6-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc 7619; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 7620; GFX6-NEXT: v_mul_f32_e32 v2, v3, v2 7621; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 7622; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0 7623; GFX6-NEXT: s_setpc_b64 s[30:31] 7624; 7625; GFX7-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_y: 7626; GFX7: ; %bb.0: 7627; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7628; GFX7-NEXT: v_frexp_mant_f32_e32 v2, v1 7629; GFX7-NEXT: v_rcp_f32_e32 v2, v2 7630; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 7631; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 7632; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0 7633; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2 7634; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v3, v1 7635; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1 7636; GFX7-NEXT: s_setpc_b64 s[30:31] 7637; 7638; 
GFX8-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_y: 7639; GFX8: ; %bb.0: 7640; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7641; GFX8-NEXT: v_frexp_mant_f32_e32 v2, v1 7642; GFX8-NEXT: v_rcp_f32_e32 v2, v2 7643; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 7644; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 7645; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 7646; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2 7647; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1 7648; GFX8-NEXT: v_ldexp_f32 v0, v0, v1 7649; GFX8-NEXT: s_setpc_b64 s[30:31] 7650; 7651; GFX10-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_y: 7652; GFX10: ; %bb.0: 7653; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7654; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v1 7655; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 7656; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v0 7657; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 7658; GFX10-NEXT: v_rcp_f32_e32 v2, v2 7659; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 7660; GFX10-NEXT: v_mul_f32_e32 v2, v3, v2 7661; GFX10-NEXT: v_ldexp_f32 v0, v2, v0 7662; GFX10-NEXT: s_setpc_b64 s[30:31] 7663; 7664; GFX11-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_y: 7665; GFX11: ; %bb.0: 7666; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7667; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v1 7668; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 7669; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v0 7670; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 7671; GFX11-NEXT: v_rcp_f32_e32 v2, v2 7672; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 7673; GFX11-NEXT: s_waitcnt_depctr 0xfff 7674; GFX11-NEXT: v_mul_f32_e32 v2, v3, v2 7675; GFX11-NEXT: v_ldexp_f32 v0, v2, v0 7676; GFX11-NEXT: s_setpc_b64 s[30:31] 7677; 7678; EG-LABEL: v_fdiv_f32_ieee_25ulp_nodenorm_y: 7679; EG: ; %bb.0: 7680; EG-NEXT: CF_END 7681; EG-NEXT: PAD 7682 %div = fdiv float %x, %y, !fpmath !0 7683 ret float %div 7684} 7685 7686define float @v_fdiv_f32_dynamic_nodenorm_y(float %x, float nofpclass(sub) %y) #2 { 7687; GFX6-FASTFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_y: 7688; GFX6-FASTFMA: ; %bb.0: 7689; 
GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7690; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 7691; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2 7692; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 7693; GFX6-FASTFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 7694; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 7695; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0 7696; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3 7697; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3 7698; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4 7699; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5 7700; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4 7701; GFX6-FASTFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 7702; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5 7703; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7704; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31] 7705; 7706; GFX6-SLOWFMA-LABEL: v_fdiv_f32_dynamic_nodenorm_y: 7707; GFX6-SLOWFMA: ; %bb.0: 7708; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7709; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 7710; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 7711; GFX6-SLOWFMA-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 7712; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2 7713; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 7714; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0 7715; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4 7716; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4 7717; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3 7718; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5 7719; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3 7720; GFX6-SLOWFMA-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 7721; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5 7722; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7723; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31] 7724; 7725; GFX7-LABEL: v_fdiv_f32_dynamic_nodenorm_y: 7726; GFX7: ; %bb.0: 7727; GFX7-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 7728; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 7729; GFX7-NEXT: v_rcp_f32_e32 v3, v2 7730; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0 7731; GFX7-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 7732; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 7733; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0 7734; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3 7735; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3 7736; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4 7737; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5 7738; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4 7739; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 7740; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5 7741; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7742; GFX7-NEXT: s_setpc_b64 s[30:31] 7743; 7744; GFX8-LABEL: v_fdiv_f32_dynamic_nodenorm_y: 7745; GFX8: ; %bb.0: 7746; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7747; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0 7748; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0 7749; GFX8-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 7750; GFX8-NEXT: v_rcp_f32_e32 v4, v2 7751; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 7752; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0 7753; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4 7754; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4 7755; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3 7756; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5 7757; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3 7758; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4 7759; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5 7760; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0 7761; GFX8-NEXT: s_setpc_b64 s[30:31] 7762; 7763; GFX10-LABEL: v_fdiv_f32_dynamic_nodenorm_y: 7764; GFX10: ; %bb.0: 7765; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7766; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0 7767; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0 7768; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 4, 2) 7769; GFX10-NEXT: v_rcp_f32_e32 v3, v2 7770; GFX10-NEXT: s_denorm_mode 15 7771; GFX10-NEXT: 
v_fma_f32 v5, -v2, v3, 1.0
; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3
; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3
; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s4
; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5
; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fdiv_f32_dynamic_nodenorm_y:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0
; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0
; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 4, 2)
; GFX11-NEXT: v_rcp_f32_e32 v3, v2
; GFX11-NEXT: s_denorm_mode 15
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3
; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3
; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 4, 2), s0
; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5
; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; EG-LABEL: v_fdiv_f32_dynamic_nodenorm_y:
; EG: ; %bb.0:
; EG-NEXT: CF_END
; EG-NEXT: PAD
  %div = fdiv float %x, %y
  ret float %div
}

; f32 fdiv compiled with "denormal-fp-math-f32"="dynamic,dynamic" (attribute
; group #2), with %y marked nofpclass(sub) and `!fpmath !0` (float 2.5)
; attached to the fdiv.
; On GFX6/7/8/10/11 the checks expect a v_frexp_mant / v_frexp_exp + v_rcp_f32
; + v_mul_f32 + v_ldexp_f32 sequence. The GFX6 checks additionally compare each
; |input| against 0x7f800000 and use v_cndmask to choose between the frexp
; mantissa and the original input.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file): regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
define float @v_fdiv_f32_dynamic_25ulp_nodenorm_y(float %x, float nofpclass(sub) %y) #2 {
; GFX6-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: s_mov_b32 s4, 0x7f800000
; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v1
; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4
; GFX6-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc
; GFX6-NEXT: v_rcp_f32_e32 v2, v2
; GFX6-NEXT: v_frexp_mant_f32_e32 v3, v0
; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4
; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
; GFX6-NEXT: v_cndmask_b32_e32 v3, v0, v3, vcc
; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
; GFX6-NEXT: v_mul_f32_e32 v2, v3, v2
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_ldexp_f32_e32 v0, v2, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_frexp_mant_f32_e32 v2, v1
; GFX7-NEXT: v_rcp_f32_e32 v2, v2
; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
; GFX7-NEXT: v_frexp_exp_i32_f32_e32 v3, v0
; GFX7-NEXT: v_frexp_mant_f32_e32 v0, v0
; GFX7-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v3, v1
; GFX7-NEXT: v_ldexp_f32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_frexp_mant_f32_e32 v2, v1
; GFX8-NEXT: v_rcp_f32_e32 v2, v2
; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v3, v0
; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0
; GFX8-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX8-NEXT: v_sub_u32_e32 v1, vcc, v3, v1
; GFX8-NEXT: v_ldexp_f32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v1
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
; GFX10-NEXT: v_frexp_mant_f32_e32 v3, v0
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
; GFX10-NEXT: v_rcp_f32_e32 v2, v2
; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1
; GFX10-NEXT: v_mul_f32_e32 v2, v3, v2
; GFX10-NEXT: v_ldexp_f32 v0, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v1
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
; GFX11-NEXT: v_frexp_mant_f32_e32 v3, v0
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0
; GFX11-NEXT: v_rcp_f32_e32 v2, v2
; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f32_e32 v2, v3, v2
; GFX11-NEXT: v_ldexp_f32 v0, v2, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; EG-LABEL: v_fdiv_f32_dynamic_25ulp_nodenorm_y:
; EG: ; %bb.0:
; EG-NEXT: CF_END
; EG-NEXT: PAD
  %div = fdiv float %x, %y, !fpmath !0
  ret float %div
}

; f32 fdiv compiled with "denormal-fp-math-f32"="preserve-sign,preserve-sign"
; (attribute group #0), with %y marked nofpclass(sub) and no !fpmath metadata.
; Every GCN prefix expects the v_div_scale / v_rcp / v_fma chain / v_div_fmas /
; v_div_fixup sequence, with the fma chain bracketed by a denormal-mode switch:
; `s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2)` written with 3 and then 0 on
; GFX6/7/8, and `s_denorm_mode` 15 and then 12 on GFX10/11.
; GFX6 is split into FASTFMA/SLOWFMA prefixes here; the two bodies differ in
; instruction order and register assignment.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file): regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
define float @v_fdiv_f32_daz_nodenorm_y(float %x, float nofpclass(sub) %y) #0 {
; GFX6-FASTFMA-LABEL: v_fdiv_f32_daz_nodenorm_y:
; GFX6-FASTFMA: ; %bb.0:
; GFX6-FASTFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX6-FASTFMA-NEXT: v_rcp_f32_e32 v3, v2
; GFX6-FASTFMA-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX6-FASTFMA-NEXT: v_fma_f32 v3, v5, v3, v3
; GFX6-FASTFMA-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX6-FASTFMA-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX6-FASTFMA-NEXT: v_fma_f32 v5, v6, v3, v5
; GFX6-FASTFMA-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX6-FASTFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX6-FASTFMA-NEXT: v_div_fmas_f32 v2, v2, v3, v5
; GFX6-FASTFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0
; GFX6-FASTFMA-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-SLOWFMA-LABEL: v_fdiv_f32_daz_nodenorm_y:
; GFX6-SLOWFMA: ; %bb.0:
; GFX6-SLOWFMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX6-SLOWFMA-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0
; GFX6-SLOWFMA-NEXT: v_rcp_f32_e32 v4, v2
; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, -v2, v4, 1.0
; GFX6-SLOWFMA-NEXT: v_fma_f32 v4, v5, v4, v4
; GFX6-SLOWFMA-NEXT: v_mul_f32_e32 v5, v3, v4
; GFX6-SLOWFMA-NEXT: v_fma_f32 v6, -v2, v5, v3
; GFX6-SLOWFMA-NEXT: v_fma_f32 v5, v6, v4, v5
; GFX6-SLOWFMA-NEXT: v_fma_f32 v2, -v2, v5, v3
; GFX6-SLOWFMA-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX6-SLOWFMA-NEXT: v_div_fmas_f32 v2, v2, v4, v5
; GFX6-SLOWFMA-NEXT: v_div_fixup_f32 v0, v2, v1, v0
; GFX6-SLOWFMA-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_fdiv_f32_daz_nodenorm_y:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX7-NEXT: v_rcp_f32_e32 v3, v2
; GFX7-NEXT: v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX7-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX7-NEXT: v_fma_f32 v3, v5, v3, v3
; GFX7-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX7-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX7-NEXT: v_fma_f32 v5, v6, v3, v5
; GFX7-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX7-NEXT: v_div_fmas_f32 v2, v2, v3, v5
; GFX7-NEXT: v_div_fixup_f32 v0, v2, v1, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fdiv_f32_daz_nodenorm_y:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX8-NEXT: v_div_scale_f32 v3, vcc, v0, v1, v0
; GFX8-NEXT: v_rcp_f32_e32 v4, v2
; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX8-NEXT: v_fma_f32 v5, -v2, v4, 1.0
; GFX8-NEXT: v_fma_f32 v4, v5, v4, v4
; GFX8-NEXT: v_mul_f32_e32 v5, v3, v4
; GFX8-NEXT: v_fma_f32 v6, -v2, v5, v3
; GFX8-NEXT: v_fma_f32 v5, v6, v4, v5
; GFX8-NEXT: v_fma_f32 v2, -v2, v5, v3
; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX8-NEXT: v_div_fmas_f32 v2, v2, v4, v5
; GFX8-NEXT: v_div_fixup_f32 v0, v2, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdiv_f32_daz_nodenorm_y:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_div_scale_f32 v2, s4, v1, v1, v0
; GFX10-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0
; GFX10-NEXT: v_rcp_f32_e32 v3, v2
; GFX10-NEXT: s_denorm_mode 15
; GFX10-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX10-NEXT: v_fmac_f32_e32 v3, v5, v3
; GFX10-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX10-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX10-NEXT: v_fmac_f32_e32 v5, v6, v3
; GFX10-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX10-NEXT: s_denorm_mode 12
; GFX10-NEXT: v_div_fmas_f32 v2, v2, v3, v5
; GFX10-NEXT: v_div_fixup_f32 v0, v2, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fdiv_f32_daz_nodenorm_y:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_div_scale_f32 v2, null, v1, v1, v0
; GFX11-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0
; GFX11-NEXT: v_rcp_f32_e32 v3, v2
; GFX11-NEXT: s_denorm_mode 15
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_fma_f32 v5, -v2, v3, 1.0
; GFX11-NEXT: v_fmac_f32_e32 v3, v5, v3
; GFX11-NEXT: v_mul_f32_e32 v5, v4, v3
; GFX11-NEXT: v_fma_f32 v6, -v2, v5, v4
; GFX11-NEXT: v_fmac_f32_e32 v5, v6, v3
; GFX11-NEXT: v_fma_f32 v2, -v2, v5, v4
; GFX11-NEXT: s_denorm_mode 12
; GFX11-NEXT: v_div_fmas_f32 v2, v2, v3, v5
; GFX11-NEXT: v_div_fixup_f32 v0, v2, v1, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; EG-LABEL: v_fdiv_f32_daz_nodenorm_y:
; EG: ; %bb.0:
; EG-NEXT: CF_END
; EG-NEXT: PAD
  %div = fdiv float %x, %y
  ret float %div
}

; f32 fdiv compiled with "denormal-fp-math-f32"="preserve-sign,preserve-sign"
; (attribute group #0), with %y marked nofpclass(sub) and `!fpmath !0`
; (float 2.5) attached to the fdiv.
; GFX6/7/8 share a single GFX678 check body. On every GCN prefix the checks
; expect: choose a scale of 0x2f800000 when |v1| is greater than 0x6f800000
; (1.0 otherwise), multiply v1 by the scale, take v_rcp_f32 of the product,
; multiply by v0, then multiply by the scale once more.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file): regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
define float @v_fdiv_f32_daz_25ulp_nodenorm_y(float %x, float nofpclass(sub) %y) #0 {
; GFX678-LABEL: v_fdiv_f32_daz_25ulp_nodenorm_y:
; GFX678: ; %bb.0:
; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX678-NEXT: s_mov_b32 s4, 0x6f800000
; GFX678-NEXT: v_mov_b32_e32 v2, 0x2f800000
; GFX678-NEXT: v_cmp_gt_f32_e64 vcc, |v1|, s4
; GFX678-NEXT: v_cndmask_b32_e32 v2, 1.0, v2, vcc
; GFX678-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX678-NEXT: v_rcp_f32_e32 v1, v1
; GFX678-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX678-NEXT: v_mul_f32_e32 v0, v2, v0
; GFX678-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdiv_f32_daz_25ulp_nodenorm_y:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_cmp_lt_f32_e64 s4, 0x6f800000, |v1|
; GFX10-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s4
; GFX10-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX10-NEXT: v_rcp_f32_e32 v1, v1
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX10-NEXT: v_mul_f32_e32 v0, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fdiv_f32_daz_25ulp_nodenorm_y:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_cmp_lt_f32_e64 s0, 0x6f800000, |v1|
; GFX11-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s0
; GFX11-NEXT: v_mul_f32_e32 v1, v1, v2
; GFX11-NEXT: v_rcp_f32_e32 v1, v1
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX11-NEXT: v_mul_f32_e32 v0, v2, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; EG-LABEL: v_fdiv_f32_daz_25ulp_nodenorm_y:
; EG: ; %bb.0:
; EG-NEXT: CF_END
; EG-NEXT: PAD
  %div = fdiv float %x, %y, !fpmath !0
  ret float %div
}

; Attribute groups referenced through the `#N` suffix on function definitions;
; each one sets "denormal-fp-math-f32" to a different mode.
attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { "denormal-fp-math-f32"="ieee,ieee" }
attributes #2 = { "denormal-fp-math-f32"="dynamic,dynamic" }

; Metadata node used by the `!fpmath !0` attachments on the *_25ulp fdivs.
!0 = !{float 2.500000e+00}