; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s

; Each function below performs several fdivs that share one divisor (%D) and
; returns the quotients packed in a vector. The check lines record, per target,
; whether the divisions are expanded independently or rewritten as a single
; reciprocal of %D followed by one multiply per numerator.

; Two f32 divisions by the same %D with no fast-math flags. The expected code
; contains a complete div_scale/rcp/fma/div_fmas/div_fixup sequence for each
; quotient, i.e. no shared reciprocal is formed.
define <2 x float> @v_repeat_divisor_f32_x2(float %x, float %y, float %D) #0 {
; GFX6-LABEL: v_repeat_divisor_f32_x2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, v0
; GFX6-NEXT:    v_rcp_f32_e32 v4, v3
; GFX6-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
; GFX6-NEXT:    v_fma_f32 v4, v5, v4, v4
; GFX6-NEXT:    v_div_scale_f32 v5, vcc, v0, v2, v0
; GFX6-NEXT:    v_mul_f32_e32 v6, v5, v4
; GFX6-NEXT:    v_fma_f32 v7, -v3, v6, v5
; GFX6-NEXT:    v_fma_f32 v6, v7, v4, v6
; GFX6-NEXT:    v_fma_f32 v3, -v3, v6, v5
; GFX6-NEXT:    v_div_scale_f32 v5, s[4:5], v2, v2, v1
; GFX6-NEXT:    v_rcp_f32_e32 v7, v5
; GFX6-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
; GFX6-NEXT:    v_div_fixup_f32 v0, v3, v2, v0
; GFX6-NEXT:    v_div_scale_f32 v4, vcc, v1, v2, v1
; GFX6-NEXT:    v_fma_f32 v3, -v5, v7, 1.0
; GFX6-NEXT:    v_fma_f32 v3, v3, v7, v7
; GFX6-NEXT:    v_mul_f32_e32 v6, v4, v3
; GFX6-NEXT:    v_fma_f32 v7, -v5, v6, v4
; GFX6-NEXT:    v_fma_f32 v6, v7, v3, v6
; GFX6-NEXT:    v_fma_f32 v4, -v5, v6, v4
; GFX6-NEXT:    v_div_fmas_f32 v3, v4, v3, v6
; GFX6-NEXT:    v_div_fixup_f32 v1, v3, v2, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_f32_x2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, v0
; GFX9-NEXT:    v_div_scale_f32 v4, s[4:5], v2, v2, v1
; GFX9-NEXT:    v_div_scale_f32 v5, vcc, v0, v2, v0
; GFX9-NEXT:    v_div_scale_f32 v6, s[4:5], v1, v2, v1
; GFX9-NEXT:    v_rcp_f32_e32 v7, v3
; GFX9-NEXT:    v_rcp_f32_e32 v8, v4
; GFX9-NEXT:    v_fma_f32 v9, -v3, v7, 1.0
; GFX9-NEXT:    v_fma_f32 v7, v9, v7, v7
; GFX9-NEXT:    v_fma_f32 v10, -v4, v8, 1.0
; GFX9-NEXT:    v_fma_f32 v8, v10, v8, v8
; GFX9-NEXT:    v_mul_f32_e32 v9, v5, v7
; GFX9-NEXT:    v_mul_f32_e32 v10, v6, v8
; GFX9-NEXT:    v_fma_f32 v11, -v3, v9, v5
; GFX9-NEXT:    v_fma_f32 v12, -v4, v10, v6
; GFX9-NEXT:    v_fma_f32 v9, v11, v7, v9
; GFX9-NEXT:    v_fma_f32 v10, v12, v8, v10
; GFX9-NEXT:    v_fma_f32 v3, -v3, v9, v5
; GFX9-NEXT:    v_fma_f32 v4, -v4, v10, v6
; GFX9-NEXT:    v_div_fmas_f32 v3, v3, v7, v9
; GFX9-NEXT:    s_mov_b64 vcc, s[4:5]
; GFX9-NEXT:    v_div_fmas_f32 v4, v4, v8, v10
; GFX9-NEXT:    v_div_fixup_f32 v0, v3, v2, v0
; GFX9-NEXT:    v_div_fixup_f32 v1, v4, v2, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_f32_x2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_div_scale_f32 v3, null, v2, v2, v0
; GFX11-NEXT:    v_div_scale_f32 v4, null, v2, v2, v1
; GFX11-NEXT:    v_div_scale_f32 v9, vcc_lo, v0, v2, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_rcp_f32_e32 v5, v3
; GFX11-NEXT:    v_rcp_f32_e32 v6, v4
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_fma_f32 v7, -v3, v5, 1.0
; GFX11-NEXT:    v_fma_f32 v8, -v4, v6, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_dual_fmac_f32 v5, v7, v5 :: v_dual_fmac_f32 v6, v8, v6
; GFX11-NEXT:    v_div_scale_f32 v7, s0, v1, v2, v1
; GFX11-NEXT:    v_mul_f32_e32 v8, v9, v5
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_mul_f32_e32 v10, v7, v6
; GFX11-NEXT:    v_fma_f32 v11, -v3, v8, v9
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_fma_f32 v12, -v4, v10, v7
; GFX11-NEXT:    v_fmac_f32_e32 v8, v11, v5
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_fmac_f32_e32 v10, v12, v6
; GFX11-NEXT:    v_fma_f32 v3, -v3, v8, v9
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_fma_f32 v4, -v4, v10, v7
; GFX11-NEXT:    v_div_fmas_f32 v3, v3, v5, v8
; GFX11-NEXT:    s_mov_b32 vcc_lo, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_div_fmas_f32 v4, v4, v6, v10
; GFX11-NEXT:    v_div_fixup_f32 v0, v3, v2, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT:    v_div_fixup_f32 v1, v4, v2, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv float %x, %D
  %div1 = fdiv float %y, %D
  %insert.0 = insertelement <2 x float> poison, float %div0, i32 0
  %insert.1 = insertelement <2 x float> %insert.0, float %div1, i32 1
  ret <2 x float> %insert.1
}

; Same two divisions, now carrying the arcp flag. The expected code expands
; 1.0 / %D once and then issues one v_mul_f32 per numerator.
define <2 x float> @v_repeat_divisor_f32_x2_arcp(float %x, float %y, float %D) #0 {
; GFX6-LABEL: v_repeat_divisor_f32_x2_arcp:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, 1.0
; GFX6-NEXT:    v_rcp_f32_e32 v4, v3
; GFX6-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
; GFX6-NEXT:    v_fma_f32 v4, v5, v4, v4
; GFX6-NEXT:    v_div_scale_f32 v5, vcc, 1.0, v2, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v6, v5, v4
; GFX6-NEXT:    v_fma_f32 v7, -v3, v6, v5
; GFX6-NEXT:    v_fma_f32 v6, v7, v4, v6
; GFX6-NEXT:    v_fma_f32 v3, -v3, v6, v5
; GFX6-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
; GFX6-NEXT:    v_div_fixup_f32 v2, v3, v2, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_f32_x2_arcp:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, 1.0
; GFX9-NEXT:    v_div_scale_f32 v4, vcc, 1.0, v2, 1.0
; GFX9-NEXT:    v_rcp_f32_e32 v5, v3
; GFX9-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
; GFX9-NEXT:    v_fma_f32 v5, v6, v5, v5
; GFX9-NEXT:    v_mul_f32_e32 v6, v4, v5
; GFX9-NEXT:    v_fma_f32 v7, -v3, v6, v4
; GFX9-NEXT:    v_fma_f32 v6, v7, v5, v6
; GFX9-NEXT:    v_fma_f32 v3, -v3, v6, v4
; GFX9-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
; GFX9-NEXT:    v_div_fixup_f32 v2, v3, v2, 1.0
; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_f32_x2_arcp:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_div_scale_f32 v3, null, v2, v2, 1.0
; GFX11-NEXT:    v_div_scale_f32 v6, vcc_lo, 1.0, v2, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_rcp_f32_e32 v4, v3
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_fma_f32 v5, -v3, v4, 1.0
; GFX11-NEXT:    v_fmac_f32_e32 v4, v5, v4
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v5, v6, v4
; GFX11-NEXT:    v_fma_f32 v7, -v3, v5, v6
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fmac_f32_e32 v5, v7, v4
; GFX11-NEXT:    v_fma_f32 v3, -v3, v5, v6
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_div_fmas_f32 v3, v3, v4, v5
; GFX11-NEXT:    v_div_fixup_f32 v2, v3, v2, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv arcp float %x, %D
  %div1 = fdiv arcp float %y, %D
  %insert.0 = insertelement <2 x float> poison, float %div0, i32 0
  %insert.1 = insertelement <2 x float> %insert.0, float %div1, i32 1
  ret <2 x float> %insert.1
}

; arcp variant compiled with attribute group #1 instead of #0.
; NOTE(review): #1 is defined outside this view; the "daz" suffix suggests a
; denormal-flushing f32 mode - confirm against the attribute definition.
; Compared with the #0 variant, the expected code additionally wraps the FMA
; refinement in a pair of mode switches: s_setreg_imm32_b32 on HW_REG_MODE for
; GFX6/GFX9 and s_denorm_mode for GFX11.
define <2 x float> @v_repeat_divisor_f32_x2_arcp_daz(float %x, float %y, float %D) #1 {
; GFX6-LABEL: v_repeat_divisor_f32_x2_arcp_daz:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, 1.0
; GFX6-NEXT:    v_rcp_f32_e32 v4, v3
; GFX6-NEXT:    v_div_scale_f32 v5, vcc, 1.0, v2, 1.0
; GFX6-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
; GFX6-NEXT:    v_fma_f32 v4, v6, v4, v4
; GFX6-NEXT:    v_mul_f32_e32 v6, v5, v4
; GFX6-NEXT:    v_fma_f32 v7, -v3, v6, v5
; GFX6-NEXT:    v_fma_f32 v6, v7, v4, v6
; GFX6-NEXT:    v_fma_f32 v3, -v3, v6, v5
; GFX6-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX6-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
; GFX6-NEXT:    v_div_fixup_f32 v2, v3, v2, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_f32_x2_arcp_daz:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, 1.0
; GFX9-NEXT:    v_div_scale_f32 v4, vcc, 1.0, v2, 1.0
; GFX9-NEXT:    v_rcp_f32_e32 v5, v3
; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX9-NEXT:    v_fma_f32 v6, -v3, v5, 1.0
; GFX9-NEXT:    v_fma_f32 v5, v6, v5, v5
; GFX9-NEXT:    v_mul_f32_e32 v6, v4, v5
; GFX9-NEXT:    v_fma_f32 v7, -v3, v6, v4
; GFX9-NEXT:    v_fma_f32 v6, v7, v5, v6
; GFX9-NEXT:    v_fma_f32 v3, -v3, v6, v4
; GFX9-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX9-NEXT:    v_div_fmas_f32 v3, v3, v5, v6
; GFX9-NEXT:    v_div_fixup_f32 v2, v3, v2, 1.0
; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_f32_x2_arcp_daz:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_div_scale_f32 v3, null, v2, v2, 1.0
; GFX11-NEXT:    v_div_scale_f32 v5, vcc_lo, 1.0, v2, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_rcp_f32_e32 v4, v3
; GFX11-NEXT:    s_denorm_mode 15
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
; GFX11-NEXT:    v_fmac_f32_e32 v4, v6, v4
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v6, v5, v4
; GFX11-NEXT:    v_fma_f32 v7, -v3, v6, v5
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fmac_f32_e32 v6, v7, v4
; GFX11-NEXT:    v_fma_f32 v3, -v3, v6, v5
; GFX11-NEXT:    s_denorm_mode 12
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
; GFX11-NEXT:    v_div_fixup_f32 v2, v3, v2, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv arcp float %x, %D
  %div1 = fdiv arcp float %y, %D
  %insert.0 = insertelement <2 x float> poison, float %div0, i32 0
  %insert.1 = insertelement <2 x float> %insert.0, float %div1, i32 1
  ret <2 x float> %insert.1
}

; Half-precision arcp variant with two numerators. On GFX6 the expected code
; converts the operands with v_cvt_f16_f32/v_cvt_f32_f16 and reuses the f32
; reciprocal expansion; on GFX9/GFX11 it is a single v_rcp_f16 followed by two
; v_mul_f16 and a v_pack_b32_f16.
define <2 x half> @v_repeat_divisor_f16_x2_arcp(half %x, half %y, half %D) #0 {
; GFX6-LABEL: v_repeat_divisor_f16_x2_arcp:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_div_scale_f32 v3, s[4:5], v2, v2, 1.0
; GFX6-NEXT:    v_rcp_f32_e32 v4, v3
; GFX6-NEXT:    v_div_scale_f32 v5, vcc, 1.0, v2, 1.0
; GFX6-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
; GFX6-NEXT:    v_fma_f32 v4, v6, v4, v4
; GFX6-NEXT:    v_mul_f32_e32 v6, v5, v4
; GFX6-NEXT:    v_fma_f32 v7, -v3, v6, v5
; GFX6-NEXT:    v_fma_f32 v6, v7, v4, v6
; GFX6-NEXT:    v_fma_f32 v3, -v3, v6, v5
; GFX6-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
; GFX6-NEXT:    v_div_fixup_f32 v2, v3, v2, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_f16_x2_arcp:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rcp_f16_e32 v2, v2
; GFX9-NEXT:    v_mul_f16_e32 v0, v0, v2
; GFX9-NEXT:    v_mul_f16_e32 v1, v1, v2
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_f16_x2_arcp:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_rcp_f16_e32 v2, v2
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v2
; GFX11-NEXT:    v_mul_f16_e32 v1, v1, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv arcp half %x, %D
  %div1 = fdiv arcp half %y, %D
  %insert.0 = insertelement <2 x half> poison, half %div0, i32 0
  %insert.1 = insertelement <2 x half> %insert.0, half %div1, i32 1
  ret <2 x half> %insert.1
}

; Double-precision arcp variant with two numerators. The expected code performs
; one f64 reciprocal expansion of %D and then two v_mul_f64. The GFX6 output
; also contains v_cmp_eq_u32/s_xor_b64 instructions that compute vcc ahead of
; v_div_fmas_f64.
define <2 x double> @v_repeat_divisor_f64_x2_arcp(double %x, double %y, double %D) #0 {
; GFX6-LABEL: v_repeat_divisor_f64_x2_arcp:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[4:5], v[4:5], 1.0
; GFX6-NEXT:    v_rcp_f64_e32 v[8:9], v[6:7]
; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v5, v7
; GFX6-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0
; GFX6-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
; GFX6-NEXT:    v_div_scale_f64 v[10:11], s[4:5], 1.0, v[4:5], 1.0
; GFX6-NEXT:    v_fma_f64 v[12:13], -v[6:7], v[8:9], 1.0
; GFX6-NEXT:    s_mov_b32 s4, 0x3ff00000
; GFX6-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], s4, v11
; GFX6-NEXT:    v_mul_f64 v[12:13], v[10:11], v[8:9]
; GFX6-NEXT:    s_xor_b64 vcc, s[4:5], vcc
; GFX6-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[12:13], v[10:11]
; GFX6-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[8:9], v[12:13]
; GFX6-NEXT:    v_div_fixup_f64 v[4:5], v[6:7], v[4:5], 1.0
; GFX6-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX6-NEXT:    v_mul_f64 v[2:3], v[2:3], v[4:5]
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_f64_x2_arcp:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[4:5], v[4:5], 1.0
; GFX9-NEXT:    v_rcp_f64_e32 v[8:9], v[6:7]
; GFX9-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0
; GFX9-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
; GFX9-NEXT:    v_div_scale_f64 v[10:11], vcc, 1.0, v[4:5], 1.0
; GFX9-NEXT:    v_fma_f64 v[12:13], -v[6:7], v[8:9], 1.0
; GFX9-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
; GFX9-NEXT:    v_mul_f64 v[12:13], v[10:11], v[8:9]
; GFX9-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[12:13], v[10:11]
; GFX9-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[8:9], v[12:13]
; GFX9-NEXT:    v_div_fixup_f64 v[4:5], v[6:7], v[4:5], 1.0
; GFX9-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX9-NEXT:    v_mul_f64 v[2:3], v[2:3], v[4:5]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_f64_x2_arcp:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_div_scale_f64 v[6:7], null, v[4:5], v[4:5], 1.0
; GFX11-NEXT:    v_div_scale_f64 v[12:13], vcc_lo, 1.0, v[4:5], 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_rcp_f64_e32 v[8:9], v[6:7]
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0
; GFX11-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0
; GFX11-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f64 v[10:11], v[12:13], v[8:9]
; GFX11-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[10:11], v[12:13]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[8:9], v[10:11]
; GFX11-NEXT:    v_div_fixup_f64 v[4:5], v[6:7], v[4:5], 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f64 v[0:1], v[0:1], v[4:5]
; GFX11-NEXT:    v_mul_f64 v[2:3], v[2:3], v[4:5]
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv arcp double %x, %D
  %div1 = fdiv arcp double %y, %D
  %insert.0 = insertelement <2 x double> poison, double %div0, i32 0
  %insert.1 = insertelement <2 x double> %insert.0, double %div1, i32 1
  ret <2 x double> %insert.1
}

; Three f32 numerators divided by the same %D with arcp: one reciprocal
; expansion followed by three v_mul_f32.
define <3 x float> @v_repeat_divisor_f32_x3_arcp(float %x, float %y, float %z, float %D) #0 {
; GFX6-LABEL: v_repeat_divisor_f32_x3_arcp:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_div_scale_f32 v4, s[4:5], v3, v3, 1.0
; GFX6-NEXT:    v_rcp_f32_e32 v5, v4
; GFX6-NEXT:    v_fma_f32 v6, -v4, v5, 1.0
; GFX6-NEXT:    v_fma_f32 v5, v6, v5, v5
; GFX6-NEXT:    v_div_scale_f32 v6, vcc, 1.0, v3, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v7, v6, v5
; GFX6-NEXT:    v_fma_f32 v8, -v4, v7, v6
; GFX6-NEXT:    v_fma_f32 v7, v8, v5, v7
; GFX6-NEXT:    v_fma_f32 v4, -v4, v7, v6
; GFX6-NEXT:    v_div_fmas_f32 v4, v4, v5, v7
; GFX6-NEXT:    v_div_fixup_f32 v3, v4, v3, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v3
; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX6-NEXT:    v_mul_f32_e32 v2, v2, v3
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_f32_x3_arcp:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_div_scale_f32 v4, s[4:5], v3, v3, 1.0
; GFX9-NEXT:    v_div_scale_f32 v5, vcc, 1.0, v3, 1.0
; GFX9-NEXT:    v_rcp_f32_e32 v6, v4
; GFX9-NEXT:    v_fma_f32 v7, -v4, v6, 1.0
; GFX9-NEXT:    v_fma_f32 v6, v7, v6, v6
; GFX9-NEXT:    v_mul_f32_e32 v7, v5, v6
; GFX9-NEXT:    v_fma_f32 v8, -v4, v7, v5
; GFX9-NEXT:    v_fma_f32 v7, v8, v6, v7
; GFX9-NEXT:    v_fma_f32 v4, -v4, v7, v5
; GFX9-NEXT:    v_div_fmas_f32 v4, v4, v6, v7
; GFX9-NEXT:    v_div_fixup_f32 v3, v4, v3, 1.0
; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v3
; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX9-NEXT:    v_mul_f32_e32 v2, v2, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_f32_x3_arcp:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_div_scale_f32 v4, null, v3, v3, 1.0
; GFX11-NEXT:    v_div_scale_f32 v7, vcc_lo, 1.0, v3, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_rcp_f32_e32 v5, v4
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_fma_f32 v6, -v4, v5, 1.0
; GFX11-NEXT:    v_fmac_f32_e32 v5, v6, v5
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v6, v7, v5
; GFX11-NEXT:    v_fma_f32 v8, -v4, v6, v7
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fmac_f32_e32 v6, v8, v5
; GFX11-NEXT:    v_fma_f32 v4, -v4, v6, v7
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_div_fmas_f32 v4, v4, v5, v6
; GFX11-NEXT:    v_div_fixup_f32 v3, v4, v3, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v3
; GFX11-NEXT:    v_mul_f32_e32 v2, v2, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv arcp float %x, %D
  %div1 = fdiv arcp float %y, %D
  %div2 = fdiv arcp float %z, %D
  %insert.0 = insertelement <3 x float> poison, float %div0, i32 0
  %insert.1 = insertelement <3 x float> %insert.0, float %div1, i32 1
  %insert.2 = insertelement <3 x float> %insert.1, float %div2, i32 2
  ret <3 x float> %insert.2
}

; Four f32 numerators divided by the same %D with arcp: one reciprocal
; expansion followed by four v_mul_f32.
define <4 x float> @v_repeat_divisor_f32_x4_arcp(float %x, float %y, float %z, float %w, float %D) #0 {
; GFX6-LABEL: v_repeat_divisor_f32_x4_arcp:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_div_scale_f32 v5, s[4:5], v4, v4, 1.0
; GFX6-NEXT:    v_rcp_f32_e32 v6, v5
; GFX6-NEXT:    v_fma_f32 v7, -v5, v6, 1.0
; GFX6-NEXT:    v_fma_f32 v6, v7, v6, v6
; GFX6-NEXT:    v_div_scale_f32 v7, vcc, 1.0, v4, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v8, v7, v6
; GFX6-NEXT:    v_fma_f32 v9, -v5, v8, v7
; GFX6-NEXT:    v_fma_f32 v8, v9, v6, v8
; GFX6-NEXT:    v_fma_f32 v5, -v5, v8, v7
; GFX6-NEXT:    v_div_fmas_f32 v5, v5, v6, v8
; GFX6-NEXT:    v_div_fixup_f32 v4, v5, v4, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v4
; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v4
; GFX6-NEXT:    v_mul_f32_e32 v2, v2, v4
; GFX6-NEXT:    v_mul_f32_e32 v3, v3, v4
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_f32_x4_arcp:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_div_scale_f32 v5, s[4:5], v4, v4, 1.0
; GFX9-NEXT:    v_div_scale_f32 v6, vcc, 1.0, v4, 1.0
; GFX9-NEXT:    v_rcp_f32_e32 v7, v5
; GFX9-NEXT:    v_fma_f32 v8, -v5, v7, 1.0
; GFX9-NEXT:    v_fma_f32 v7, v8, v7, v7
; GFX9-NEXT:    v_mul_f32_e32 v8, v6, v7
; GFX9-NEXT:    v_fma_f32 v9, -v5, v8, v6
; GFX9-NEXT:    v_fma_f32 v8, v9, v7, v8
; GFX9-NEXT:    v_fma_f32 v5, -v5, v8, v6
; GFX9-NEXT:    v_div_fmas_f32 v5, v5, v7, v8
; GFX9-NEXT:    v_div_fixup_f32 v4, v5, v4, 1.0
; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v4
; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v4
; GFX9-NEXT:    v_mul_f32_e32 v2, v2, v4
; GFX9-NEXT:    v_mul_f32_e32 v3, v3, v4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_f32_x4_arcp:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_div_scale_f32 v5, null, v4, v4, 1.0
; GFX11-NEXT:    v_div_scale_f32 v8, vcc_lo, 1.0, v4, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_rcp_f32_e32 v6, v5
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_fma_f32 v7, -v5, v6, 1.0
; GFX11-NEXT:    v_fmac_f32_e32 v6, v7, v6
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v7, v8, v6
; GFX11-NEXT:    v_fma_f32 v9, -v5, v7, v8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fmac_f32_e32 v7, v9, v6
; GFX11-NEXT:    v_fma_f32 v5, -v5, v7, v8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_div_fmas_f32 v5, v5, v6, v7
; GFX11-NEXT:    v_div_fixup_f32 v4, v5, v4, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v4
; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v4
; GFX11-NEXT:    v_mul_f32_e32 v2, v2, v4
; GFX11-NEXT:    v_mul_f32_e32 v3, v3, v4
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv arcp float %x, %D
  %div1 = fdiv arcp float %y, %D
  %div2 = fdiv arcp float %z, %D
  %div3 = fdiv arcp float %w, %D
  %insert.0 = insertelement <4 x float> poison, float %div0, i32 0
  %insert.1 = insertelement <4 x float> %insert.0, float %div1, i32 1
  %insert.2 = insertelement <4 x float> %insert.1, float %div2, i32 2
  %insert.3 = insertelement <4 x float> %insert.2, float %div3, i32 3
  ret <4 x float> %insert.3
}

; Three half numerators divided by the same %D with arcp. GFX6 goes through
; f32 conversions and the f32 reciprocal expansion; GFX9/GFX11 use one
; v_rcp_f16 and three v_mul_f16, packing the first two results.
define <3 x half> @v_repeat_divisor_f16_x3_arcp(half %x, half %y, half %z, half %D) #0 {
; GFX6-LABEL: v_repeat_divisor_f16_x3_arcp:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_div_scale_f32 v4, s[4:5], v3, v3, 1.0
; GFX6-NEXT:    v_rcp_f32_e32 v5, v4
; GFX6-NEXT:    v_fma_f32 v6, -v4, v5, 1.0
; GFX6-NEXT:    v_fma_f32 v5, v6, v5, v5
; GFX6-NEXT:    v_div_scale_f32 v6, vcc, 1.0, v3, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v7, v6, v5
; GFX6-NEXT:    v_fma_f32 v8, -v4, v7, v6
; GFX6-NEXT:    v_fma_f32 v7, v8, v5, v7
; GFX6-NEXT:    v_fma_f32 v4, -v4, v7, v6
; GFX6-NEXT:    v_div_fmas_f32 v4, v4, v5, v7
; GFX6-NEXT:    v_div_fixup_f32 v3, v4, v3, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v3
; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX6-NEXT:    v_mul_f32_e32 v2, v2, v3
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_f16_x3_arcp:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rcp_f16_e32 v3, v3
; GFX9-NEXT:    v_mul_f16_e32 v0, v0, v3
; GFX9-NEXT:    v_mul_f16_e32 v4, v1, v3
; GFX9-NEXT:    v_mul_f16_e32 v1, v2, v3
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v4
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_f16_x3_arcp:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_rcp_f16_e32 v3, v3
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v3
; GFX11-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT:    v_mul_f16_e32 v1, v2, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv arcp half %x, %D
  %div1 = fdiv arcp half %y, %D
  %div2 = fdiv arcp half %z, %D
  %insert.0 = insertelement <3 x half> poison, half %div0, i32 0
  %insert.1 = insertelement <3 x half> %insert.0, half %div1, i32 1
  %insert.2 = insertelement <3 x half> %insert.1, half %div2, i32 2
  ret <3 x half> %insert.2
}

; Two <2 x float> arcp divisions by the same vector %D. The expected code
; computes one reciprocal expansion per divisor lane (v4 and v5) and then
; multiplies each of the four numerator lanes by the matching reciprocal.
define <4 x float> @v_repeat_divisor_v2f32_x2(<2 x float> %x, <2 x float> %y, <2 x float> %D) #0 {
; GFX6-LABEL: v_repeat_divisor_v2f32_x2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_div_scale_f32 v6, s[4:5], v4, v4, 1.0
; GFX6-NEXT:    v_rcp_f32_e32 v7, v6
; GFX6-NEXT:    v_fma_f32 v8, -v6, v7, 1.0
; GFX6-NEXT:    v_fma_f32 v7, v8, v7, v7
; GFX6-NEXT:    v_div_scale_f32 v8, vcc, 1.0, v4, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v9, v8, v7
; GFX6-NEXT:    v_fma_f32 v10, -v6, v9, v8
; GFX6-NEXT:    v_fma_f32 v9, v10, v7, v9
; GFX6-NEXT:    v_fma_f32 v6, -v6, v9, v8
; GFX6-NEXT:    v_div_scale_f32 v8, s[4:5], v5, v5, 1.0
; GFX6-NEXT:    v_rcp_f32_e32 v10, v8
; GFX6-NEXT:    v_div_fmas_f32 v6, v6, v7, v9
; GFX6-NEXT:    v_div_fixup_f32 v4, v6, v4, 1.0
; GFX6-NEXT:    v_div_scale_f32 v7, vcc, 1.0, v5, 1.0
; GFX6-NEXT:    v_fma_f32 v6, -v8, v10, 1.0
; GFX6-NEXT:    v_fma_f32 v6, v6, v10, v10
; GFX6-NEXT:    v_mul_f32_e32 v9, v7, v6
; GFX6-NEXT:    v_fma_f32 v10, -v8, v9, v7
; GFX6-NEXT:    v_fma_f32 v9, v10, v6, v9
; GFX6-NEXT:    v_fma_f32 v7, -v8, v9, v7
; GFX6-NEXT:    v_div_fmas_f32 v6, v7, v6, v9
; GFX6-NEXT:    v_div_fixup_f32 v5, v6, v5, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v4
; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v5
; GFX6-NEXT:    v_mul_f32_e32 v2, v2, v4
; GFX6-NEXT:    v_mul_f32_e32 v3, v3, v5
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_v2f32_x2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_div_scale_f32 v6, s[4:5], v4, v4, 1.0
; GFX9-NEXT:    v_div_scale_f32 v7, s[4:5], v5, v5, 1.0
; GFX9-NEXT:    v_div_scale_f32 v8, vcc, 1.0, v4, 1.0
; GFX9-NEXT:    v_div_scale_f32 v9, s[4:5], 1.0, v5, 1.0
; GFX9-NEXT:    v_rcp_f32_e32 v10, v6
; GFX9-NEXT:    v_rcp_f32_e32 v11, v7
; GFX9-NEXT:    v_fma_f32 v12, -v6, v10, 1.0
; GFX9-NEXT:    v_fma_f32 v10, v12, v10, v10
; GFX9-NEXT:    v_fma_f32 v13, -v7, v11, 1.0
; GFX9-NEXT:    v_fma_f32 v11, v13, v11, v11
; GFX9-NEXT:    v_mul_f32_e32 v12, v8, v10
; GFX9-NEXT:    v_mul_f32_e32 v13, v9, v11
; GFX9-NEXT:    v_fma_f32 v14, -v6, v12, v8
; GFX9-NEXT:    v_fma_f32 v15, -v7, v13, v9
; GFX9-NEXT:    v_fma_f32 v12, v14, v10, v12
; GFX9-NEXT:    v_fma_f32 v6, -v6, v12, v8
; GFX9-NEXT:    v_fma_f32 v8, v15, v11, v13
; GFX9-NEXT:    v_div_fmas_f32 v6, v6, v10, v12
; GFX9-NEXT:    v_fma_f32 v7, -v7, v8, v9
; GFX9-NEXT:    s_mov_b64 vcc, s[4:5]
; GFX9-NEXT:    v_div_fmas_f32 v7, v7, v11, v8
; GFX9-NEXT:    v_div_fixup_f32 v4, v6, v4, 1.0
; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v4
; GFX9-NEXT:    v_mul_f32_e32 v2, v2, v4
; GFX9-NEXT:    v_div_fixup_f32 v5, v7, v5, 1.0
; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v5
; GFX9-NEXT:    v_mul_f32_e32 v3, v3, v5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_v2f32_x2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_div_scale_f32 v6, null, v4, v4, 1.0
; GFX11-NEXT:    v_div_scale_f32 v7, null, v5, v5, 1.0
; GFX11-NEXT:    v_div_scale_f32 v12, vcc_lo, 1.0, v4, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_rcp_f32_e32 v8, v6
; GFX11-NEXT:    v_rcp_f32_e32 v9, v7
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_fma_f32 v10, -v6, v8, 1.0
; GFX11-NEXT:    v_fma_f32 v11, -v7, v9, 1.0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_dual_fmac_f32 v8, v10, v8 :: v_dual_fmac_f32 v9, v11, v9
; GFX11-NEXT:    v_div_scale_f32 v10, s0, 1.0, v5, 1.0
; GFX11-NEXT:    v_mul_f32_e32 v11, v12, v8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fma_f32 v14, -v6, v11, v12
; GFX11-NEXT:    v_fmac_f32_e32 v11, v14, v8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fma_f32 v6, -v6, v11, v12
; GFX11-NEXT:    v_div_fmas_f32 v6, v6, v8, v11
; GFX11-NEXT:    s_mov_b32 vcc_lo, s0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_div_fixup_f32 v4, v6, v4, 1.0
; GFX11-NEXT:    v_dual_mul_f32 v13, v10, v9 :: v_dual_mul_f32 v0, v0, v4
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fma_f32 v15, -v7, v13, v10
; GFX11-NEXT:    v_dual_mul_f32 v2, v2, v4 :: v_dual_fmac_f32 v13, v15, v9
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_fma_f32 v7, -v7, v13, v10
; GFX11-NEXT:    v_div_fmas_f32 v7, v7, v9, v13
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_div_fixup_f32 v5, v7, v5, 1.0
; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v5
; GFX11-NEXT:    v_mul_f32_e32 v3, v3, v5
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv arcp <2 x float> %x, %D
  %div1 = fdiv arcp <2 x float> %y, %D
  %shuffle = shufflevector <2 x float> %div0, <2 x float> %div1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuffle
}

; arcp divisions that also carry !fpmath !0.
; NOTE(review): !0 is defined outside this view; the "ulp25" suffix suggests a
; 2.5 ulp accuracy bound - confirm against the metadata definition.
; The expected code forms the shared reciprocal from v_frexp_mant_f32,
; v_rcp_f32, v_frexp_exp_i32_f32 and v_ldexp_f32 (GFX6 additionally selects the
; rcp input with v_cmp_lt_f32/v_cndmask_b32), then multiplies each numerator.
define <2 x float> @v_repeat_divisor_f32_x2_ulp25(float %x, float %y, float %D) #0 {
; GFX6-LABEL: v_repeat_divisor_f32_x2_ulp25:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s4, 0x7f800000
; GFX6-NEXT:    v_frexp_mant_f32_e32 v3, v2
; GFX6-NEXT:    v_cmp_lt_f32_e64 vcc, |v2|, s4
; GFX6-NEXT:    v_cndmask_b32_e32 v3, v2, v3, vcc
; GFX6-NEXT:    v_rcp_f32_e32 v3, v3
; GFX6-NEXT:    v_frexp_exp_i32_f32_e32 v2, v2
; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, 0, v2
; GFX6-NEXT:    v_ldexp_f32_e32 v2, v3, v2
; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_f32_x2_ulp25:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_frexp_mant_f32_e32 v3, v2
; GFX9-NEXT:    v_rcp_f32_e32 v3, v3
; GFX9-NEXT:    v_frexp_exp_i32_f32_e32 v2, v2
; GFX9-NEXT:    v_sub_u32_e32 v2, 0, v2
; GFX9-NEXT:    v_ldexp_f32 v2, v3, v2
; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_f32_x2_ulp25:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_frexp_mant_f32_e32 v3, v2
; GFX11-NEXT:    v_frexp_exp_i32_f32_e32 v2, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_rcp_f32_e32 v3, v3
; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 0, v2
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_ldexp_f32 v2, v3, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv arcp float %x, %D, !fpmath !0
  %div1 = fdiv arcp float %y, %D, !fpmath !0
  %insert.0 = insertelement <2 x float> poison, float %div0, i32 0
  %insert.1 = insertelement <2 x float> %insert.0, float %div1, i32 1
  ret <2 x float> %insert.1
}

; Same !fpmath arcp divisions under attribute group #1 (defined outside this
; view - see the review note on the arcp_daz test). The expected code on all
; three targets is a bare v_rcp_f32 of %D followed by two v_mul_f32.
define <2 x float> @v_repeat_divisor_f32_x2_daz_ulp25(float %x, float %y, float %D) #1 {
; GFX6-LABEL: v_repeat_divisor_f32_x2_daz_ulp25:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_f32_x2_daz_ulp25:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rcp_f32_e32 v2, v2
; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_f32_x2_daz_ulp25:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_rcp_f32_e32 v2, v2
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX11-NEXT:    v_mul_f32_e32 v1, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv arcp float %x, %D, !fpmath !0
  %div1 = fdiv arcp float %y, %D, !fpmath !0
  %insert.0 = insertelement <2 x float> poison, float %div0, i32 0
  %insert.1 = insertelement <2 x float> %insert.0, float %div1, i32 1
  ret <2 x float> %insert.1
}

; Two <2 x half> arcp divisions by the same vector %D. GFX6 converts to f32 and
; runs one f32 reciprocal expansion per divisor lane before four v_mul_f32;
; GFX9/GFX11 take v_rcp_f16 of each half of %D, repack with v_pack_b32_f16 and
; issue two v_pk_mul_f16.
define <4 x half> @v_repeat_divisor_v2f16_x2(<2 x half> %x, <2 x half> %y, <2 x half> %D) #0 {
; GFX6-LABEL: v_repeat_divisor_v2f16_x2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v4, v4
; GFX6-NEXT:    v_cvt_f16_f32_e32 v5, v5
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v4, v4
; GFX6-NEXT:    v_cvt_f32_f16_e32 v5, v5
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_div_scale_f32 v6, s[4:5], v4, v4, 1.0
; GFX6-NEXT:    v_rcp_f32_e32 v7, v6
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_fma_f32 v8, -v6, v7, 1.0
; GFX6-NEXT:    v_fma_f32 v7, v8, v7, v7
; GFX6-NEXT:    v_div_scale_f32 v8, vcc, 1.0, v4, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v9, v8, v7
; GFX6-NEXT:    v_fma_f32 v10, -v6, v9, v8
; GFX6-NEXT:    v_fma_f32 v9, v10, v7, v9
; GFX6-NEXT:    v_fma_f32 v6, -v6, v9, v8
; GFX6-NEXT:    v_div_scale_f32 v8, s[4:5], v5, v5, 1.0
; GFX6-NEXT:    v_rcp_f32_e32 v10, v8
; GFX6-NEXT:    v_div_fmas_f32 v6, v6, v7, v9
; GFX6-NEXT:    v_div_fixup_f32 v4, v6, v4, 1.0
; GFX6-NEXT:    v_div_scale_f32 v7, vcc, 1.0, v5, 1.0
; GFX6-NEXT:    v_fma_f32 v6, -v8, v10, 1.0
; GFX6-NEXT:    v_fma_f32 v6, v6, v10, v10
; GFX6-NEXT:    v_mul_f32_e32 v9, v7, v6
; GFX6-NEXT:    v_fma_f32 v10, -v8, v9, v7
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_fma_f32 v9, v10, v6, v9
; GFX6-NEXT:    v_fma_f32 v7, -v8, v9, v7
; GFX6-NEXT:    v_div_fmas_f32 v6, v7, v6, v9
; GFX6-NEXT:    v_div_fixup_f32 v5, v6, v5, 1.0
; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v4
; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v5
; GFX6-NEXT:    v_mul_f32_e32 v2, v2, v4
; GFX6-NEXT:    v_mul_f32_e32 v3, v3, v5
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_repeat_divisor_v2f16_x2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rcp_f16_sdwa v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_rcp_f16_e32 v2, v2
; GFX9-NEXT:    v_pack_b32_f16 v2, v2, v3
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_repeat_divisor_v2f16_x2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX11-NEXT:    v_rcp_f16_e32 v2, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_rcp_f16_e32 v3, v3
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_pack_b32_f16 v2, v2, v3
; GFX11-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX11-NEXT:    v_pk_mul_f16 v1, v1, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %div0 = fdiv arcp <2 x half> %x, %D
  %div1 = fdiv arcp <2 x half> %y, %D
  %shuffle = shufflevector <2 x half> %div0, <2 x half> %div1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x half> %shuffle
}

define <6 x half> @v_repeat_divisor_v3f16_x2(<3 x half> %x, <3 x half> %y, <3 x half> %D) #0 {
; GFX6-LABEL: v_repeat_divisor_v3f16_x2:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v6, v6
; GFX6-NEXT:    v_cvt_f16_f32_e32 v7, v7
; GFX6-NEXT:
v_cvt_f16_f32_e32 v8, v8 840; GFX6-NEXT: v_cvt_f16_f32_e32 v5, v5 841; GFX6-NEXT: v_cvt_f32_f16_e32 v6, v6 842; GFX6-NEXT: v_cvt_f32_f16_e32 v7, v7 843; GFX6-NEXT: v_cvt_f32_f16_e32 v8, v8 844; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v4 845; GFX6-NEXT: v_div_scale_f32 v9, s[4:5], v6, v6, 1.0 846; GFX6-NEXT: v_rcp_f32_e32 v10, v9 847; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 848; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 849; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 850; GFX6-NEXT: v_fma_f32 v11, -v9, v10, 1.0 851; GFX6-NEXT: v_fma_f32 v10, v11, v10, v10 852; GFX6-NEXT: v_div_scale_f32 v11, vcc, 1.0, v6, 1.0 853; GFX6-NEXT: v_mul_f32_e32 v12, v11, v10 854; GFX6-NEXT: v_fma_f32 v13, -v9, v12, v11 855; GFX6-NEXT: v_fma_f32 v12, v13, v10, v12 856; GFX6-NEXT: v_fma_f32 v9, -v9, v12, v11 857; GFX6-NEXT: v_div_scale_f32 v11, s[4:5], v7, v7, 1.0 858; GFX6-NEXT: v_rcp_f32_e32 v13, v11 859; GFX6-NEXT: v_div_fmas_f32 v9, v9, v10, v12 860; GFX6-NEXT: v_div_fixup_f32 v6, v9, v6, 1.0 861; GFX6-NEXT: v_div_scale_f32 v10, vcc, 1.0, v7, 1.0 862; GFX6-NEXT: v_fma_f32 v9, -v11, v13, 1.0 863; GFX6-NEXT: v_fma_f32 v9, v9, v13, v13 864; GFX6-NEXT: v_mul_f32_e32 v12, v10, v9 865; GFX6-NEXT: v_fma_f32 v13, -v11, v12, v10 866; GFX6-NEXT: v_fma_f32 v12, v13, v9, v12 867; GFX6-NEXT: v_fma_f32 v10, -v11, v12, v10 868; GFX6-NEXT: v_div_scale_f32 v11, s[4:5], v8, v8, 1.0 869; GFX6-NEXT: v_rcp_f32_e32 v13, v11 870; GFX6-NEXT: v_div_fmas_f32 v9, v10, v9, v12 871; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 872; GFX6-NEXT: v_div_fixup_f32 v7, v9, v7, 1.0 873; GFX6-NEXT: v_fma_f32 v9, -v11, v13, 1.0 874; GFX6-NEXT: v_fma_f32 v9, v9, v13, v13 875; GFX6-NEXT: v_div_scale_f32 v10, vcc, 1.0, v8, 1.0 876; GFX6-NEXT: v_mul_f32_e32 v12, v10, v9 877; GFX6-NEXT: v_fma_f32 v13, -v11, v12, v10 878; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5 879; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4 880; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 881; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 882; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 883; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 884; 
GFX6-NEXT: v_fma_f32 v12, v13, v9, v12 885; GFX6-NEXT: v_fma_f32 v10, -v11, v12, v10 886; GFX6-NEXT: v_div_fmas_f32 v9, v10, v9, v12 887; GFX6-NEXT: v_div_fixup_f32 v8, v9, v8, 1.0 888; GFX6-NEXT: v_mul_f32_e32 v0, v0, v6 889; GFX6-NEXT: v_mul_f32_e32 v1, v1, v7 890; GFX6-NEXT: v_mul_f32_e32 v2, v2, v8 891; GFX6-NEXT: v_mul_f32_e32 v3, v3, v6 892; GFX6-NEXT: v_mul_f32_e32 v4, v4, v7 893; GFX6-NEXT: v_mul_f32_e32 v5, v5, v8 894; GFX6-NEXT: s_setpc_b64 s[30:31] 895; 896; GFX9-LABEL: v_repeat_divisor_v3f16_x2: 897; GFX9: ; %bb.0: 898; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 899; GFX9-NEXT: v_rcp_f16_sdwa v6, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 900; GFX9-NEXT: v_rcp_f16_e32 v4, v4 901; GFX9-NEXT: v_rcp_f16_e32 v5, v5 902; GFX9-NEXT: s_movk_i32 s4, 0x7e00 903; GFX9-NEXT: v_pack_b32_f16 v4, v4, v6 904; GFX9-NEXT: v_pack_b32_f16 v5, v5, s4 905; GFX9-NEXT: v_pk_mul_f16 v0, v0, v4 906; GFX9-NEXT: v_pk_mul_f16 v1, v1, v5 907; GFX9-NEXT: v_pk_mul_f16 v3, v3, v5 908; GFX9-NEXT: v_pk_mul_f16 v4, v2, v4 909; GFX9-NEXT: v_alignbit_b32 v2, v3, v4, 16 910; GFX9-NEXT: v_pack_b32_f16 v1, v1, v4 911; GFX9-NEXT: s_setpc_b64 s[30:31] 912; 913; GFX11-LABEL: v_repeat_divisor_v3f16_x2: 914; GFX11: ; %bb.0: 915; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 916; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v4 917; GFX11-NEXT: v_rcp_f16_e32 v4, v4 918; GFX11-NEXT: v_rcp_f16_e32 v5, v5 919; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) 920; GFX11-NEXT: v_rcp_f16_e32 v6, v6 921; GFX11-NEXT: s_waitcnt_depctr 0xfff 922; GFX11-NEXT: v_pack_b32_f16 v5, v5, 0x7e00 923; GFX11-NEXT: v_pack_b32_f16 v4, v4, v6 924; GFX11-NEXT: v_pk_mul_f16 v1, v1, v5 925; GFX11-NEXT: v_pk_mul_f16 v3, v3, v5 926; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) 927; GFX11-NEXT: v_pk_mul_f16 v2, v2, v4 928; GFX11-NEXT: v_pk_mul_f16 v0, v0, v4 929; GFX11-NEXT: v_pack_b32_f16 v1, v1, v2 930; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_4) 931; GFX11-NEXT: v_alignbit_b32 v2, v3, v2, 16 932; GFX11-NEXT: s_setpc_b64 s[30:31] 933 %div0 = fdiv arcp <3 x half> %x, %D 934 %div1 = fdiv arcp <3 x half> %y, %D 935 %shuffle = shufflevector <3 x half> %div0, <3 x half> %div1, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5> 936 ret <6 x half> %shuffle 937} 938 939attributes #0 = { "denormal-fp-math-f32"="ieee,ieee" } 940attributes #1 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" } 941 942!0 = !{float 2.5} 943;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 944; GCN: {{.*}} 945