; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s

; Vector-of-half fmul selection through GlobalISel, for 2, 3, 4, 6 and 8
; elements. Each width is exercised four ways: no negation, fneg on the left
; operand, fneg on the right operand, and fneg on both operands.
;
; Per the expected output below, the gfx900/gfx1010/gfx1100 runs use
; v_pk_mul_f16 and express a single fneg as neg_lo/neg_hi modifiers, while the
; fiji run multiplies the low and high halves separately (e32 + sdwa), merges
; them with v_or_b32, and applies a single fneg as a v_xor_b32 with 0x80008000.
; When both operands are negated, no negation appears in any expected output.

; <2 x half>, no negation.
define <2 x half> @v_fmul_v2f16(<2 x half> %a, <2 x half> %b) {
; GFX9-LABEL: v_fmul_v2f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v2f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f16_e32 v2, v0, v1
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v2f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %prod = fmul <2 x half> %a, %b
  ret <2 x half> %prod
}

; <2 x half>, left operand negated.
define <2 x half> @v_fmul_v2f16_fneg_lhs(<2 x half> %a, <2 x half> %b) {
; GFX9-LABEL: v_fmul_v2f16_fneg_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v2f16_fneg_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT:    v_mul_f16_e32 v2, v0, v1
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v2f16_fneg_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %a.neg = fneg <2 x half> %a
  %prod = fmul <2 x half> %a.neg, %b
  ret <2 x half> %prod
}

; <2 x half>, right operand negated.
define <2 x half> @v_fmul_v2f16_fneg_rhs(<2 x half> %a, <2 x half> %b) {
; GFX9-LABEL: v_fmul_v2f16_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v2f16_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT:    v_mul_f16_e32 v2, v0, v1
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v2f16_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %b.neg = fneg <2 x half> %b
  %prod = fmul <2 x half> %a, %b.neg
  ret <2 x half> %prod
}

; <2 x half>, both operands negated.
define <2 x half> @v_fmul_v2f16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
; GFX9-LABEL: v_fmul_v2f16_fneg_lhs_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v2f16_fneg_lhs_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f16_e32 v2, v0, v1
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v2f16_fneg_lhs_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %a.neg = fneg <2 x half> %a
  %b.neg = fneg <2 x half> %b
  %prod = fmul <2 x half> %a.neg, %b.neg
  ret <2 x half> %prod
}

; <3 x half>, no negation.
define <3 x half> @v_fmul_v3f16(<3 x half> %a, <3 x half> %b) {
; GFX9-LABEL: v_fmul_v3f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v3f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f16_e32 v4, v0, v2
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v4, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v3f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %prod = fmul <3 x half> %a, %b
  ret <3 x half> %prod
}

; <3 x half>, left operand negated.
define <3 x half> @v_fmul_v3f16_fneg_lhs(<3 x half> %a, <3 x half> %b) {
; GFX9-LABEL: v_fmul_v3f16_fneg_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v3 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v3f16_fneg_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT:    v_mul_f16_e32 v4, v0, v2
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v4, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v3f16_fneg_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v2 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v3 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %a.neg = fneg <3 x half> %a
  %prod = fmul <3 x half> %a.neg, %b
  ret <3 x half> %prod
}

; <3 x half>, right operand negated.
define <3 x half> @v_fmul_v3f16_fneg_rhs(<3 x half> %a, <3 x half> %b) {
; GFX9-LABEL: v_fmul_v3f16_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v3f16_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
; GFX8-NEXT:    v_xor_b32_e32 v3, 0x80008000, v3
; GFX8-NEXT:    v_mul_f16_e32 v4, v0, v2
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v4, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v3f16_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %b.neg = fneg <3 x half> %b
  %prod = fmul <3 x half> %a, %b.neg
  ret <3 x half> %prod
}

; <3 x half>, both operands negated.
define <3 x half> @v_fmul_v3f16_fneg_lhs_fneg_rhs(<3 x half> %a, <3 x half> %b) {
; GFX9-LABEL: v_fmul_v3f16_fneg_lhs_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v3f16_fneg_lhs_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f16_e32 v4, v0, v2
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v4, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v3f16_fneg_lhs_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %a.neg = fneg <3 x half> %a
  %b.neg = fneg <3 x half> %b
  %prod = fmul <3 x half> %a.neg, %b.neg
  ret <3 x half> %prod
}

; <4 x half>, no negation.
define <4 x half> @v_fmul_v4f16(<4 x half> %a, <4 x half> %b) {
; GFX9-LABEL: v_fmul_v4f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v4f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f16_e32 v4, v0, v2
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v2, v1, v3
; GFX8-NEXT:    v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v4, v0
; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v4f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %prod = fmul <4 x half> %a, %b
  ret <4 x half> %prod
}

; <4 x half>, left operand negated.
define <4 x half> @v_fmul_v4f16_fneg_lhs(<4 x half> %a, <4 x half> %b) {
; GFX9-LABEL: v_fmul_v4f16_fneg_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v3 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v4f16_fneg_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT:    v_mul_f16_e32 v4, v0, v2
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v2, v1, v3
; GFX8-NEXT:    v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v4, v0
; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v4f16_fneg_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v2 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v3 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %a.neg = fneg <4 x half> %a
  %prod = fmul <4 x half> %a.neg, %b
  ret <4 x half> %prod
}

; <4 x half>, right operand negated.
define <4 x half> @v_fmul_v4f16_fneg_rhs(<4 x half> %a, <4 x half> %b) {
; GFX9-LABEL: v_fmul_v4f16_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v4f16_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
; GFX8-NEXT:    v_xor_b32_e32 v3, 0x80008000, v3
; GFX8-NEXT:    v_mul_f16_e32 v4, v0, v2
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v2, v1, v3
; GFX8-NEXT:    v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v4, v0
; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v4f16_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %b.neg = fneg <4 x half> %b
  %prod = fmul <4 x half> %a, %b.neg
  ret <4 x half> %prod
}

; <4 x half>, both operands negated.
define <4 x half> @v_fmul_v4f16_fneg_lhs_fneg_rhs(<4 x half> %a, <4 x half> %b) {
; GFX9-LABEL: v_fmul_v4f16_fneg_lhs_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v4f16_fneg_lhs_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f16_e32 v4, v0, v2
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v2, v1, v3
; GFX8-NEXT:    v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v4, v0
; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v4f16_fneg_lhs_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %a.neg = fneg <4 x half> %a
  %b.neg = fneg <4 x half> %b
  %prod = fmul <4 x half> %a.neg, %b.neg
  ret <4 x half> %prod
}

; <6 x half>, no negation.
define <6 x half> @v_fmul_v6f16(<6 x half> %a, <6 x half> %b) {
; GFX9-LABEL: v_fmul_v6f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v3
; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v4
; GFX9-NEXT:    v_pk_mul_f16 v2, v2, v5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v6f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f16_e32 v6, v0, v3
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v3, v1, v4
; GFX8-NEXT:    v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v4, v2, v5
; GFX8-NEXT:    v_mul_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v6, v0
; GFX8-NEXT:    v_or_b32_e32 v1, v3, v1
; GFX8-NEXT:    v_or_b32_e32 v2, v4, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v6f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v3
; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v4
; GFX10-NEXT:    v_pk_mul_f16 v2, v2, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %prod = fmul <6 x half> %a, %b
  ret <6 x half> %prod
}

; <6 x half>, left operand negated.
define <6 x half> @v_fmul_v6f16_fneg_lhs(<6 x half> %a, <6 x half> %b) {
; GFX9-LABEL: v_fmul_v6f16_fneg_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v3 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v4 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT:    v_pk_mul_f16 v2, v2, v5 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v6f16_fneg_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
; GFX8-NEXT:    v_mul_f16_e32 v6, v0, v3
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v3, v1, v4
; GFX8-NEXT:    v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v4, v2, v5
; GFX8-NEXT:    v_mul_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v6, v0
; GFX8-NEXT:    v_or_b32_e32 v1, v3, v1
; GFX8-NEXT:    v_or_b32_e32 v2, v4, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v6f16_fneg_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v3 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v4 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT:    v_pk_mul_f16 v2, v2, v5 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %a.neg = fneg <6 x half> %a
  %prod = fmul <6 x half> %a.neg, %b
  ret <6 x half> %prod
}

; <6 x half>, right operand negated.
define <6 x half> @v_fmul_v6f16_fneg_rhs(<6 x half> %a, <6 x half> %b) {
; GFX9-LABEL: v_fmul_v6f16_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT:    v_pk_mul_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v6f16_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v3, 0x80008000, v3
; GFX8-NEXT:    v_xor_b32_e32 v4, 0x80008000, v4
; GFX8-NEXT:    v_xor_b32_e32 v5, 0x80008000, v5
; GFX8-NEXT:    v_mul_f16_e32 v6, v0, v3
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v3, v1, v4
; GFX8-NEXT:    v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v4, v2, v5
; GFX8-NEXT:    v_mul_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v6, v0
; GFX8-NEXT:    v_or_b32_e32 v1, v3, v1
; GFX8-NEXT:    v_or_b32_e32 v2, v4, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v6f16_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT:    v_pk_mul_f16 v2, v2, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %b.neg = fneg <6 x half> %b
  %prod = fmul <6 x half> %a, %b.neg
  ret <6 x half> %prod
}

; <6 x half>, both operands negated.
define <6 x half> @v_fmul_v6f16_fneg_lhs_fneg_rhs(<6 x half> %a, <6 x half> %b) {
; GFX9-LABEL: v_fmul_v6f16_fneg_lhs_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v3
; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v4
; GFX9-NEXT:    v_pk_mul_f16 v2, v2, v5
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v6f16_fneg_lhs_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f16_e32 v6, v0, v3
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v3, v1, v4
; GFX8-NEXT:    v_mul_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v4, v2, v5
; GFX8-NEXT:    v_mul_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v6, v0
; GFX8-NEXT:    v_or_b32_e32 v1, v3, v1
; GFX8-NEXT:    v_or_b32_e32 v2, v4, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v6f16_fneg_lhs_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v3
; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v4
; GFX10-NEXT:    v_pk_mul_f16 v2, v2, v5
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %a.neg = fneg <6 x half> %a
  %b.neg = fneg <6 x half> %b
  %prod = fmul <6 x half> %a.neg, %b.neg
  ret <6 x half> %prod
}

; <8 x half>, no negation.
define <8 x half> @v_fmul_v8f16(<8 x half> %a, <8 x half> %b) {
; GFX9-LABEL: v_fmul_v8f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v4
; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v5
; GFX9-NEXT:    v_pk_mul_f16 v2, v2, v6
; GFX9-NEXT:    v_pk_mul_f16 v3, v3, v7
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v8f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f16_e32 v8, v0, v4
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v4, v1, v5
; GFX8-NEXT:    v_mul_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v5, v2, v6
; GFX8-NEXT:    v_mul_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v6, v3, v7
; GFX8-NEXT:    v_mul_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v8, v0
; GFX8-NEXT:    v_or_b32_e32 v1, v4, v1
; GFX8-NEXT:    v_or_b32_e32 v2, v5, v2
; GFX8-NEXT:    v_or_b32_e32 v3, v6, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v8f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v4
; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v5
; GFX10-NEXT:    v_pk_mul_f16 v2, v2, v6
; GFX10-NEXT:    v_pk_mul_f16 v3, v3, v7
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %prod = fmul <8 x half> %a, %b
  ret <8 x half> %prod
}

; <8 x half>, left operand negated.
define <8 x half> @v_fmul_v8f16_fneg_lhs(<8 x half> %a, <8 x half> %b) {
; GFX9-LABEL: v_fmul_v8f16_fneg_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v4 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v5 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT:    v_pk_mul_f16 v2, v2, v6 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT:    v_pk_mul_f16 v3, v3, v7 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v8f16_fneg_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
; GFX8-NEXT:    v_xor_b32_e32 v3, 0x80008000, v3
; GFX8-NEXT:    v_mul_f16_e32 v8, v0, v4
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v4, v1, v5
; GFX8-NEXT:    v_mul_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v5, v2, v6
; GFX8-NEXT:    v_mul_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v6, v3, v7
; GFX8-NEXT:    v_mul_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v8, v0
; GFX8-NEXT:    v_or_b32_e32 v1, v4, v1
; GFX8-NEXT:    v_or_b32_e32 v2, v5, v2
; GFX8-NEXT:    v_or_b32_e32 v3, v6, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v8f16_fneg_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v4 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v5 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT:    v_pk_mul_f16 v2, v2, v6 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT:    v_pk_mul_f16 v3, v3, v7 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %a.neg = fneg <8 x half> %a
  %prod = fmul <8 x half> %a.neg, %b
  ret <8 x half> %prod
}

; <8 x half>, right operand negated.
define <8 x half> @v_fmul_v8f16_fneg_rhs(<8 x half> %a, <8 x half> %b) {
; GFX9-LABEL: v_fmul_v8f16_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT:    v_pk_mul_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT:    v_pk_mul_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v8f16_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v4, 0x80008000, v4
; GFX8-NEXT:    v_xor_b32_e32 v5, 0x80008000, v5
; GFX8-NEXT:    v_xor_b32_e32 v6, 0x80008000, v6
; GFX8-NEXT:    v_xor_b32_e32 v7, 0x80008000, v7
; GFX8-NEXT:    v_mul_f16_e32 v8, v0, v4
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v4, v1, v5
; GFX8-NEXT:    v_mul_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v5, v2, v6
; GFX8-NEXT:    v_mul_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v6, v3, v7
; GFX8-NEXT:    v_mul_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v8, v0
; GFX8-NEXT:    v_or_b32_e32 v1, v4, v1
; GFX8-NEXT:    v_or_b32_e32 v2, v5, v2
; GFX8-NEXT:    v_or_b32_e32 v3, v6, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v8f16_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v4 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v5 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT:    v_pk_mul_f16 v2, v2, v6 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT:    v_pk_mul_f16 v3, v3, v7 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %b.neg = fneg <8 x half> %b
  %prod = fmul <8 x half> %a, %b.neg
  ret <8 x half> %prod
}

; <8 x half>, both operands negated.
define <8 x half> @v_fmul_v8f16_fneg_lhs_fneg_rhs(<8 x half> %a, <8 x half> %b) {
; GFX9-LABEL: v_fmul_v8f16_fneg_lhs_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v4
; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v5
; GFX9-NEXT:    v_pk_mul_f16 v2, v2, v6
; GFX9-NEXT:    v_pk_mul_f16 v3, v3, v7
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fmul_v8f16_fneg_lhs_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f16_e32 v8, v0, v4
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v4, v1, v5
; GFX8-NEXT:    v_mul_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v5, v2, v6
; GFX8-NEXT:    v_mul_f16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v6, v3, v7
; GFX8-NEXT:    v_mul_f16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v8, v0
; GFX8-NEXT:    v_or_b32_e32 v1, v4, v1
; GFX8-NEXT:    v_or_b32_e32 v2, v5, v2
; GFX8-NEXT:    v_or_b32_e32 v3, v6, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fmul_v8f16_fneg_lhs_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v4
; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v5
; GFX10-NEXT:    v_pk_mul_f16 v2, v2, v6
; GFX10-NEXT:    v_pk_mul_f16 v3, v3, v7
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %a.neg = fneg <8 x half> %a
  %b.neg = fneg <8 x half> %b
  %prod = fmul <8 x half> %a.neg, %b.neg
  ret <8 x half> %prod
}