; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s

; Purpose of this test
; --------------------
; Exercises GlobalISel selection of `mul <2 x i16>` for the amdgcn-amd-amdhsa
; triple on four CPUs (gfx900, fiji, gfx1010, gfx1100). Besides the plain
; multiply, three variants feed the multiply with operands that were produced
; by `fneg <2 x half>` followed by a bitcast to <2 x i16>.
;
; The gfx1010 and gfx1100 invocations deliberately share one check prefix, so
; both must emit the same instruction sequence for every function below.
; The gfx1100 invocation passes -amdgpu-enable-delay-alu=0; presumably this
; suppresses delay-ALU instructions so the output stays identical to gfx1010
; (reviewer: confirm against the option's description).
;
; Expected shapes, as recorded by the assertions:
;   * gfx900 / gfx1010 / gfx1100: one v_pk_mul_lo_u16; a negated operand shows
;     up as neg_lo/neg_hi bits on that instruction rather than as a separate
;     instruction.
;   * fiji: a 16-bit multiply of the low halves, an SDWA multiply that selects
;     WORD_1 of both sources and of the destination, and a v_or_b32 merging
;     the two results. A negated operand is materialised first with
;     v_xor_b32 against 0x80008000, i.e. bit 15 of each 16-bit lane is flipped.
;
; NOTE(review): the packed-target assertions fold a floating-point negate into
; neg_lo/neg_hi on an integer multiply. Please confirm those modifier bits have
; the intended meaning for v_pk_mul_lo_u16 before relying on these checks as a
; correctness reference.
;
; The assertion lines are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Baseline: both operands are ordinary <2 x i16> arguments, no negation.
define <2 x i16> @v_mul_v2i16(<2 x i16> %a, <2 x i16> %b) {
; GFX9-LABEL: v_mul_v2i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_mul_v2i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_mul_lo_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_mul_v2i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %mul = mul <2 x i16> %a, %b
  ret <2 x i16> %mul
}

; Left operand only is negated: %a arrives as <2 x half>, goes through fneg,
; and is reinterpreted as <2 x i16> before the multiply. The packed targets
; record the negate as neg_lo:[1,0] neg_hi:[1,0] (first source only); fiji
; records an explicit v_xor_b32 on v0.
define <2 x i16> @v_mul_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
; GFX9-LABEL: v_mul_v2i16_fneg_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_mul_v2i16_fneg_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_mul_lo_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_mul_v2i16_fneg_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.a = fneg <2 x half> %a
  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
  %mul = mul <2 x i16> %cast.neg.a, %b
  ret <2 x i16> %mul
}

; Right operand only is negated: mirror image of the previous function, with
; %b being the <2 x half> value. The packed targets record neg_lo:[0,1]
; neg_hi:[0,1] (second source only); fiji records the v_xor_b32 on v1.
define <2 x i16> @v_mul_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
; GFX9-LABEL: v_mul_v2i16_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_mul_v2i16_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_mul_lo_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_mul_v2i16_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.b = fneg <2 x half> %b
  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
  %mul = mul <2 x i16> %a, %cast.neg.b
  ret <2 x i16> %mul
}

; Both operands are negated: each <2 x half> argument goes through its own
; fneg and bitcast before the multiply. The packed targets record
; neg_lo:[1,1] neg_hi:[1,1]; fiji records two v_xor_b32 instructions, one per
; operand register, ahead of the multiplies.
define <2 x i16> @v_mul_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
; GFX9-LABEL: v_mul_v2i16_fneg_lhs_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_mul_v2i16_fneg_lhs_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_mul_lo_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_mul_v2i16_fneg_lhs_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.a = fneg <2 x half> %a
  %neg.b = fneg <2 x half> %b
  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
  %mul = mul <2 x i16> %cast.neg.a, %cast.neg.b
  ret <2 x i16> %mul
}