xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.v2i16.ll (revision f2c164c8150548d983565c4ddc0fde790f9e2a5b)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX10 %s
6
; Baseline case: element-wise multiply of two <2 x i16> arguments, with no
; fneg feeding either operand. The gfx900 checks and the gfx1010/gfx1100
; checks expect a single v_pk_mul_lo_u16. The fiji checks expect a plain
; v_mul_lo_u16, an SDWA v_mul_lo_u16 that selects WORD_1 for both sources and
; the destination, and a v_or_b32 that combines the two results.
7define <2 x i16> @v_mul_v2i16(<2 x i16> %a, <2 x i16> %b) {
8; GFX9-LABEL: v_mul_v2i16:
9; GFX9:       ; %bb.0:
10; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
12; GFX9-NEXT:    s_setpc_b64 s[30:31]
13;
14; GFX8-LABEL: v_mul_v2i16:
15; GFX8:       ; %bb.0:
16; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
18; GFX8-NEXT:    v_mul_lo_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
19; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
20; GFX8-NEXT:    s_setpc_b64 s[30:31]
21;
22; GFX10-LABEL: v_mul_v2i16:
23; GFX10:       ; %bb.0:
24; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25; GFX10-NEXT:    v_pk_mul_lo_u16 v0, v0, v1
26; GFX10-NEXT:    s_setpc_b64 s[30:31]
27  %mul = mul <2 x i16> %a, %b ; the only operation under test
28  ret <2 x i16> %mul
29}
30
; Left operand is an fneg of a <2 x half> argument, bitcast to <2 x i16>
; before the integer multiply. The gfx900 and gfx1010/gfx1100 checks expect
; v_pk_mul_lo_u16 with neg_lo:[1,0] neg_hi:[1,0], i.e. modifiers on the first
; source only. The fiji checks expect an explicit v_xor_b32 of v0 with
; 0x80008000 followed by the same three-instruction sequence as the baseline.
; NOTE(review): the packed checks record the fneg folded into neg_lo/neg_hi on
; an integer multiply - confirm that this is the intended lowering.
31define <2 x i16> @v_mul_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
32; GFX9-LABEL: v_mul_v2i16_fneg_lhs:
33; GFX9:       ; %bb.0:
34; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
36; GFX9-NEXT:    s_setpc_b64 s[30:31]
37;
38; GFX8-LABEL: v_mul_v2i16_fneg_lhs:
39; GFX8:       ; %bb.0:
40; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
42; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
43; GFX8-NEXT:    v_mul_lo_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
44; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
45; GFX8-NEXT:    s_setpc_b64 s[30:31]
46;
47; GFX10-LABEL: v_mul_v2i16_fneg_lhs:
48; GFX10:       ; %bb.0:
49; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50; GFX10-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
51; GFX10-NEXT:    s_setpc_b64 s[30:31]
52  %neg.a = fneg <2 x half> %a ; floating-point negate of the left input
53  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16> ; reinterpret the negated halves as i16 lanes
54  %mul = mul <2 x i16> %cast.neg.a, %b
55  ret <2 x i16> %mul
56}
57
; Mirror of the fneg_lhs case: here the right operand is an fneg of a
; <2 x half> argument, bitcast to <2 x i16> before the integer multiply. The
; gfx900 and gfx1010/gfx1100 checks expect v_pk_mul_lo_u16 with
; neg_lo:[0,1] neg_hi:[0,1], i.e. modifiers on the second source only. The
; fiji checks expect a v_xor_b32 of v1 with 0x80008000 ahead of the multiplies.
; NOTE(review): the packed checks record the fneg folded into neg_lo/neg_hi on
; an integer multiply - confirm that this is the intended lowering.
58define <2 x i16> @v_mul_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
59; GFX9-LABEL: v_mul_v2i16_fneg_rhs:
60; GFX9:       ; %bb.0:
61; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
63; GFX9-NEXT:    s_setpc_b64 s[30:31]
64;
65; GFX8-LABEL: v_mul_v2i16_fneg_rhs:
66; GFX8:       ; %bb.0:
67; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
69; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
70; GFX8-NEXT:    v_mul_lo_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
71; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
72; GFX8-NEXT:    s_setpc_b64 s[30:31]
73;
74; GFX10-LABEL: v_mul_v2i16_fneg_rhs:
75; GFX10:       ; %bb.0:
76; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77; GFX10-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
78; GFX10-NEXT:    s_setpc_b64 s[30:31]
79  %neg.b = fneg <2 x half> %b ; floating-point negate of the right input
80  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16> ; reinterpret the negated halves as i16 lanes
81  %mul = mul <2 x i16> %a, %cast.neg.b
82  ret <2 x i16> %mul
83}
84
; Both operands are fneg'd <2 x half> arguments, each bitcast to <2 x i16>
; before the integer multiply. The gfx900 and gfx1010/gfx1100 checks expect
; v_pk_mul_lo_u16 with neg_lo:[1,1] neg_hi:[1,1], i.e. modifiers on both
; sources. The fiji checks expect two v_xor_b32 instructions with 0x80008000
; (v0 first, then v1) ahead of the multiplies.
; NOTE(review): the packed checks record both fnegs folded into neg_lo/neg_hi
; on an integer multiply - confirm that this is the intended lowering.
85define <2 x i16> @v_mul_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
86; GFX9-LABEL: v_mul_v2i16_fneg_lhs_fneg_rhs:
87; GFX9:       ; %bb.0:
88; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89; GFX9-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
90; GFX9-NEXT:    s_setpc_b64 s[30:31]
91;
92; GFX8-LABEL: v_mul_v2i16_fneg_lhs_fneg_rhs:
93; GFX8:       ; %bb.0:
94; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
96; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
97; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v0, v1
98; GFX8-NEXT:    v_mul_lo_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
99; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
100; GFX8-NEXT:    s_setpc_b64 s[30:31]
101;
102; GFX10-LABEL: v_mul_v2i16_fneg_lhs_fneg_rhs:
103; GFX10:       ; %bb.0:
104; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105; GFX10-NEXT:    v_pk_mul_lo_u16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
106; GFX10-NEXT:    s_setpc_b64 s[30:31]
107  %neg.a = fneg <2 x half> %a ; floating-point negate of the left input
108  %neg.b = fneg <2 x half> %b ; floating-point negate of the right input
109  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16> ; reinterpret both negated vectors as i16 lanes
110  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
111  %mul = mul <2 x i16> %cast.neg.a, %cast.neg.b
112  ret <2 x i16> %mul
113}
114